|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5415894389152527, |
|
"eval_runtime": 2.9244, |
|
"eval_samples_per_second": 34.195, |
|
"eval_steps_per_second": 4.445, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5393697023391724, |
|
"eval_runtime": 2.9776, |
|
"eval_samples_per_second": 33.584, |
|
"eval_steps_per_second": 4.366, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5375608801841736, |
|
"eval_runtime": 3.0286, |
|
"eval_samples_per_second": 33.018, |
|
"eval_steps_per_second": 4.292, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5475878119468689, |
|
"eval_runtime": 3.06, |
|
"eval_samples_per_second": 32.68, |
|
"eval_steps_per_second": 4.248, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5370528101921082, |
|
"eval_runtime": 3.0811, |
|
"eval_samples_per_second": 32.456, |
|
"eval_steps_per_second": 4.219, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5441551208496094, |
|
"eval_runtime": 3.0936, |
|
"eval_samples_per_second": 32.324, |
|
"eval_steps_per_second": 4.202, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5412868857383728, |
|
"eval_runtime": 3.1033, |
|
"eval_samples_per_second": 32.224, |
|
"eval_steps_per_second": 4.189, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5381051898002625, |
|
"eval_runtime": 3.1116, |
|
"eval_samples_per_second": 32.138, |
|
"eval_steps_per_second": 4.178, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5366445183753967, |
|
"eval_runtime": 3.1179, |
|
"eval_samples_per_second": 32.073, |
|
"eval_steps_per_second": 4.17, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5402125120162964, |
|
"eval_runtime": 3.1204, |
|
"eval_samples_per_second": 32.047, |
|
"eval_steps_per_second": 4.166, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5405026078224182, |
|
"eval_runtime": 3.1198, |
|
"eval_samples_per_second": 32.053, |
|
"eval_steps_per_second": 4.167, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5396003127098083, |
|
"eval_runtime": 3.1242, |
|
"eval_samples_per_second": 32.009, |
|
"eval_steps_per_second": 4.161, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5378925204277039, |
|
"eval_runtime": 3.1244, |
|
"eval_samples_per_second": 32.006, |
|
"eval_steps_per_second": 4.161, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5375378727912903, |
|
"eval_runtime": 3.1264, |
|
"eval_samples_per_second": 31.985, |
|
"eval_steps_per_second": 4.158, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5392658114433289, |
|
"eval_runtime": 3.1259, |
|
"eval_samples_per_second": 31.991, |
|
"eval_steps_per_second": 4.159, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5370680689811707, |
|
"eval_runtime": 3.1284, |
|
"eval_samples_per_second": 31.965, |
|
"eval_steps_per_second": 4.155, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5286482572555542, |
|
"eval_runtime": 3.128, |
|
"eval_samples_per_second": 31.969, |
|
"eval_steps_per_second": 4.156, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5312860608100891, |
|
"eval_runtime": 3.1331, |
|
"eval_samples_per_second": 31.917, |
|
"eval_steps_per_second": 4.149, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5426998734474182, |
|
"eval_runtime": 3.1294, |
|
"eval_samples_per_second": 31.955, |
|
"eval_steps_per_second": 4.154, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.616, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5469082593917847, |
|
"eval_runtime": 3.1318, |
|
"eval_samples_per_second": 31.931, |
|
"eval_steps_per_second": 4.151, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5347804427146912, |
|
"eval_runtime": 3.1345, |
|
"eval_samples_per_second": 31.903, |
|
"eval_steps_per_second": 4.147, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5352023243904114, |
|
"eval_runtime": 3.1312, |
|
"eval_samples_per_second": 31.936, |
|
"eval_steps_per_second": 4.152, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5434411764144897, |
|
"eval_runtime": 3.1342, |
|
"eval_samples_per_second": 31.906, |
|
"eval_steps_per_second": 4.148, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.543671190738678, |
|
"eval_runtime": 3.1348, |
|
"eval_samples_per_second": 31.9, |
|
"eval_steps_per_second": 4.147, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5343627333641052, |
|
"eval_runtime": 3.1317, |
|
"eval_samples_per_second": 31.932, |
|
"eval_steps_per_second": 4.151, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5343685746192932, |
|
"eval_runtime": 3.132, |
|
"eval_samples_per_second": 31.929, |
|
"eval_steps_per_second": 4.151, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5318924188613892, |
|
"eval_runtime": 3.1314, |
|
"eval_samples_per_second": 31.934, |
|
"eval_steps_per_second": 4.151, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5328655242919922, |
|
"eval_runtime": 3.1348, |
|
"eval_samples_per_second": 31.9, |
|
"eval_steps_per_second": 4.147, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5312965512275696, |
|
"eval_runtime": 3.1312, |
|
"eval_samples_per_second": 31.937, |
|
"eval_steps_per_second": 4.152, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5321267247200012, |
|
"eval_runtime": 3.1321, |
|
"eval_samples_per_second": 31.927, |
|
"eval_steps_per_second": 4.151, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5341834425926208, |
|
"eval_runtime": 3.1318, |
|
"eval_samples_per_second": 31.93, |
|
"eval_steps_per_second": 4.151, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5363546013832092, |
|
"eval_runtime": 3.131, |
|
"eval_samples_per_second": 31.938, |
|
"eval_steps_per_second": 4.152, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5350150465965271, |
|
"eval_runtime": 3.1305, |
|
"eval_samples_per_second": 31.944, |
|
"eval_steps_per_second": 4.153, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5382318496704102, |
|
"eval_runtime": 3.1297, |
|
"eval_samples_per_second": 31.952, |
|
"eval_steps_per_second": 4.154, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5329770445823669, |
|
"eval_runtime": 3.1308, |
|
"eval_samples_per_second": 31.941, |
|
"eval_steps_per_second": 4.152, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5360609292984009, |
|
"eval_runtime": 3.129, |
|
"eval_samples_per_second": 31.959, |
|
"eval_steps_per_second": 4.155, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5378895998001099, |
|
"eval_runtime": 3.1316, |
|
"eval_samples_per_second": 31.932, |
|
"eval_steps_per_second": 4.151, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5313640236854553, |
|
"eval_runtime": 3.1315, |
|
"eval_samples_per_second": 31.933, |
|
"eval_steps_per_second": 4.151, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5307939648628235, |
|
"eval_runtime": 3.1306, |
|
"eval_samples_per_second": 31.943, |
|
"eval_steps_per_second": 4.153, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.6054, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5347521901130676, |
|
"eval_runtime": 3.132, |
|
"eval_samples_per_second": 31.929, |
|
"eval_steps_per_second": 4.151, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5373522043228149, |
|
"eval_runtime": 3.1333, |
|
"eval_samples_per_second": 31.915, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5363101363182068, |
|
"eval_runtime": 3.1311, |
|
"eval_samples_per_second": 31.938, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5361413359642029, |
|
"eval_runtime": 3.1442, |
|
"eval_samples_per_second": 31.804, |
|
"eval_steps_per_second": 4.135, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5333101153373718, |
|
"eval_runtime": 3.1351, |
|
"eval_samples_per_second": 31.896, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5345572829246521, |
|
"eval_runtime": 3.131, |
|
"eval_samples_per_second": 31.939, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5354093313217163, |
|
"eval_runtime": 3.132, |
|
"eval_samples_per_second": 31.929, |
|
"eval_steps_per_second": 4.151, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5338207483291626, |
|
"eval_runtime": 3.1317, |
|
"eval_samples_per_second": 31.931, |
|
"eval_steps_per_second": 4.151, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5332033634185791, |
|
"eval_runtime": 3.1333, |
|
"eval_samples_per_second": 31.915, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5333837270736694, |
|
"eval_runtime": 3.1309, |
|
"eval_samples_per_second": 31.94, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5361276268959045, |
|
"eval_runtime": 3.1288, |
|
"eval_samples_per_second": 31.962, |
|
"eval_steps_per_second": 4.155, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5311057567596436, |
|
"eval_runtime": 3.1314, |
|
"eval_samples_per_second": 31.935, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5332194566726685, |
|
"eval_runtime": 3.1329, |
|
"eval_samples_per_second": 31.919, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5312312841415405, |
|
"eval_runtime": 3.1314, |
|
"eval_samples_per_second": 31.935, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5333719253540039, |
|
"eval_runtime": 3.1308, |
|
"eval_samples_per_second": 31.941, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5306155681610107, |
|
"eval_runtime": 3.1331, |
|
"eval_samples_per_second": 31.917, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5325944423675537, |
|
"eval_runtime": 3.1292, |
|
"eval_samples_per_second": 31.957, |
|
"eval_steps_per_second": 4.154, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5336495041847229, |
|
"eval_runtime": 3.1276, |
|
"eval_samples_per_second": 31.973, |
|
"eval_steps_per_second": 4.157, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5360873937606812, |
|
"eval_runtime": 3.1422, |
|
"eval_samples_per_second": 31.825, |
|
"eval_steps_per_second": 4.137, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5358687043190002, |
|
"eval_runtime": 3.1436, |
|
"eval_samples_per_second": 31.811, |
|
"eval_steps_per_second": 4.135, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.5996, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5341857671737671, |
|
"eval_runtime": 3.1334, |
|
"eval_samples_per_second": 31.914, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5345585942268372, |
|
"eval_runtime": 3.1431, |
|
"eval_samples_per_second": 31.816, |
|
"eval_steps_per_second": 4.136, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5333269238471985, |
|
"eval_runtime": 3.1337, |
|
"eval_samples_per_second": 31.912, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5321747660636902, |
|
"eval_runtime": 3.1351, |
|
"eval_samples_per_second": 31.896, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5307139754295349, |
|
"eval_runtime": 3.1326, |
|
"eval_samples_per_second": 31.923, |
|
"eval_steps_per_second": 4.15, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5297897458076477, |
|
"eval_runtime": 3.1367, |
|
"eval_samples_per_second": 31.881, |
|
"eval_steps_per_second": 4.144, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5299915671348572, |
|
"eval_runtime": 3.1344, |
|
"eval_samples_per_second": 31.904, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5306195616722107, |
|
"eval_runtime": 3.1331, |
|
"eval_samples_per_second": 31.917, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5310782790184021, |
|
"eval_runtime": 3.135, |
|
"eval_samples_per_second": 31.898, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5317740440368652, |
|
"eval_runtime": 3.1334, |
|
"eval_samples_per_second": 31.914, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5319586396217346, |
|
"eval_runtime": 3.1355, |
|
"eval_samples_per_second": 31.892, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5320259928703308, |
|
"eval_runtime": 3.1356, |
|
"eval_samples_per_second": 31.892, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5309186577796936, |
|
"eval_runtime": 3.1334, |
|
"eval_samples_per_second": 31.914, |
|
"eval_steps_per_second": 4.149, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5307186245918274, |
|
"eval_runtime": 3.1346, |
|
"eval_samples_per_second": 31.902, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5306009650230408, |
|
"eval_runtime": 3.135, |
|
"eval_samples_per_second": 31.898, |
|
"eval_steps_per_second": 4.147, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5314067006111145, |
|
"eval_runtime": 3.1521, |
|
"eval_samples_per_second": 31.725, |
|
"eval_steps_per_second": 4.124, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5311477184295654, |
|
"eval_runtime": 3.1462, |
|
"eval_samples_per_second": 31.785, |
|
"eval_steps_per_second": 4.132, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5311266779899597, |
|
"eval_runtime": 3.1309, |
|
"eval_samples_per_second": 31.94, |
|
"eval_steps_per_second": 4.152, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.53106290102005, |
|
"eval_runtime": 3.1356, |
|
"eval_samples_per_second": 31.892, |
|
"eval_steps_per_second": 4.146, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5310864448547363, |
|
"eval_runtime": 3.1316, |
|
"eval_samples_per_second": 31.932, |
|
"eval_steps_per_second": 4.151, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.596, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5310752391815186, |
|
"eval_runtime": 3.1362, |
|
"eval_samples_per_second": 31.886, |
|
"eval_steps_per_second": 4.145, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.6042405700683594, |
|
"train_runtime": 1661.2119, |
|
"train_samples_per_second": 19.263, |
|
"train_steps_per_second": 1.204 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|