|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5447632074356079, |
|
"eval_runtime": 2.9004, |
|
"eval_samples_per_second": 34.478, |
|
"eval_steps_per_second": 4.482, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5351557731628418, |
|
"eval_runtime": 2.9353, |
|
"eval_samples_per_second": 34.068, |
|
"eval_steps_per_second": 4.429, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5466512441635132, |
|
"eval_runtime": 2.9987, |
|
"eval_samples_per_second": 33.348, |
|
"eval_steps_per_second": 4.335, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5432061553001404, |
|
"eval_runtime": 3.0354, |
|
"eval_samples_per_second": 32.945, |
|
"eval_steps_per_second": 4.283, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5446352958679199, |
|
"eval_runtime": 3.0683, |
|
"eval_samples_per_second": 32.591, |
|
"eval_steps_per_second": 4.237, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5419210195541382, |
|
"eval_runtime": 3.0777, |
|
"eval_samples_per_second": 32.492, |
|
"eval_steps_per_second": 4.224, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5364170074462891, |
|
"eval_runtime": 3.075, |
|
"eval_samples_per_second": 32.52, |
|
"eval_steps_per_second": 4.228, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5399757027626038, |
|
"eval_runtime": 3.0939, |
|
"eval_samples_per_second": 32.322, |
|
"eval_steps_per_second": 4.202, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5460018515586853, |
|
"eval_runtime": 3.0861, |
|
"eval_samples_per_second": 32.404, |
|
"eval_steps_per_second": 4.212, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.547890305519104, |
|
"eval_runtime": 3.0888, |
|
"eval_samples_per_second": 32.375, |
|
"eval_steps_per_second": 4.209, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.542870819568634, |
|
"eval_runtime": 3.0902, |
|
"eval_samples_per_second": 32.361, |
|
"eval_steps_per_second": 4.207, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5363436341285706, |
|
"eval_runtime": 3.0944, |
|
"eval_samples_per_second": 32.316, |
|
"eval_steps_per_second": 4.201, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5431585907936096, |
|
"eval_runtime": 3.0952, |
|
"eval_samples_per_second": 32.308, |
|
"eval_steps_per_second": 4.2, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5446100831031799, |
|
"eval_runtime": 3.0959, |
|
"eval_samples_per_second": 32.301, |
|
"eval_steps_per_second": 4.199, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5619304180145264, |
|
"eval_runtime": 3.0987, |
|
"eval_samples_per_second": 32.272, |
|
"eval_steps_per_second": 4.195, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5399982929229736, |
|
"eval_runtime": 3.0957, |
|
"eval_samples_per_second": 32.303, |
|
"eval_steps_per_second": 4.199, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5394828915596008, |
|
"eval_runtime": 3.0984, |
|
"eval_samples_per_second": 32.274, |
|
"eval_steps_per_second": 4.196, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5439099073410034, |
|
"eval_runtime": 3.0994, |
|
"eval_samples_per_second": 32.264, |
|
"eval_steps_per_second": 4.194, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5420183539390564, |
|
"eval_runtime": 3.0999, |
|
"eval_samples_per_second": 32.259, |
|
"eval_steps_per_second": 4.194, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.00075, |
|
"loss": 0.6126, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.5401513576507568, |
|
"eval_runtime": 3.0996, |
|
"eval_samples_per_second": 32.262, |
|
"eval_steps_per_second": 4.194, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5430670976638794, |
|
"eval_runtime": 3.1006, |
|
"eval_samples_per_second": 32.252, |
|
"eval_steps_per_second": 4.193, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5421171188354492, |
|
"eval_runtime": 3.1013, |
|
"eval_samples_per_second": 32.245, |
|
"eval_steps_per_second": 4.192, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5431703329086304, |
|
"eval_runtime": 3.1002, |
|
"eval_samples_per_second": 32.256, |
|
"eval_steps_per_second": 4.193, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5438470244407654, |
|
"eval_runtime": 3.0981, |
|
"eval_samples_per_second": 32.277, |
|
"eval_steps_per_second": 4.196, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5364149212837219, |
|
"eval_runtime": 3.1003, |
|
"eval_samples_per_second": 32.255, |
|
"eval_steps_per_second": 4.193, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.541420578956604, |
|
"eval_runtime": 3.0998, |
|
"eval_samples_per_second": 32.26, |
|
"eval_steps_per_second": 4.194, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5394607782363892, |
|
"eval_runtime": 3.0995, |
|
"eval_samples_per_second": 32.263, |
|
"eval_steps_per_second": 4.194, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5440071225166321, |
|
"eval_runtime": 3.1009, |
|
"eval_samples_per_second": 32.249, |
|
"eval_steps_per_second": 4.192, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5445890426635742, |
|
"eval_runtime": 3.1002, |
|
"eval_samples_per_second": 32.256, |
|
"eval_steps_per_second": 4.193, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5472090840339661, |
|
"eval_runtime": 3.1018, |
|
"eval_samples_per_second": 32.239, |
|
"eval_steps_per_second": 4.191, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.5418501496315002, |
|
"eval_runtime": 3.0989, |
|
"eval_samples_per_second": 32.27, |
|
"eval_steps_per_second": 4.195, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5413060188293457, |
|
"eval_runtime": 3.1001, |
|
"eval_samples_per_second": 32.257, |
|
"eval_steps_per_second": 4.193, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5530202388763428, |
|
"eval_runtime": 3.1009, |
|
"eval_samples_per_second": 32.249, |
|
"eval_steps_per_second": 4.192, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5460859537124634, |
|
"eval_runtime": 3.1, |
|
"eval_samples_per_second": 32.258, |
|
"eval_steps_per_second": 4.194, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5440330505371094, |
|
"eval_runtime": 3.1019, |
|
"eval_samples_per_second": 32.238, |
|
"eval_steps_per_second": 4.191, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5436714291572571, |
|
"eval_runtime": 3.1024, |
|
"eval_samples_per_second": 32.234, |
|
"eval_steps_per_second": 4.19, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.543533444404602, |
|
"eval_runtime": 3.1035, |
|
"eval_samples_per_second": 32.221, |
|
"eval_steps_per_second": 4.189, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5481903553009033, |
|
"eval_runtime": 3.1013, |
|
"eval_samples_per_second": 32.244, |
|
"eval_steps_per_second": 4.192, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5449146628379822, |
|
"eval_runtime": 3.1036, |
|
"eval_samples_per_second": 32.221, |
|
"eval_steps_per_second": 4.189, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.0005, |
|
"loss": 0.6037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5441970229148865, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 32.235, |
|
"eval_steps_per_second": 4.191, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5376811027526855, |
|
"eval_runtime": 3.1035, |
|
"eval_samples_per_second": 32.222, |
|
"eval_steps_per_second": 4.189, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5410917401313782, |
|
"eval_runtime": 3.1029, |
|
"eval_samples_per_second": 32.228, |
|
"eval_steps_per_second": 4.19, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.59, |
|
"eval_loss": 0.5482169985771179, |
|
"eval_runtime": 3.1058, |
|
"eval_samples_per_second": 32.198, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5494033694267273, |
|
"eval_runtime": 3.1008, |
|
"eval_samples_per_second": 32.25, |
|
"eval_steps_per_second": 4.193, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.6, |
|
"eval_loss": 0.5510344505310059, |
|
"eval_runtime": 3.1036, |
|
"eval_samples_per_second": 32.221, |
|
"eval_steps_per_second": 4.189, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.61, |
|
"eval_loss": 0.5471860766410828, |
|
"eval_runtime": 3.1029, |
|
"eval_samples_per_second": 32.228, |
|
"eval_steps_per_second": 4.19, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5415754318237305, |
|
"eval_runtime": 3.1057, |
|
"eval_samples_per_second": 32.199, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5396535396575928, |
|
"eval_runtime": 3.1031, |
|
"eval_samples_per_second": 32.226, |
|
"eval_steps_per_second": 4.189, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5417464375495911, |
|
"eval_runtime": 3.1027, |
|
"eval_samples_per_second": 32.23, |
|
"eval_steps_per_second": 4.19, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5390459895133972, |
|
"eval_runtime": 3.1051, |
|
"eval_samples_per_second": 32.205, |
|
"eval_steps_per_second": 4.187, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5389121174812317, |
|
"eval_runtime": 3.1044, |
|
"eval_samples_per_second": 32.213, |
|
"eval_steps_per_second": 4.188, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5365983843803406, |
|
"eval_runtime": 3.1052, |
|
"eval_samples_per_second": 32.204, |
|
"eval_steps_per_second": 4.187, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5367568731307983, |
|
"eval_runtime": 3.1042, |
|
"eval_samples_per_second": 32.215, |
|
"eval_steps_per_second": 4.188, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5392716526985168, |
|
"eval_runtime": 3.1024, |
|
"eval_samples_per_second": 32.233, |
|
"eval_steps_per_second": 4.19, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5378238558769226, |
|
"eval_runtime": 3.1049, |
|
"eval_samples_per_second": 32.207, |
|
"eval_steps_per_second": 4.187, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5391019582748413, |
|
"eval_runtime": 3.106, |
|
"eval_samples_per_second": 32.196, |
|
"eval_steps_per_second": 4.185, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5383239388465881, |
|
"eval_runtime": 3.1054, |
|
"eval_samples_per_second": 32.202, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5379219055175781, |
|
"eval_runtime": 3.1022, |
|
"eval_samples_per_second": 32.235, |
|
"eval_steps_per_second": 4.191, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5381463766098022, |
|
"eval_runtime": 3.1059, |
|
"eval_samples_per_second": 32.197, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.00025, |
|
"loss": 0.6021, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5409640669822693, |
|
"eval_runtime": 3.1098, |
|
"eval_samples_per_second": 32.156, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5401256084442139, |
|
"eval_runtime": 3.1056, |
|
"eval_samples_per_second": 32.2, |
|
"eval_steps_per_second": 4.186, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5403258800506592, |
|
"eval_runtime": 3.1087, |
|
"eval_samples_per_second": 32.167, |
|
"eval_steps_per_second": 4.182, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5411276817321777, |
|
"eval_runtime": 3.103, |
|
"eval_samples_per_second": 32.226, |
|
"eval_steps_per_second": 4.189, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5414915680885315, |
|
"eval_runtime": 3.1078, |
|
"eval_samples_per_second": 32.177, |
|
"eval_steps_per_second": 4.183, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5414855480194092, |
|
"eval_runtime": 3.1111, |
|
"eval_samples_per_second": 32.143, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5408964157104492, |
|
"eval_runtime": 3.1087, |
|
"eval_samples_per_second": 32.168, |
|
"eval_steps_per_second": 4.182, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5418642163276672, |
|
"eval_runtime": 3.1104, |
|
"eval_samples_per_second": 32.15, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5401394367218018, |
|
"eval_runtime": 3.1106, |
|
"eval_samples_per_second": 32.148, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5424190163612366, |
|
"eval_runtime": 3.1114, |
|
"eval_samples_per_second": 32.139, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5420389175415039, |
|
"eval_runtime": 3.1113, |
|
"eval_samples_per_second": 32.141, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.541526734828949, |
|
"eval_runtime": 3.1099, |
|
"eval_samples_per_second": 32.155, |
|
"eval_steps_per_second": 4.18, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5391311049461365, |
|
"eval_runtime": 3.1129, |
|
"eval_samples_per_second": 32.124, |
|
"eval_steps_per_second": 4.176, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5395915508270264, |
|
"eval_runtime": 3.1177, |
|
"eval_samples_per_second": 32.075, |
|
"eval_steps_per_second": 4.17, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5395561456680298, |
|
"eval_runtime": 3.1126, |
|
"eval_samples_per_second": 32.128, |
|
"eval_steps_per_second": 4.177, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.54047030210495, |
|
"eval_runtime": 3.1118, |
|
"eval_samples_per_second": 32.135, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5403570532798767, |
|
"eval_runtime": 3.1117, |
|
"eval_samples_per_second": 32.137, |
|
"eval_steps_per_second": 4.178, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5400457978248596, |
|
"eval_runtime": 3.122, |
|
"eval_samples_per_second": 32.031, |
|
"eval_steps_per_second": 4.164, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5401412844657898, |
|
"eval_runtime": 3.1203, |
|
"eval_samples_per_second": 32.048, |
|
"eval_steps_per_second": 4.166, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5402973890304565, |
|
"eval_runtime": 3.1111, |
|
"eval_samples_per_second": 32.143, |
|
"eval_steps_per_second": 4.179, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.5946, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.5402593612670898, |
|
"eval_runtime": 3.1254, |
|
"eval_samples_per_second": 31.995, |
|
"eval_steps_per_second": 4.159, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.6032544250488281, |
|
"train_runtime": 1653.7835, |
|
"train_samples_per_second": 19.35, |
|
"train_steps_per_second": 1.209 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|