|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 80.0, |
|
"global_step": 2000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.48, |
|
"eval_loss": 0.6259328126907349, |
|
"eval_runtime": 2.8974, |
|
"eval_samples_per_second": 34.514, |
|
"eval_steps_per_second": 4.487, |
|
"step": 25 |
|
}, |
|
{ |
|
"best_epoch": 0, |
|
"best_eval_accuracy": 0.48, |
|
"epoch": 1.0, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.7321136593818665, |
|
"eval_runtime": 2.9205, |
|
"eval_samples_per_second": 34.241, |
|
"eval_steps_per_second": 4.451, |
|
"step": 50 |
|
}, |
|
{ |
|
"best_epoch": 1, |
|
"best_eval_accuracy": 0.62, |
|
"epoch": 2.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.64, |
|
"eval_loss": 0.7952651381492615, |
|
"eval_runtime": 2.9672, |
|
"eval_samples_per_second": 33.702, |
|
"eval_steps_per_second": 4.381, |
|
"step": 75 |
|
}, |
|
{ |
|
"best_epoch": 2, |
|
"best_eval_accuracy": 0.64, |
|
"epoch": 3.0, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.65, |
|
"eval_loss": 0.6992718577384949, |
|
"eval_runtime": 2.9932, |
|
"eval_samples_per_second": 33.409, |
|
"eval_steps_per_second": 4.343, |
|
"step": 100 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 4.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.62, |
|
"eval_loss": 0.5882292985916138, |
|
"eval_runtime": 3.0071, |
|
"eval_samples_per_second": 33.255, |
|
"eval_steps_per_second": 4.323, |
|
"step": 125 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 5.0, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.5895842909812927, |
|
"eval_runtime": 3.0189, |
|
"eval_samples_per_second": 33.125, |
|
"eval_steps_per_second": 4.306, |
|
"step": 150 |
|
}, |
|
{ |
|
"best_epoch": 3, |
|
"best_eval_accuracy": 0.65, |
|
"epoch": 6.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 0.6143475770950317, |
|
"eval_runtime": 3.0217, |
|
"eval_samples_per_second": 33.093, |
|
"eval_steps_per_second": 4.302, |
|
"step": 175 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 7.0, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.63, |
|
"eval_loss": 0.706989586353302, |
|
"eval_runtime": 3.0244, |
|
"eval_samples_per_second": 33.064, |
|
"eval_steps_per_second": 4.298, |
|
"step": 200 |
|
}, |
|
{ |
|
"best_epoch": 6, |
|
"best_eval_accuracy": 0.66, |
|
"epoch": 8.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.6440533995628357, |
|
"eval_runtime": 3.0216, |
|
"eval_samples_per_second": 33.095, |
|
"eval_steps_per_second": 4.302, |
|
"step": 225 |
|
}, |
|
{ |
|
"best_epoch": 8, |
|
"best_eval_accuracy": 0.67, |
|
"epoch": 9.0, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.704773485660553, |
|
"eval_runtime": 3.0257, |
|
"eval_samples_per_second": 33.05, |
|
"eval_steps_per_second": 4.296, |
|
"step": 250 |
|
}, |
|
{ |
|
"best_epoch": 9, |
|
"best_eval_accuracy": 0.68, |
|
"epoch": 10.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5610386729240417, |
|
"eval_runtime": 3.0256, |
|
"eval_samples_per_second": 33.052, |
|
"eval_steps_per_second": 4.297, |
|
"step": 275 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 11.0, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.6845036149024963, |
|
"eval_runtime": 3.0242, |
|
"eval_samples_per_second": 33.067, |
|
"eval_steps_per_second": 4.299, |
|
"step": 300 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 12.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.7743116617202759, |
|
"eval_runtime": 3.0303, |
|
"eval_samples_per_second": 33.0, |
|
"eval_steps_per_second": 4.29, |
|
"step": 325 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 13.0, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.7745197415351868, |
|
"eval_runtime": 3.0249, |
|
"eval_samples_per_second": 33.059, |
|
"eval_steps_per_second": 4.298, |
|
"step": 350 |
|
}, |
|
{ |
|
"best_epoch": 10, |
|
"best_eval_accuracy": 0.7, |
|
"epoch": 14.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.7992074489593506, |
|
"eval_runtime": 3.033, |
|
"eval_samples_per_second": 32.97, |
|
"eval_steps_per_second": 4.286, |
|
"step": 375 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 15.0, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.7165772914886475, |
|
"eval_runtime": 3.0399, |
|
"eval_samples_per_second": 32.896, |
|
"eval_steps_per_second": 4.276, |
|
"step": 400 |
|
}, |
|
{ |
|
"best_epoch": 14, |
|
"best_eval_accuracy": 0.72, |
|
"epoch": 16.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.7013249397277832, |
|
"eval_runtime": 3.0281, |
|
"eval_samples_per_second": 33.024, |
|
"eval_steps_per_second": 4.293, |
|
"step": 425 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 17.0, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.8815276622772217, |
|
"eval_runtime": 3.04, |
|
"eval_samples_per_second": 32.894, |
|
"eval_steps_per_second": 4.276, |
|
"step": 450 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 18.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.7997011542320251, |
|
"eval_runtime": 3.0324, |
|
"eval_samples_per_second": 32.977, |
|
"eval_steps_per_second": 4.287, |
|
"step": 475 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 19.0, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 0.015, |
|
"loss": 0.6923, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.741098940372467, |
|
"eval_runtime": 3.0327, |
|
"eval_samples_per_second": 32.974, |
|
"eval_steps_per_second": 4.287, |
|
"step": 500 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 20.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7322430610656738, |
|
"eval_runtime": 3.0313, |
|
"eval_samples_per_second": 32.989, |
|
"eval_steps_per_second": 4.289, |
|
"step": 525 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 21.0, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 0.8924081325531006, |
|
"eval_runtime": 3.0335, |
|
"eval_samples_per_second": 32.965, |
|
"eval_steps_per_second": 4.285, |
|
"step": 550 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 22.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.72379469871521, |
|
"eval_runtime": 3.0331, |
|
"eval_samples_per_second": 32.97, |
|
"eval_steps_per_second": 4.286, |
|
"step": 575 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 23.0, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7784560322761536, |
|
"eval_runtime": 3.0334, |
|
"eval_samples_per_second": 32.966, |
|
"eval_steps_per_second": 4.286, |
|
"step": 600 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 24.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6886314153671265, |
|
"eval_runtime": 3.034, |
|
"eval_samples_per_second": 32.96, |
|
"eval_steps_per_second": 4.285, |
|
"step": 625 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 25.0, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.7781857848167419, |
|
"eval_runtime": 3.0361, |
|
"eval_samples_per_second": 32.937, |
|
"eval_steps_per_second": 4.282, |
|
"step": 650 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 26.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7322260141372681, |
|
"eval_runtime": 3.0417, |
|
"eval_samples_per_second": 32.877, |
|
"eval_steps_per_second": 4.274, |
|
"step": 675 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 27.0, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.68, |
|
"eval_loss": 0.7590029239654541, |
|
"eval_runtime": 3.0373, |
|
"eval_samples_per_second": 32.924, |
|
"eval_steps_per_second": 4.28, |
|
"step": 700 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 28.0, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7170096635818481, |
|
"eval_runtime": 3.036, |
|
"eval_samples_per_second": 32.938, |
|
"eval_steps_per_second": 4.282, |
|
"step": 725 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 29.0, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7993258833885193, |
|
"eval_runtime": 3.0342, |
|
"eval_samples_per_second": 32.958, |
|
"eval_steps_per_second": 4.285, |
|
"step": 750 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 30.0, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.746490478515625, |
|
"eval_runtime": 3.0396, |
|
"eval_samples_per_second": 32.899, |
|
"eval_steps_per_second": 4.277, |
|
"step": 775 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 31.0, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6626662611961365, |
|
"eval_runtime": 3.0423, |
|
"eval_samples_per_second": 32.869, |
|
"eval_steps_per_second": 4.273, |
|
"step": 800 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 32.0, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.71281898021698, |
|
"eval_runtime": 3.0422, |
|
"eval_samples_per_second": 32.871, |
|
"eval_steps_per_second": 4.273, |
|
"step": 825 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 33.0, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.6698910593986511, |
|
"eval_runtime": 3.0416, |
|
"eval_samples_per_second": 32.877, |
|
"eval_steps_per_second": 4.274, |
|
"step": 850 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 34.0, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 0.6974368095397949, |
|
"eval_runtime": 3.0432, |
|
"eval_samples_per_second": 32.861, |
|
"eval_steps_per_second": 4.272, |
|
"step": 875 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 35.0, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6626088619232178, |
|
"eval_runtime": 3.0454, |
|
"eval_samples_per_second": 32.837, |
|
"eval_steps_per_second": 4.269, |
|
"step": 900 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 36.0, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6843433976173401, |
|
"eval_runtime": 3.0433, |
|
"eval_samples_per_second": 32.859, |
|
"eval_steps_per_second": 4.272, |
|
"step": 925 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 37.0, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6846016049385071, |
|
"eval_runtime": 3.0416, |
|
"eval_samples_per_second": 32.878, |
|
"eval_steps_per_second": 4.274, |
|
"step": 950 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 38.0, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.7097976207733154, |
|
"eval_runtime": 3.0459, |
|
"eval_samples_per_second": 32.831, |
|
"eval_steps_per_second": 4.268, |
|
"step": 975 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 39.0, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 0.01, |
|
"loss": 0.2907, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.684529185295105, |
|
"eval_runtime": 3.046, |
|
"eval_samples_per_second": 32.83, |
|
"eval_steps_per_second": 4.268, |
|
"step": 1000 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 40.0, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6782032251358032, |
|
"eval_runtime": 3.044, |
|
"eval_samples_per_second": 32.851, |
|
"eval_steps_per_second": 4.271, |
|
"step": 1025 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 41.0, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 42.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.6635347008705139, |
|
"eval_runtime": 3.0457, |
|
"eval_samples_per_second": 32.833, |
|
"eval_steps_per_second": 4.268, |
|
"step": 1050 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 42.0, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 43.0, |
|
"eval_accuracy": 0.7, |
|
"eval_loss": 0.5903083682060242, |
|
"eval_runtime": 3.0458, |
|
"eval_samples_per_second": 32.832, |
|
"eval_steps_per_second": 4.268, |
|
"step": 1075 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 43.0, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6071918606758118, |
|
"eval_runtime": 3.0441, |
|
"eval_samples_per_second": 32.851, |
|
"eval_steps_per_second": 4.271, |
|
"step": 1100 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 44.0, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5961195230484009, |
|
"eval_runtime": 3.0477, |
|
"eval_samples_per_second": 32.811, |
|
"eval_steps_per_second": 4.265, |
|
"step": 1125 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 45.0, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 46.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.6114852428436279, |
|
"eval_runtime": 3.0485, |
|
"eval_samples_per_second": 32.803, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1150 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 46.0, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 47.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.6239812970161438, |
|
"eval_runtime": 3.0453, |
|
"eval_samples_per_second": 32.837, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1175 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 47.0, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.6327325701713562, |
|
"eval_runtime": 3.0465, |
|
"eval_samples_per_second": 32.824, |
|
"eval_steps_per_second": 4.267, |
|
"step": 1200 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 48.0, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 49.0, |
|
"eval_accuracy": 0.71, |
|
"eval_loss": 0.693472146987915, |
|
"eval_runtime": 3.0485, |
|
"eval_samples_per_second": 32.803, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1225 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 49.0, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5863702893257141, |
|
"eval_runtime": 3.0495, |
|
"eval_samples_per_second": 32.793, |
|
"eval_steps_per_second": 4.263, |
|
"step": 1250 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 50.0, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 51.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.577936053276062, |
|
"eval_runtime": 3.0454, |
|
"eval_samples_per_second": 32.836, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1275 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 51.0, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.6012981534004211, |
|
"eval_runtime": 3.0456, |
|
"eval_samples_per_second": 32.834, |
|
"eval_steps_per_second": 4.268, |
|
"step": 1300 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 52.0, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 53.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5664993524551392, |
|
"eval_runtime": 3.0459, |
|
"eval_samples_per_second": 32.831, |
|
"eval_steps_per_second": 4.268, |
|
"step": 1325 |
|
}, |
|
{ |
|
"best_epoch": 16, |
|
"best_eval_accuracy": 0.75, |
|
"epoch": 53.0, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 54.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.5744893550872803, |
|
"eval_runtime": 3.0448, |
|
"eval_samples_per_second": 32.842, |
|
"eval_steps_per_second": 4.27, |
|
"step": 1350 |
|
}, |
|
{ |
|
"best_epoch": 53, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 54.0, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 55.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.6107723712921143, |
|
"eval_runtime": 3.0477, |
|
"eval_samples_per_second": 32.812, |
|
"eval_steps_per_second": 4.266, |
|
"step": 1375 |
|
}, |
|
{ |
|
"best_epoch": 53, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 55.0, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.584411084651947, |
|
"eval_runtime": 3.045, |
|
"eval_samples_per_second": 32.841, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1400 |
|
}, |
|
{ |
|
"best_epoch": 53, |
|
"best_eval_accuracy": 0.76, |
|
"epoch": 56.0, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 57.0, |
|
"eval_accuracy": 0.77, |
|
"eval_loss": 0.5647205114364624, |
|
"eval_runtime": 3.0572, |
|
"eval_samples_per_second": 32.71, |
|
"eval_steps_per_second": 4.252, |
|
"step": 1425 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 57.0, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 58.0, |
|
"eval_accuracy": 0.76, |
|
"eval_loss": 0.584377110004425, |
|
"eval_runtime": 3.0552, |
|
"eval_samples_per_second": 32.731, |
|
"eval_steps_per_second": 4.255, |
|
"step": 1450 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 58.0, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 59.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5720120668411255, |
|
"eval_runtime": 3.0488, |
|
"eval_samples_per_second": 32.8, |
|
"eval_steps_per_second": 4.264, |
|
"step": 1475 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 59.0, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 0.005, |
|
"loss": 0.2156, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5814980864524841, |
|
"eval_runtime": 3.0453, |
|
"eval_samples_per_second": 32.838, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1500 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 60.0, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 61.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5615140795707703, |
|
"eval_runtime": 3.045, |
|
"eval_samples_per_second": 32.841, |
|
"eval_steps_per_second": 4.269, |
|
"step": 1525 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 61.0, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 62.0, |
|
"eval_accuracy": 0.75, |
|
"eval_loss": 0.5819821953773499, |
|
"eval_runtime": 3.0442, |
|
"eval_samples_per_second": 32.85, |
|
"eval_steps_per_second": 4.27, |
|
"step": 1550 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 62.0, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 63.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5712084174156189, |
|
"eval_runtime": 3.0434, |
|
"eval_samples_per_second": 32.858, |
|
"eval_steps_per_second": 4.272, |
|
"step": 1575 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 63.0, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.5682441592216492, |
|
"eval_runtime": 3.0401, |
|
"eval_samples_per_second": 32.893, |
|
"eval_steps_per_second": 4.276, |
|
"step": 1600 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 64.0, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 65.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.6267381310462952, |
|
"eval_runtime": 3.0478, |
|
"eval_samples_per_second": 32.811, |
|
"eval_steps_per_second": 4.265, |
|
"step": 1625 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 65.0, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 66.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5814998745918274, |
|
"eval_runtime": 3.0406, |
|
"eval_samples_per_second": 32.888, |
|
"eval_steps_per_second": 4.275, |
|
"step": 1650 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 66.0, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 67.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.617068886756897, |
|
"eval_runtime": 3.0363, |
|
"eval_samples_per_second": 32.935, |
|
"eval_steps_per_second": 4.282, |
|
"step": 1675 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 67.0, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.74, |
|
"eval_loss": 0.5554370284080505, |
|
"eval_runtime": 3.0371, |
|
"eval_samples_per_second": 32.926, |
|
"eval_steps_per_second": 4.28, |
|
"step": 1700 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 68.0, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 69.0, |
|
"eval_accuracy": 0.72, |
|
"eval_loss": 0.6060196161270142, |
|
"eval_runtime": 3.0399, |
|
"eval_samples_per_second": 32.896, |
|
"eval_steps_per_second": 4.276, |
|
"step": 1725 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 69.0, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5574981570243835, |
|
"eval_runtime": 3.0343, |
|
"eval_samples_per_second": 32.956, |
|
"eval_steps_per_second": 4.284, |
|
"step": 1750 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 70.0, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 71.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5885199904441833, |
|
"eval_runtime": 3.0375, |
|
"eval_samples_per_second": 32.922, |
|
"eval_steps_per_second": 4.28, |
|
"step": 1775 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 71.0, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5570526123046875, |
|
"eval_runtime": 3.0354, |
|
"eval_samples_per_second": 32.945, |
|
"eval_steps_per_second": 4.283, |
|
"step": 1800 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 72.0, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 73.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5845230221748352, |
|
"eval_runtime": 3.0345, |
|
"eval_samples_per_second": 32.954, |
|
"eval_steps_per_second": 4.284, |
|
"step": 1825 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 73.0, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 74.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5710142254829407, |
|
"eval_runtime": 3.0367, |
|
"eval_samples_per_second": 32.93, |
|
"eval_steps_per_second": 4.281, |
|
"step": 1850 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 74.0, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 75.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5680321455001831, |
|
"eval_runtime": 3.0337, |
|
"eval_samples_per_second": 32.963, |
|
"eval_steps_per_second": 4.285, |
|
"step": 1875 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 75.0, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5799435973167419, |
|
"eval_runtime": 3.0373, |
|
"eval_samples_per_second": 32.924, |
|
"eval_steps_per_second": 4.28, |
|
"step": 1900 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 76.0, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 77.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.563614010810852, |
|
"eval_runtime": 3.0394, |
|
"eval_samples_per_second": 32.901, |
|
"eval_steps_per_second": 4.277, |
|
"step": 1925 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 77.0, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 78.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5738111734390259, |
|
"eval_runtime": 3.0401, |
|
"eval_samples_per_second": 32.893, |
|
"eval_steps_per_second": 4.276, |
|
"step": 1950 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 78.0, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 79.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5750021934509277, |
|
"eval_runtime": 3.0413, |
|
"eval_samples_per_second": 32.881, |
|
"eval_steps_per_second": 4.275, |
|
"step": 1975 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 79.0, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"learning_rate": 0.0, |
|
"loss": 0.194, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.73, |
|
"eval_loss": 0.5758109092712402, |
|
"eval_runtime": 3.0331, |
|
"eval_samples_per_second": 32.97, |
|
"eval_steps_per_second": 4.286, |
|
"step": 2000 |
|
}, |
|
{ |
|
"best_epoch": 56, |
|
"best_eval_accuracy": 0.77, |
|
"epoch": 80.0, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"step": 2000, |
|
"total_flos": 2.9821702864896e+16, |
|
"train_loss": 0.3481477241516113, |
|
"train_runtime": 1624.5487, |
|
"train_samples_per_second": 19.698, |
|
"train_steps_per_second": 1.231 |
|
} |
|
], |
|
"max_steps": 2000, |
|
"num_train_epochs": 80, |
|
"total_flos": 2.9821702864896e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|