|
{ |
|
"best_metric": 0.3634186694531522, |
|
"best_model_checkpoint": "/xdisk/msurdeanu/enoriega/kw_pubmed/kw_pubmed_1000_0.0003/checkpoint-12", |
|
"epoch": 1.1524390243902438, |
|
"global_step": 52, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.09, |
|
"eval_accuracy": 0.3436084987809126, |
|
"eval_loss": 4.372271537780762, |
|
"eval_runtime": 16.6642, |
|
"eval_samples_per_second": 600.088, |
|
"eval_steps_per_second": 18.783, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029466666666666666, |
|
"loss": 6.0386, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.34417450365726227, |
|
"eval_loss": 4.2112579345703125, |
|
"eval_runtime": 16.5968, |
|
"eval_samples_per_second": 602.524, |
|
"eval_steps_per_second": 18.859, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002893333333333333, |
|
"loss": 3.7573, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_accuracy": 0.3634186694531522, |
|
"eval_loss": 4.2079362869262695, |
|
"eval_runtime": 16.5847, |
|
"eval_samples_per_second": 602.965, |
|
"eval_steps_per_second": 18.873, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00028266666666666663, |
|
"loss": 2.9944, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_accuracy": 0.3512713340299547, |
|
"eval_loss": 4.3369622230529785, |
|
"eval_runtime": 16.6084, |
|
"eval_samples_per_second": 602.106, |
|
"eval_steps_per_second": 18.846, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.000276, |
|
"loss": 2.7048, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_accuracy": 0.30673110414489724, |
|
"eval_loss": 4.859361171722412, |
|
"eval_runtime": 16.6048, |
|
"eval_samples_per_second": 602.234, |
|
"eval_steps_per_second": 18.85, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_accuracy": 0.3382967607105538, |
|
"eval_loss": 4.492859840393066, |
|
"eval_runtime": 16.5439, |
|
"eval_samples_per_second": 604.454, |
|
"eval_steps_per_second": 18.919, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00027066666666666667, |
|
"loss": 2.9458, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_accuracy": 0.34077847439916403, |
|
"eval_loss": 4.514556884765625, |
|
"eval_runtime": 16.5866, |
|
"eval_samples_per_second": 602.897, |
|
"eval_steps_per_second": 18.871, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00026399999999999997, |
|
"loss": 2.3783, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"eval_accuracy": 0.3429989550679206, |
|
"eval_loss": 4.5680060386657715, |
|
"eval_runtime": 16.5703, |
|
"eval_samples_per_second": 603.491, |
|
"eval_steps_per_second": 18.889, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0002573333333333333, |
|
"loss": 2.2485, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_accuracy": 0.34770114942528735, |
|
"eval_loss": 4.509522914886475, |
|
"eval_runtime": 16.5871, |
|
"eval_samples_per_second": 602.877, |
|
"eval_steps_per_second": 18.87, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00025066666666666667, |
|
"loss": 2.1701, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.3449146638801811, |
|
"eval_loss": 4.4971489906311035, |
|
"eval_runtime": 16.5577, |
|
"eval_samples_per_second": 603.949, |
|
"eval_steps_per_second": 18.904, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.33207070707070707, |
|
"eval_loss": 4.7050604820251465, |
|
"eval_runtime": 16.5693, |
|
"eval_samples_per_second": 603.527, |
|
"eval_steps_per_second": 18.89, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.000244, |
|
"loss": 2.0861, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.3310257749912922, |
|
"eval_loss": 4.761545658111572, |
|
"eval_runtime": 16.5548, |
|
"eval_samples_per_second": 604.054, |
|
"eval_steps_per_second": 18.907, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00023733333333333332, |
|
"loss": 2.4168, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.33938523162661094, |
|
"eval_loss": 4.7085795402526855, |
|
"eval_runtime": 16.5811, |
|
"eval_samples_per_second": 603.096, |
|
"eval_steps_per_second": 18.877, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"step": 52, |
|
"total_flos": 3.18987289303776e+16, |
|
"train_loss": 2.93648067345986, |
|
"train_runtime": 1938.9621, |
|
"train_samples_per_second": 947.283, |
|
"train_steps_per_second": 0.116 |
|
} |
|
], |
|
"max_steps": 225, |
|
"num_train_epochs": 5, |
|
"total_flos": 3.18987289303776e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|