|
{ |
|
"best_metric": 0.06724460422992706, |
|
"best_model_checkpoint": "/data/jcanete/all_results/pos/albeto_xlarge/epochs_4_bs_32_lr_5e-6/checkpoint-1600", |
|
"epoch": 4.0, |
|
"global_step": 1792, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.45, |
|
"eval_accuracy": 0.9701462191423391, |
|
"eval_f1": 0.9659476055664102, |
|
"eval_loss": 0.11576754599809647, |
|
"eval_precision": 0.9638913757087436, |
|
"eval_recall": 0.9680126271203373, |
|
"eval_runtime": 8.2971, |
|
"eval_samples_per_second": 199.347, |
|
"eval_steps_per_second": 6.267, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"eval_accuracy": 0.9774148184544484, |
|
"eval_f1": 0.9743559053233191, |
|
"eval_loss": 0.08359552174806595, |
|
"eval_precision": 0.9728518503764491, |
|
"eval_recall": 0.9758646180895486, |
|
"eval_runtime": 8.2688, |
|
"eval_samples_per_second": 200.028, |
|
"eval_steps_per_second": 6.289, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 3.61328125e-06, |
|
"loss": 0.3126, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_accuracy": 0.9796174243066027, |
|
"eval_f1": 0.9774613052497209, |
|
"eval_loss": 0.07587467133998871, |
|
"eval_precision": 0.9758069333545728, |
|
"eval_recall": 0.9791212962777966, |
|
"eval_runtime": 8.2627, |
|
"eval_samples_per_second": 200.176, |
|
"eval_steps_per_second": 6.293, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_accuracy": 0.981243964012809, |
|
"eval_f1": 0.9795327764971211, |
|
"eval_loss": 0.07019403576850891, |
|
"eval_precision": 0.978487977989553, |
|
"eval_recall": 0.9805798085952329, |
|
"eval_runtime": 8.2592, |
|
"eval_samples_per_second": 200.262, |
|
"eval_steps_per_second": 6.296, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.2181919642857142e-06, |
|
"loss": 0.0602, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"eval_accuracy": 0.9813286796225072, |
|
"eval_f1": 0.9797755051134948, |
|
"eval_loss": 0.06836934387683868, |
|
"eval_precision": 0.9785745605293579, |
|
"eval_recall": 0.9809794010109688, |
|
"eval_runtime": 8.2691, |
|
"eval_samples_per_second": 200.021, |
|
"eval_steps_per_second": 6.288, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_accuracy": 0.981531997085783, |
|
"eval_f1": 0.9803721997704935, |
|
"eval_loss": 0.06962081789970398, |
|
"eval_precision": 0.9792680009568615, |
|
"eval_recall": 0.9814788915306387, |
|
"eval_runtime": 8.2695, |
|
"eval_samples_per_second": 200.012, |
|
"eval_steps_per_second": 6.288, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_accuracy": 0.9818369732806967, |
|
"eval_f1": 0.9806183756162798, |
|
"eval_loss": 0.06813845783472061, |
|
"eval_precision": 0.9796797415598142, |
|
"eval_recall": 0.981558810013786, |
|
"eval_runtime": 8.2599, |
|
"eval_samples_per_second": 200.245, |
|
"eval_steps_per_second": 6.295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 8.231026785714287e-07, |
|
"loss": 0.0439, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_accuracy": 0.9816844851832399, |
|
"eval_f1": 0.980655593707578, |
|
"eval_loss": 0.06724460422992706, |
|
"eval_precision": 0.9798535894520575, |
|
"eval_recall": 0.9814589119098519, |
|
"eval_runtime": 8.2557, |
|
"eval_samples_per_second": 200.345, |
|
"eval_steps_per_second": 6.299, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1792, |
|
"total_flos": 1487933302262400.0, |
|
"train_loss": 0.12245277049286026, |
|
"train_runtime": 1186.9763, |
|
"train_samples_per_second": 48.207, |
|
"train_steps_per_second": 1.51 |
|
} |
|
], |
|
"max_steps": 1792, |
|
"num_train_epochs": 4, |
|
"total_flos": 1487933302262400.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|