|
{ |
|
"best_metric": 0.20911003649234772, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_xlarge/epochs_3_bs_16_lr_5e-6/checkpoint-1500", |
|
"epoch": 2.9991546914623837, |
|
"global_step": 1773, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.2770000100135803, |
|
"eval_loss": 1.3528416156768799, |
|
"eval_runtime": 24.2204, |
|
"eval_samples_per_second": 41.287, |
|
"eval_steps_per_second": 5.161, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.47999998927116394, |
|
"eval_loss": 1.0430142879486084, |
|
"eval_runtime": 23.7736, |
|
"eval_samples_per_second": 42.063, |
|
"eval_steps_per_second": 5.258, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.5640000104904175, |
|
"eval_loss": 1.0979323387145996, |
|
"eval_runtime": 23.5496, |
|
"eval_samples_per_second": 42.464, |
|
"eval_steps_per_second": 5.308, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.5720000267028809, |
|
"eval_loss": 0.9388349652290344, |
|
"eval_runtime": 25.4729, |
|
"eval_samples_per_second": 39.257, |
|
"eval_steps_per_second": 4.907, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.601240834743373e-06, |
|
"loss": 0.2676, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_accuracy": 0.7160000205039978, |
|
"eval_loss": 0.6925698518753052, |
|
"eval_runtime": 24.8675, |
|
"eval_samples_per_second": 40.213, |
|
"eval_steps_per_second": 5.027, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_accuracy": 0.8560000061988831, |
|
"eval_loss": 0.673073947429657, |
|
"eval_runtime": 25.8013, |
|
"eval_samples_per_second": 38.758, |
|
"eval_steps_per_second": 4.845, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9369999766349792, |
|
"eval_loss": 0.3746216893196106, |
|
"eval_runtime": 25.6232, |
|
"eval_samples_per_second": 39.027, |
|
"eval_steps_per_second": 4.878, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.9350000023841858, |
|
"eval_loss": 0.35705798864364624, |
|
"eval_runtime": 25.7852, |
|
"eval_samples_per_second": 38.782, |
|
"eval_steps_per_second": 4.848, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.9409999847412109, |
|
"eval_loss": 0.27689608931541443, |
|
"eval_runtime": 24.8438, |
|
"eval_samples_per_second": 40.252, |
|
"eval_steps_per_second": 5.031, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.1912013536379022e-06, |
|
"loss": 0.1019, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.9350000023841858, |
|
"eval_loss": 0.291538268327713, |
|
"eval_runtime": 26.3153, |
|
"eval_samples_per_second": 38.001, |
|
"eval_steps_per_second": 4.75, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.9509999752044678, |
|
"eval_loss": 0.21956732869148254, |
|
"eval_runtime": 24.9343, |
|
"eval_samples_per_second": 40.105, |
|
"eval_steps_per_second": 5.013, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.9520000219345093, |
|
"eval_loss": 0.2259828746318817, |
|
"eval_runtime": 25.7253, |
|
"eval_samples_per_second": 38.872, |
|
"eval_steps_per_second": 4.859, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9509999752044678, |
|
"eval_loss": 0.256782591342926, |
|
"eval_runtime": 25.7431, |
|
"eval_samples_per_second": 38.845, |
|
"eval_steps_per_second": 4.856, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"eval_accuracy": 0.9549999833106995, |
|
"eval_loss": 0.2290709912776947, |
|
"eval_runtime": 24.8624, |
|
"eval_samples_per_second": 40.221, |
|
"eval_steps_per_second": 5.028, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 7.81161872532431e-07, |
|
"loss": 0.0593, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_accuracy": 0.9589999914169312, |
|
"eval_loss": 0.20911003649234772, |
|
"eval_runtime": 25.438, |
|
"eval_samples_per_second": 39.311, |
|
"eval_steps_per_second": 4.914, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"eval_accuracy": 0.9490000009536743, |
|
"eval_loss": 0.2575547695159912, |
|
"eval_runtime": 25.2822, |
|
"eval_samples_per_second": 39.554, |
|
"eval_steps_per_second": 4.944, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"eval_accuracy": 0.9509999752044678, |
|
"eval_loss": 0.23298123478889465, |
|
"eval_runtime": 25.3064, |
|
"eval_samples_per_second": 39.516, |
|
"eval_steps_per_second": 4.939, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1773, |
|
"total_flos": 3375408735295488.0, |
|
"train_loss": 0.1292072490857808, |
|
"train_runtime": 3156.6951, |
|
"train_samples_per_second": 8.989, |
|
"train_steps_per_second": 0.562 |
|
} |
|
], |
|
"max_steps": 1773, |
|
"num_train_epochs": 3, |
|
"total_flos": 3375408735295488.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|