|
{ |
|
"best_metric": 0.15715740621089935, |
|
"best_model_checkpoint": "/data/jcanete/all_results/mldoc/albeto_base/epochs_4_bs_16_lr_5e-5/checkpoint-1400", |
|
"epoch": 4.0, |
|
"global_step": 2368, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_accuracy": 0.7229999899864197, |
|
"eval_loss": 1.0613000392913818, |
|
"eval_runtime": 4.4363, |
|
"eval_samples_per_second": 225.412, |
|
"eval_steps_per_second": 14.201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_accuracy": 0.7990000247955322, |
|
"eval_loss": 0.672461748123169, |
|
"eval_runtime": 4.3549, |
|
"eval_samples_per_second": 229.626, |
|
"eval_steps_per_second": 14.466, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_accuracy": 0.8479999899864197, |
|
"eval_loss": 0.523521900177002, |
|
"eval_runtime": 4.3517, |
|
"eval_samples_per_second": 229.796, |
|
"eval_steps_per_second": 14.477, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"eval_accuracy": 0.8930000066757202, |
|
"eval_loss": 0.4281160831451416, |
|
"eval_runtime": 4.3489, |
|
"eval_samples_per_second": 229.946, |
|
"eval_steps_per_second": 14.487, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.94847972972973e-05, |
|
"loss": 0.1597, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_accuracy": 0.9039999842643738, |
|
"eval_loss": 0.4125765860080719, |
|
"eval_runtime": 4.3451, |
|
"eval_samples_per_second": 230.146, |
|
"eval_steps_per_second": 14.499, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_accuracy": 0.9020000100135803, |
|
"eval_loss": 0.40425220131874084, |
|
"eval_runtime": 4.3435, |
|
"eval_samples_per_second": 230.229, |
|
"eval_steps_per_second": 14.504, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_accuracy": 0.9340000152587891, |
|
"eval_loss": 0.3148828148841858, |
|
"eval_runtime": 4.3516, |
|
"eval_samples_per_second": 229.803, |
|
"eval_steps_per_second": 14.478, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_accuracy": 0.9399999976158142, |
|
"eval_loss": 0.30127567052841187, |
|
"eval_runtime": 4.3657, |
|
"eval_samples_per_second": 229.057, |
|
"eval_steps_per_second": 14.431, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_accuracy": 0.949999988079071, |
|
"eval_loss": 0.18032802641391754, |
|
"eval_runtime": 4.3502, |
|
"eval_samples_per_second": 229.873, |
|
"eval_steps_per_second": 14.482, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.8927364864864863e-05, |
|
"loss": 0.0779, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_accuracy": 0.906000018119812, |
|
"eval_loss": 0.4576403498649597, |
|
"eval_runtime": 4.3481, |
|
"eval_samples_per_second": 229.983, |
|
"eval_steps_per_second": 14.489, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_accuracy": 0.9229999780654907, |
|
"eval_loss": 0.30020907521247864, |
|
"eval_runtime": 4.3386, |
|
"eval_samples_per_second": 230.489, |
|
"eval_steps_per_second": 14.521, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_accuracy": 0.9449999928474426, |
|
"eval_loss": 0.23847746849060059, |
|
"eval_runtime": 4.3357, |
|
"eval_samples_per_second": 230.642, |
|
"eval_steps_per_second": 14.53, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"eval_accuracy": 0.9580000042915344, |
|
"eval_loss": 0.2000477910041809, |
|
"eval_runtime": 4.3536, |
|
"eval_samples_per_second": 229.694, |
|
"eval_steps_per_second": 14.471, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_accuracy": 0.9639999866485596, |
|
"eval_loss": 0.15715740621089935, |
|
"eval_runtime": 4.3678, |
|
"eval_samples_per_second": 228.95, |
|
"eval_steps_per_second": 14.424, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 1.8369932432432433e-05, |
|
"loss": 0.0475, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_accuracy": 0.9620000123977661, |
|
"eval_loss": 0.19045613706111908, |
|
"eval_runtime": 4.3587, |
|
"eval_samples_per_second": 229.426, |
|
"eval_steps_per_second": 14.454, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_accuracy": 0.9639999866485596, |
|
"eval_loss": 0.20174138247966766, |
|
"eval_runtime": 4.348, |
|
"eval_samples_per_second": 229.989, |
|
"eval_steps_per_second": 14.489, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"eval_accuracy": 0.9520000219345093, |
|
"eval_loss": 0.23974017798900604, |
|
"eval_runtime": 4.3452, |
|
"eval_samples_per_second": 230.141, |
|
"eval_steps_per_second": 14.499, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"eval_accuracy": 0.9549999833106995, |
|
"eval_loss": 0.1878175288438797, |
|
"eval_runtime": 4.3435, |
|
"eval_samples_per_second": 230.227, |
|
"eval_steps_per_second": 14.504, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"eval_accuracy": 0.9520000219345093, |
|
"eval_loss": 0.2655267119407654, |
|
"eval_runtime": 4.3468, |
|
"eval_samples_per_second": 230.053, |
|
"eval_steps_per_second": 14.493, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 7.8125e-06, |
|
"loss": 0.0362, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_accuracy": 0.9629999995231628, |
|
"eval_loss": 0.1756611317396164, |
|
"eval_runtime": 4.3509, |
|
"eval_samples_per_second": 229.836, |
|
"eval_steps_per_second": 14.48, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 3.55, |
|
"eval_accuracy": 0.9620000123977661, |
|
"eval_loss": 0.19551004469394684, |
|
"eval_runtime": 4.3485, |
|
"eval_samples_per_second": 229.966, |
|
"eval_steps_per_second": 14.488, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_accuracy": 0.9559999704360962, |
|
"eval_loss": 0.21292370557785034, |
|
"eval_runtime": 4.357, |
|
"eval_samples_per_second": 229.514, |
|
"eval_steps_per_second": 14.459, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 3.89, |
|
"eval_accuracy": 0.9559999704360962, |
|
"eval_loss": 0.22236208617687225, |
|
"eval_runtime": 4.3612, |
|
"eval_samples_per_second": 229.297, |
|
"eval_steps_per_second": 14.446, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 2368, |
|
"total_flos": 739476514563456.0, |
|
"train_loss": 0.07163469533662538, |
|
"train_runtime": 1355.547, |
|
"train_samples_per_second": 27.909, |
|
"train_steps_per_second": 1.747 |
|
} |
|
], |
|
"max_steps": 2368, |
|
"num_train_epochs": 4, |
|
"total_flos": 739476514563456.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|