|
{ |
|
"best_metric": 0.3455486595630646, |
|
"best_model_checkpoint": "mobilebert_add_GLUE_Experiment_logit_kd_wnli_128/checkpoint-35", |
|
"epoch": 12.0, |
|
"global_step": 60, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 0.3469, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3455769717693329, |
|
"eval_runtime": 0.1243, |
|
"eval_samples_per_second": 571.247, |
|
"eval_steps_per_second": 8.046, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.3467, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3458103537559509, |
|
"eval_runtime": 0.1242, |
|
"eval_samples_per_second": 571.857, |
|
"eval_steps_per_second": 8.054, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.3466, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34594178199768066, |
|
"eval_runtime": 0.1244, |
|
"eval_samples_per_second": 570.716, |
|
"eval_steps_per_second": 8.038, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.3466, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3457295894622803, |
|
"eval_runtime": 0.1244, |
|
"eval_samples_per_second": 570.787, |
|
"eval_steps_per_second": 8.039, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.3466, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34556901454925537, |
|
"eval_runtime": 0.1262, |
|
"eval_samples_per_second": 562.776, |
|
"eval_steps_per_second": 7.926, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.3466, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3455694019794464, |
|
"eval_runtime": 0.1236, |
|
"eval_samples_per_second": 574.427, |
|
"eval_steps_per_second": 8.091, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.3466, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3455486595630646, |
|
"eval_runtime": 0.1248, |
|
"eval_samples_per_second": 569.047, |
|
"eval_steps_per_second": 8.015, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.3466, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34575846791267395, |
|
"eval_runtime": 0.1262, |
|
"eval_samples_per_second": 562.639, |
|
"eval_steps_per_second": 7.924, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.1e-05, |
|
"loss": 0.3466, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.345662385225296, |
|
"eval_runtime": 0.1238, |
|
"eval_samples_per_second": 573.305, |
|
"eval_steps_per_second": 8.075, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4e-05, |
|
"loss": 0.3465, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.34582069516181946, |
|
"eval_runtime": 0.1253, |
|
"eval_samples_per_second": 566.847, |
|
"eval_steps_per_second": 7.984, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 3.9000000000000006e-05, |
|
"loss": 0.3466, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3460276126861572, |
|
"eval_runtime": 0.1243, |
|
"eval_samples_per_second": 571.348, |
|
"eval_steps_per_second": 8.047, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.8e-05, |
|
"loss": 0.3465, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.3459770977497101, |
|
"eval_runtime": 0.1258, |
|
"eval_samples_per_second": 564.353, |
|
"eval_steps_per_second": 7.949, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"step": 60, |
|
"total_flos": 181909497839616.0, |
|
"train_loss": 0.3466017405192057, |
|
"train_runtime": 61.4918, |
|
"train_samples_per_second": 516.329, |
|
"train_steps_per_second": 4.066 |
|
} |
|
], |
|
"max_steps": 250, |
|
"num_train_epochs": 50, |
|
"total_flos": 181909497839616.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|