|
{ |
|
"best_metric": 0.9573363661766052, |
|
"best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_qnli/checkpoint-1638", |
|
"epoch": 7.0, |
|
"global_step": 5733, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.0984, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.6220025626944902, |
|
"eval_loss": 0.9626317620277405, |
|
"eval_runtime": 12.6845, |
|
"eval_samples_per_second": 430.682, |
|
"eval_steps_per_second": 3.39, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 1.0171, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.615595826468973, |
|
"eval_loss": 0.9573363661766052, |
|
"eval_runtime": 12.7142, |
|
"eval_samples_per_second": 429.676, |
|
"eval_steps_per_second": 3.382, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.9717, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.6104704374885594, |
|
"eval_loss": 0.9651486277580261, |
|
"eval_runtime": 12.7327, |
|
"eval_samples_per_second": 429.053, |
|
"eval_steps_per_second": 3.377, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.9377, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6024162548050521, |
|
"eval_loss": 0.9713066220283508, |
|
"eval_runtime": 12.6949, |
|
"eval_samples_per_second": 430.33, |
|
"eval_steps_per_second": 3.387, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.9132, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5987552626761853, |
|
"eval_loss": 0.9811589121818542, |
|
"eval_runtime": 12.741, |
|
"eval_samples_per_second": 428.772, |
|
"eval_steps_per_second": 3.375, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.4000000000000006e-05, |
|
"loss": 0.89, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5982061138568552, |
|
"eval_loss": 1.0107508897781372, |
|
"eval_runtime": 12.8729, |
|
"eval_samples_per_second": 424.379, |
|
"eval_steps_per_second": 3.34, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3e-05, |
|
"loss": 0.8683, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5914332784184514, |
|
"eval_loss": 1.0290495157241821, |
|
"eval_runtime": 12.6935, |
|
"eval_samples_per_second": 430.378, |
|
"eval_steps_per_second": 3.388, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"step": 5733, |
|
"total_flos": 2.2988974586855424e+16, |
|
"train_loss": 0.9566188341823108, |
|
"train_runtime": 4545.7624, |
|
"train_samples_per_second": 1152.095, |
|
"train_steps_per_second": 9.008 |
|
} |
|
], |
|
"max_steps": 40950, |
|
"num_train_epochs": 50, |
|
"total_flos": 2.2988974586855424e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|