|
{ |
|
"best_metric": 0.4098166227340698, |
|
"best_model_checkpoint": "hBERTv1_new_pretrain_48_KD_sst2/checkpoint-527", |
|
"epoch": 6.0, |
|
"global_step": 3162, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 3.9200000000000004e-05, |
|
"loss": 0.3947, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8165137614678899, |
|
"eval_loss": 0.4098166227340698, |
|
"eval_runtime": 1.8008, |
|
"eval_samples_per_second": 484.227, |
|
"eval_steps_per_second": 3.887, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 3.8400000000000005e-05, |
|
"loss": 0.2426, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.8256880733944955, |
|
"eval_loss": 0.47963330149650574, |
|
"eval_runtime": 1.8005, |
|
"eval_samples_per_second": 484.31, |
|
"eval_steps_per_second": 3.888, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 3.76e-05, |
|
"loss": 0.1948, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8188073394495413, |
|
"eval_loss": 0.4834706783294678, |
|
"eval_runtime": 1.8087, |
|
"eval_samples_per_second": 482.113, |
|
"eval_steps_per_second": 3.87, |
|
"step": 1581 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 3.680000000000001e-05, |
|
"loss": 0.1702, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8027522935779816, |
|
"eval_loss": 0.5115558505058289, |
|
"eval_runtime": 1.8015, |
|
"eval_samples_per_second": 484.037, |
|
"eval_steps_per_second": 3.886, |
|
"step": 2108 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 3.6e-05, |
|
"loss": 0.1484, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8084862385321101, |
|
"eval_loss": 0.5547378063201904, |
|
"eval_runtime": 1.8032, |
|
"eval_samples_per_second": 483.586, |
|
"eval_steps_per_second": 3.882, |
|
"step": 2635 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 3.52e-05, |
|
"loss": 0.1355, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7993119266055045, |
|
"eval_loss": 0.6597509980201721, |
|
"eval_runtime": 1.8009, |
|
"eval_samples_per_second": 484.198, |
|
"eval_steps_per_second": 3.887, |
|
"step": 3162 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"step": 3162, |
|
"total_flos": 5.984501559302554e+16, |
|
"train_loss": 0.21436870558061305, |
|
"train_runtime": 2799.5615, |
|
"train_samples_per_second": 1202.849, |
|
"train_steps_per_second": 9.412 |
|
} |
|
], |
|
"max_steps": 26350, |
|
"num_train_epochs": 50, |
|
"total_flos": 5.984501559302554e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|