|
{ |
|
"best_metric": 0.6855193376541138, |
|
"best_model_checkpoint": "hBERTv1_new_pretrain_w_init__wnli/checkpoint-45", |
|
"epoch": 14.0, |
|
"global_step": 70, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00049, |
|
"loss": 12.3688, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 6.223592281341553, |
|
"eval_runtime": 0.1345, |
|
"eval_samples_per_second": 527.809, |
|
"eval_steps_per_second": 7.434, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00048, |
|
"loss": 3.5093, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 0.7491196393966675, |
|
"eval_runtime": 0.1348, |
|
"eval_samples_per_second": 526.695, |
|
"eval_steps_per_second": 7.418, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00047, |
|
"loss": 1.9112, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 2.5145533084869385, |
|
"eval_runtime": 0.1344, |
|
"eval_samples_per_second": 528.182, |
|
"eval_steps_per_second": 7.439, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00046, |
|
"loss": 1.4995, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 1.8103646039962769, |
|
"eval_runtime": 0.1343, |
|
"eval_samples_per_second": 528.656, |
|
"eval_steps_per_second": 7.446, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00045000000000000004, |
|
"loss": 1.3047, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6936343908309937, |
|
"eval_runtime": 0.1346, |
|
"eval_samples_per_second": 527.436, |
|
"eval_steps_per_second": 7.429, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00044, |
|
"loss": 1.4685, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.7440030574798584, |
|
"eval_runtime": 0.1342, |
|
"eval_samples_per_second": 528.98, |
|
"eval_steps_per_second": 7.45, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00043, |
|
"loss": 0.924, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 1.1065967082977295, |
|
"eval_runtime": 0.1344, |
|
"eval_samples_per_second": 528.272, |
|
"eval_steps_per_second": 7.44, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00042, |
|
"loss": 0.8423, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 0.8221006989479065, |
|
"eval_runtime": 0.1346, |
|
"eval_samples_per_second": 527.644, |
|
"eval_steps_per_second": 7.432, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.00041, |
|
"loss": 0.8166, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6855193376541138, |
|
"eval_runtime": 0.1345, |
|
"eval_samples_per_second": 528.041, |
|
"eval_steps_per_second": 7.437, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.0004, |
|
"loss": 0.7552, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.718089759349823, |
|
"eval_runtime": 0.1345, |
|
"eval_samples_per_second": 527.974, |
|
"eval_steps_per_second": 7.436, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.00039000000000000005, |
|
"loss": 0.7515, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6951475739479065, |
|
"eval_runtime": 0.1352, |
|
"eval_samples_per_second": 525.152, |
|
"eval_steps_per_second": 7.397, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00038, |
|
"loss": 0.7127, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 0.7139633893966675, |
|
"eval_runtime": 0.1349, |
|
"eval_samples_per_second": 526.347, |
|
"eval_steps_per_second": 7.413, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.00037, |
|
"loss": 0.7112, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6901406645774841, |
|
"eval_runtime": 0.1345, |
|
"eval_samples_per_second": 527.704, |
|
"eval_steps_per_second": 7.432, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.00035999999999999997, |
|
"loss": 0.6976, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.43661971830985913, |
|
"eval_loss": 0.700924277305603, |
|
"eval_runtime": 0.1352, |
|
"eval_samples_per_second": 525.153, |
|
"eval_steps_per_second": 7.397, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"step": 70, |
|
"total_flos": 1316580365434880.0, |
|
"train_loss": 2.0194963932037355, |
|
"train_runtime": 90.6818, |
|
"train_samples_per_second": 350.126, |
|
"train_steps_per_second": 2.757 |
|
} |
|
], |
|
"max_steps": 250, |
|
"num_train_epochs": 50, |
|
"total_flos": 1316580365434880.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|