|
{ |
|
"best_metric": 0.6930651664733887, |
|
"best_model_checkpoint": "hBERTv1_no_pretrain_qnli/checkpoint-9009", |
|
"epoch": 16.0, |
|
"global_step": 13104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00049002442002442, |
|
"loss": 0.715, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.6931389570236206, |
|
"eval_runtime": 9.5675, |
|
"eval_samples_per_second": 570.995, |
|
"eval_steps_per_second": 4.494, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00048002442002442004, |
|
"loss": 0.6932, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.6931495070457458, |
|
"eval_runtime": 9.5694, |
|
"eval_samples_per_second": 570.88, |
|
"eval_steps_per_second": 4.493, |
|
"step": 1638 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00047002442002442, |
|
"loss": 0.6936, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6931167840957642, |
|
"eval_runtime": 9.5644, |
|
"eval_samples_per_second": 571.18, |
|
"eval_steps_per_second": 4.496, |
|
"step": 2457 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.00046002442002442004, |
|
"loss": 0.6932, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.6931969523429871, |
|
"eval_runtime": 9.5713, |
|
"eval_samples_per_second": 570.77, |
|
"eval_steps_per_second": 4.493, |
|
"step": 3276 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 0.00045002442002442, |
|
"loss": 0.6932, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6933488249778748, |
|
"eval_runtime": 9.573, |
|
"eval_samples_per_second": 570.669, |
|
"eval_steps_per_second": 4.492, |
|
"step": 4095 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 0.00044002442002442004, |
|
"loss": 0.6932, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6930915117263794, |
|
"eval_runtime": 9.5567, |
|
"eval_samples_per_second": 571.639, |
|
"eval_steps_per_second": 4.499, |
|
"step": 4914 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 0.00043002442002442007, |
|
"loss": 0.6932, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6931073069572449, |
|
"eval_runtime": 9.5808, |
|
"eval_samples_per_second": 570.202, |
|
"eval_steps_per_second": 4.488, |
|
"step": 5733 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 0.00042002442002442005, |
|
"loss": 0.6932, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6930862069129944, |
|
"eval_runtime": 9.5904, |
|
"eval_samples_per_second": 569.63, |
|
"eval_steps_per_second": 4.484, |
|
"step": 6552 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 0.00041003663003663003, |
|
"loss": 0.6935, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.693478524684906, |
|
"eval_runtime": 9.5344, |
|
"eval_samples_per_second": 572.981, |
|
"eval_steps_per_second": 4.51, |
|
"step": 7371 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 0.00040003663003663006, |
|
"loss": 0.6932, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6930915117263794, |
|
"eval_runtime": 9.5321, |
|
"eval_samples_per_second": 573.116, |
|
"eval_steps_per_second": 4.511, |
|
"step": 8190 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"learning_rate": 0.00039003663003663004, |
|
"loss": 0.6932, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6930651664733887, |
|
"eval_runtime": 9.5448, |
|
"eval_samples_per_second": 572.352, |
|
"eval_steps_per_second": 4.505, |
|
"step": 9009 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 0.00038004884004884, |
|
"loss": 0.6932, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6931073069572449, |
|
"eval_runtime": 9.5697, |
|
"eval_samples_per_second": 570.866, |
|
"eval_steps_per_second": 4.493, |
|
"step": 9828 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"learning_rate": 0.00037004884004884005, |
|
"loss": 0.6932, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5053999633900788, |
|
"eval_loss": 0.6931073069572449, |
|
"eval_runtime": 9.5909, |
|
"eval_samples_per_second": 569.603, |
|
"eval_steps_per_second": 4.483, |
|
"step": 10647 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"learning_rate": 0.00036004884004884, |
|
"loss": 0.6932, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.6931231617927551, |
|
"eval_runtime": 9.5446, |
|
"eval_samples_per_second": 572.363, |
|
"eval_steps_per_second": 4.505, |
|
"step": 11466 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 0.00035004884004884005, |
|
"loss": 0.6932, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.693396270275116, |
|
"eval_runtime": 9.5573, |
|
"eval_samples_per_second": 571.603, |
|
"eval_steps_per_second": 4.499, |
|
"step": 12285 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 0.0003400488400488401, |
|
"loss": 0.6932, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.4946000366099213, |
|
"eval_loss": 0.6931284070014954, |
|
"eval_runtime": 9.5555, |
|
"eval_samples_per_second": 571.71, |
|
"eval_steps_per_second": 4.5, |
|
"step": 13104 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 13104, |
|
"total_flos": 2.481936015514665e+17, |
|
"train_loss": 0.694585192334521, |
|
"train_runtime": 8903.1517, |
|
"train_samples_per_second": 588.236, |
|
"train_steps_per_second": 4.599 |
|
} |
|
], |
|
"max_steps": 40950, |
|
"num_train_epochs": 50, |
|
"total_flos": 2.481936015514665e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|