|
{ |
|
"best_metric": 0.848714292049408, |
|
"best_model_checkpoint": "distilbert_sa_GLUE_Experiment_mnli_384/checkpoint-7670", |
|
"epoch": 10.0, |
|
"global_step": 15340, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.9e-05, |
|
"loss": 1.0075, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5303107488537953, |
|
"eval_loss": 0.9587203860282898, |
|
"eval_runtime": 5.6935, |
|
"eval_samples_per_second": 1723.896, |
|
"eval_steps_per_second": 6.85, |
|
"step": 1534 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 4.8e-05, |
|
"loss": 0.9233, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5728986245542537, |
|
"eval_loss": 0.9005416631698608, |
|
"eval_runtime": 5.5438, |
|
"eval_samples_per_second": 1770.436, |
|
"eval_steps_per_second": 7.035, |
|
"step": 3068 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 4.7e-05, |
|
"loss": 0.8749, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5887926642893531, |
|
"eval_loss": 0.8833683133125305, |
|
"eval_runtime": 5.9778, |
|
"eval_samples_per_second": 1641.912, |
|
"eval_steps_per_second": 6.524, |
|
"step": 4602 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 4.600000000000001e-05, |
|
"loss": 0.8389, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.610697911360163, |
|
"eval_loss": 0.8563822507858276, |
|
"eval_runtime": 5.6621, |
|
"eval_samples_per_second": 1733.461, |
|
"eval_steps_per_second": 6.888, |
|
"step": 6136 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 4.50006518904824e-05, |
|
"loss": 0.8058, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.6141619969434539, |
|
"eval_loss": 0.848714292049408, |
|
"eval_runtime": 5.565, |
|
"eval_samples_per_second": 1763.688, |
|
"eval_steps_per_second": 7.008, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.40013037809648e-05, |
|
"loss": 0.776, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6220071319409067, |
|
"eval_loss": 0.8578256368637085, |
|
"eval_runtime": 5.8496, |
|
"eval_samples_per_second": 1677.901, |
|
"eval_steps_per_second": 6.667, |
|
"step": 9204 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"learning_rate": 4.3001955671447194e-05, |
|
"loss": 0.7467, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.6187468160978095, |
|
"eval_loss": 0.8617885708808899, |
|
"eval_runtime": 5.8167, |
|
"eval_samples_per_second": 1687.379, |
|
"eval_steps_per_second": 6.705, |
|
"step": 10738 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 4.20019556714472e-05, |
|
"loss": 0.7171, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6206826286296485, |
|
"eval_loss": 0.8828079104423523, |
|
"eval_runtime": 5.7638, |
|
"eval_samples_per_second": 1702.865, |
|
"eval_steps_per_second": 6.766, |
|
"step": 12272 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"learning_rate": 4.10026075619296e-05, |
|
"loss": 0.6876, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6292409577177789, |
|
"eval_loss": 0.890081524848938, |
|
"eval_runtime": 5.7709, |
|
"eval_samples_per_second": 1700.785, |
|
"eval_steps_per_second": 6.758, |
|
"step": 13806 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 4.00026075619296e-05, |
|
"loss": 0.6589, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.62190524707081, |
|
"eval_loss": 0.8952736854553223, |
|
"eval_runtime": 5.7281, |
|
"eval_samples_per_second": 1713.472, |
|
"eval_steps_per_second": 6.808, |
|
"step": 15340 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 15340, |
|
"total_flos": 1.0787244155600896e+17, |
|
"train_loss": 0.8036706678248757, |
|
"train_runtime": 5092.3208, |
|
"train_samples_per_second": 3855.825, |
|
"train_steps_per_second": 15.062 |
|
} |
|
], |
|
"max_steps": 76700, |
|
"num_train_epochs": 50, |
|
"total_flos": 1.0787244155600896e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|