|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 3.486544132232666, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3438, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9492119089316988, |
|
"eval_f1": 0.7840440165061899, |
|
"eval_loss": 0.16529129445552826, |
|
"eval_precision": 0.7345360824742269, |
|
"eval_recall": 0.8407079646017699, |
|
"eval_runtime": 0.8917, |
|
"eval_samples_per_second": 209.705, |
|
"eval_steps_per_second": 3.364, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.105318307876587, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1133, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9569677257943457, |
|
"eval_f1": 0.8272108843537416, |
|
"eval_loss": 0.13404884934425354, |
|
"eval_precision": 0.7676767676767676, |
|
"eval_recall": 0.8967551622418879, |
|
"eval_runtime": 0.9428, |
|
"eval_samples_per_second": 198.343, |
|
"eval_steps_per_second": 3.182, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.1835291385650635, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.0736, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9589692269201902, |
|
"eval_f1": 0.8222222222222222, |
|
"eval_loss": 0.1448056399822235, |
|
"eval_precision": 0.7769028871391076, |
|
"eval_recall": 0.8731563421828908, |
|
"eval_runtime": 0.893, |
|
"eval_samples_per_second": 209.417, |
|
"eval_steps_per_second": 3.36, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 4.041090965270996, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0473, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.961220915686765, |
|
"eval_f1": 0.8274894810659187, |
|
"eval_loss": 0.158503457903862, |
|
"eval_precision": 0.7887700534759359, |
|
"eval_recall": 0.8702064896755162, |
|
"eval_runtime": 0.9078, |
|
"eval_samples_per_second": 206.001, |
|
"eval_steps_per_second": 3.305, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 2.1877193450927734, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0311, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9604703527645734, |
|
"eval_f1": 0.8344923504867873, |
|
"eval_loss": 0.18446023762226105, |
|
"eval_precision": 0.7894736842105263, |
|
"eval_recall": 0.8849557522123894, |
|
"eval_runtime": 0.911, |
|
"eval_samples_per_second": 205.279, |
|
"eval_steps_per_second": 3.293, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 2.7992465496063232, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0179, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9602201651238429, |
|
"eval_f1": 0.8263305322128851, |
|
"eval_loss": 0.21451354026794434, |
|
"eval_precision": 0.7866666666666666, |
|
"eval_recall": 0.8702064896755162, |
|
"eval_runtime": 0.9051, |
|
"eval_samples_per_second": 206.598, |
|
"eval_steps_per_second": 3.314, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 0.3002428412437439, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.0126, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9567175381536153, |
|
"eval_f1": 0.82336578581363, |
|
"eval_loss": 0.22246094048023224, |
|
"eval_precision": 0.7789473684210526, |
|
"eval_recall": 0.8731563421828908, |
|
"eval_runtime": 0.9044, |
|
"eval_samples_per_second": 206.757, |
|
"eval_steps_per_second": 3.317, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.2764192223548889, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0091, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9582186639979985, |
|
"eval_f1": 0.8326417704011065, |
|
"eval_loss": 0.2556192874908447, |
|
"eval_precision": 0.7838541666666666, |
|
"eval_recall": 0.887905604719764, |
|
"eval_runtime": 0.9143, |
|
"eval_samples_per_second": 204.535, |
|
"eval_steps_per_second": 3.281, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.3533385396003723, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0041, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9609707280460346, |
|
"eval_f1": 0.8321579689703806, |
|
"eval_loss": 0.2573556900024414, |
|
"eval_precision": 0.7972972972972973, |
|
"eval_recall": 0.8702064896755162, |
|
"eval_runtime": 0.9137, |
|
"eval_samples_per_second": 204.666, |
|
"eval_steps_per_second": 3.283, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.1199791431427, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0036, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9554665999499625, |
|
"eval_f1": 0.817174515235457, |
|
"eval_loss": 0.3124300241470337, |
|
"eval_precision": 0.7702349869451697, |
|
"eval_recall": 0.8702064896755162, |
|
"eval_runtime": 0.9012, |
|
"eval_samples_per_second": 207.51, |
|
"eval_steps_per_second": 3.329, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.014265856705605984, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0038, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.960720540405304, |
|
"eval_f1": 0.8324022346368715, |
|
"eval_loss": 0.2836814224720001, |
|
"eval_precision": 0.7904509283819628, |
|
"eval_recall": 0.8790560471976401, |
|
"eval_runtime": 0.916, |
|
"eval_samples_per_second": 204.138, |
|
"eval_steps_per_second": 3.275, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.1654541492462158, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0017, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9574681010758068, |
|
"eval_f1": 0.825, |
|
"eval_loss": 0.3034752905368805, |
|
"eval_precision": 0.7795275590551181, |
|
"eval_recall": 0.8761061946902655, |
|
"eval_runtime": 0.91, |
|
"eval_samples_per_second": 205.506, |
|
"eval_steps_per_second": 3.297, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.017983168363571167, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0015, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9604703527645734, |
|
"eval_f1": 0.8333333333333335, |
|
"eval_loss": 0.3068053126335144, |
|
"eval_precision": 0.7874015748031497, |
|
"eval_recall": 0.8849557522123894, |
|
"eval_runtime": 0.902, |
|
"eval_samples_per_second": 207.324, |
|
"eval_steps_per_second": 3.326, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.011942153796553612, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0012, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9577182887165374, |
|
"eval_f1": 0.8326417704011065, |
|
"eval_loss": 0.32863807678222656, |
|
"eval_precision": 0.7838541666666666, |
|
"eval_recall": 0.887905604719764, |
|
"eval_runtime": 0.8986, |
|
"eval_samples_per_second": 208.111, |
|
"eval_steps_per_second": 3.339, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.0062804995104670525, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0006, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.960720540405304, |
|
"eval_f1": 0.840782122905028, |
|
"eval_loss": 0.3137037754058838, |
|
"eval_precision": 0.7984084880636605, |
|
"eval_recall": 0.887905604719764, |
|
"eval_runtime": 0.9082, |
|
"eval_samples_per_second": 205.909, |
|
"eval_steps_per_second": 3.303, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.2206662893295288, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0008, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9617212909682261, |
|
"eval_f1": 0.8382559774964837, |
|
"eval_loss": 0.3065112233161926, |
|
"eval_precision": 0.8010752688172043, |
|
"eval_recall": 0.8790560471976401, |
|
"eval_runtime": 0.9053, |
|
"eval_samples_per_second": 206.56, |
|
"eval_steps_per_second": 3.314, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 0.00254653743468225, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0014, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.9589692269201902, |
|
"eval_f1": 0.8365650969529086, |
|
"eval_loss": 0.33052197098731995, |
|
"eval_precision": 0.7885117493472585, |
|
"eval_recall": 0.8908554572271387, |
|
"eval_runtime": 0.9027, |
|
"eval_samples_per_second": 207.161, |
|
"eval_steps_per_second": 3.323, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.0013508679112419486, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0005, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9597197898423818, |
|
"eval_f1": 0.8344923504867873, |
|
"eval_loss": 0.3244776129722595, |
|
"eval_precision": 0.7894736842105263, |
|
"eval_recall": 0.8849557522123894, |
|
"eval_runtime": 0.8996, |
|
"eval_samples_per_second": 207.879, |
|
"eval_steps_per_second": 3.335, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0013845202047377825, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0004, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9602201651238429, |
|
"eval_f1": 0.8372739916550764, |
|
"eval_loss": 0.32481154799461365, |
|
"eval_precision": 0.7921052631578948, |
|
"eval_recall": 0.887905604719764, |
|
"eval_runtime": 0.9114, |
|
"eval_samples_per_second": 205.168, |
|
"eval_steps_per_second": 3.291, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.003232144983485341, |
|
"learning_rate": 0.0, |
|
"loss": 0.0003, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9604703527645734, |
|
"eval_f1": 0.8384401114206128, |
|
"eval_loss": 0.3243328630924225, |
|
"eval_precision": 0.7941952506596306, |
|
"eval_recall": 0.887905604719764, |
|
"eval_runtime": 0.9046, |
|
"eval_samples_per_second": 206.727, |
|
"eval_steps_per_second": 3.316, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 898035701398080.0, |
|
"train_loss": 0.03343723254489168, |
|
"train_runtime": 509.8629, |
|
"train_samples_per_second": 66.175, |
|
"train_steps_per_second": 4.158 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 898035701398080.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|