|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2120, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.4341020584106445, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.3817, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9515033947623667, |
|
"eval_f1": 0.793193717277487, |
|
"eval_loss": 0.14420253038406372, |
|
"eval_precision": 0.7266187050359713, |
|
"eval_recall": 0.8731988472622478, |
|
"eval_runtime": 0.9468, |
|
"eval_samples_per_second": 197.515, |
|
"eval_steps_per_second": 3.169, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 2.4988839626312256, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.1266, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9551406401551892, |
|
"eval_f1": 0.8083441981747066, |
|
"eval_loss": 0.13850077986717224, |
|
"eval_precision": 0.7380952380952381, |
|
"eval_recall": 0.8933717579250721, |
|
"eval_runtime": 0.9888, |
|
"eval_samples_per_second": 189.116, |
|
"eval_steps_per_second": 3.034, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 2.6240944862365723, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.087, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9568380213385063, |
|
"eval_f1": 0.8100929614873836, |
|
"eval_loss": 0.13670620322227478, |
|
"eval_precision": 0.7512315270935961, |
|
"eval_recall": 0.8789625360230547, |
|
"eval_runtime": 0.9262, |
|
"eval_samples_per_second": 201.901, |
|
"eval_steps_per_second": 3.239, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 0.2754727900028229, |
|
"learning_rate": 4e-05, |
|
"loss": 0.0528, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9595053346265762, |
|
"eval_f1": 0.8163265306122449, |
|
"eval_loss": 0.14675553143024445, |
|
"eval_precision": 0.7731958762886598, |
|
"eval_recall": 0.8645533141210374, |
|
"eval_runtime": 0.9413, |
|
"eval_samples_per_second": 198.665, |
|
"eval_steps_per_second": 3.187, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 3.4952049255371094, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.0424, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.960717749757517, |
|
"eval_f1": 0.8215767634854771, |
|
"eval_loss": 0.1664169430732727, |
|
"eval_precision": 0.7898936170212766, |
|
"eval_recall": 0.8559077809798271, |
|
"eval_runtime": 1.041, |
|
"eval_samples_per_second": 179.63, |
|
"eval_steps_per_second": 2.882, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.5230388641357422, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.0275, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9582929194956353, |
|
"eval_f1": 0.8241610738255033, |
|
"eval_loss": 0.20444069802761078, |
|
"eval_precision": 0.7713567839195979, |
|
"eval_recall": 0.8847262247838616, |
|
"eval_runtime": 0.9522, |
|
"eval_samples_per_second": 196.387, |
|
"eval_steps_per_second": 3.151, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.4965060949325562, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.019, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9553831231813773, |
|
"eval_f1": 0.8089005235602094, |
|
"eval_loss": 0.23766779899597168, |
|
"eval_precision": 0.7410071942446043, |
|
"eval_recall": 0.8904899135446686, |
|
"eval_runtime": 0.9461, |
|
"eval_samples_per_second": 197.656, |
|
"eval_steps_per_second": 3.171, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 2.0428667068481445, |
|
"learning_rate": 3e-05, |
|
"loss": 0.0145, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9587778855480117, |
|
"eval_f1": 0.8279569892473119, |
|
"eval_loss": 0.24319829046726227, |
|
"eval_precision": 0.7758186397984886, |
|
"eval_recall": 0.8876080691642652, |
|
"eval_runtime": 0.9448, |
|
"eval_samples_per_second": 197.924, |
|
"eval_steps_per_second": 3.175, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 0.5034522414207458, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.0102, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9641125121241513, |
|
"eval_f1": 0.854024556616644, |
|
"eval_loss": 0.22865384817123413, |
|
"eval_precision": 0.810880829015544, |
|
"eval_recall": 0.9020172910662824, |
|
"eval_runtime": 0.9532, |
|
"eval_samples_per_second": 196.191, |
|
"eval_steps_per_second": 3.147, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 0.0857200101017952, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.0067, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9616876818622696, |
|
"eval_f1": 0.8442622950819673, |
|
"eval_loss": 0.24303482472896576, |
|
"eval_precision": 0.8025974025974026, |
|
"eval_recall": 0.8904899135446686, |
|
"eval_runtime": 0.9473, |
|
"eval_samples_per_second": 197.406, |
|
"eval_steps_per_second": 3.167, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.07605205476284027, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.0064, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9602327837051406, |
|
"eval_f1": 0.8396739130434782, |
|
"eval_loss": 0.2674606740474701, |
|
"eval_precision": 0.794344473007712, |
|
"eval_recall": 0.8904899135446686, |
|
"eval_runtime": 0.9552, |
|
"eval_samples_per_second": 195.768, |
|
"eval_steps_per_second": 3.141, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 1.8096215724945068, |
|
"learning_rate": 2e-05, |
|
"loss": 0.0046, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9619301648884578, |
|
"eval_f1": 0.8344549125168236, |
|
"eval_loss": 0.2742844820022583, |
|
"eval_precision": 0.7828282828282829, |
|
"eval_recall": 0.8933717579250721, |
|
"eval_runtime": 1.0274, |
|
"eval_samples_per_second": 182.02, |
|
"eval_steps_per_second": 2.92, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 0.9064086675643921, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.0034, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9619301648884578, |
|
"eval_f1": 0.8451086956521738, |
|
"eval_loss": 0.2666186988353729, |
|
"eval_precision": 0.7994858611825193, |
|
"eval_recall": 0.8962536023054755, |
|
"eval_runtime": 0.9543, |
|
"eval_samples_per_second": 195.964, |
|
"eval_steps_per_second": 3.144, |
|
"step": 1378 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 0.7048726081848145, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.0036, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9633850630455868, |
|
"eval_f1": 0.8453038674033149, |
|
"eval_loss": 0.2606286108493805, |
|
"eval_precision": 0.8116710875331565, |
|
"eval_recall": 0.8818443804034583, |
|
"eval_runtime": 0.9565, |
|
"eval_samples_per_second": 195.509, |
|
"eval_steps_per_second": 3.137, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.10259877145290375, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.0027, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.9626576139670223, |
|
"eval_f1": 0.8405405405405406, |
|
"eval_loss": 0.2861556112766266, |
|
"eval_precision": 0.7913486005089059, |
|
"eval_recall": 0.8962536023054755, |
|
"eval_runtime": 0.9627, |
|
"eval_samples_per_second": 194.249, |
|
"eval_steps_per_second": 3.116, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.023264136165380478, |
|
"learning_rate": 1e-05, |
|
"loss": 0.0016, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.9629000969932104, |
|
"eval_f1": 0.8426812585499316, |
|
"eval_loss": 0.2792609930038452, |
|
"eval_precision": 0.8020833333333334, |
|
"eval_recall": 0.8876080691642652, |
|
"eval_runtime": 0.9674, |
|
"eval_samples_per_second": 193.295, |
|
"eval_steps_per_second": 3.101, |
|
"step": 1696 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.410804271697998, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.0012, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.962172647914646, |
|
"eval_f1": 0.841248303934871, |
|
"eval_loss": 0.29505419731140137, |
|
"eval_precision": 0.7948717948717948, |
|
"eval_recall": 0.8933717579250721, |
|
"eval_runtime": 0.9612, |
|
"eval_samples_per_second": 194.551, |
|
"eval_steps_per_second": 3.121, |
|
"step": 1802 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 0.004150555469095707, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0012, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.9616876818622696, |
|
"eval_f1": 0.8380952380952381, |
|
"eval_loss": 0.29299911856651306, |
|
"eval_precision": 0.7938144329896907, |
|
"eval_recall": 0.8876080691642652, |
|
"eval_runtime": 0.9455, |
|
"eval_samples_per_second": 197.787, |
|
"eval_steps_per_second": 3.173, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 0.0056025926023721695, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.0014, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.9612027158098934, |
|
"eval_f1": 0.8353741496598639, |
|
"eval_loss": 0.29529693722724915, |
|
"eval_precision": 0.7912371134020618, |
|
"eval_recall": 0.8847262247838616, |
|
"eval_runtime": 0.9556, |
|
"eval_samples_per_second": 195.689, |
|
"eval_steps_per_second": 3.139, |
|
"step": 2014 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 0.012953029945492744, |
|
"learning_rate": 0.0, |
|
"loss": 0.0007, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9619301648884578, |
|
"eval_f1": 0.8387978142076504, |
|
"eval_loss": 0.29178306460380554, |
|
"eval_precision": 0.7974025974025974, |
|
"eval_recall": 0.8847262247838616, |
|
"eval_runtime": 0.9529, |
|
"eval_samples_per_second": 196.248, |
|
"eval_steps_per_second": 3.148, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2120, |
|
"total_flos": 918687991011936.0, |
|
"train_loss": 0.03976648653734405, |
|
"train_runtime": 515.0309, |
|
"train_samples_per_second": 65.549, |
|
"train_steps_per_second": 4.116 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2120, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 918687991011936.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|