|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 2440, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 4.234379768371582, |
|
"learning_rate": 4.75e-05, |
|
"loss": 0.5459, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7393483709273183, |
|
"eval_f1": 0.6458660476548099, |
|
"eval_loss": 0.47629594802856445, |
|
"eval_precision": 0.6804065499717673, |
|
"eval_recall": 0.63557010365521, |
|
"eval_runtime": 4.7443, |
|
"eval_samples_per_second": 84.101, |
|
"eval_steps_per_second": 10.539, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 4.299708843231201, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.4528, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7844611528822055, |
|
"eval_f1": 0.7677594888732471, |
|
"eval_loss": 0.43055105209350586, |
|
"eval_precision": 0.7630357142857143, |
|
"eval_recall": 0.8125113657028551, |
|
"eval_runtime": 5.0592, |
|
"eval_samples_per_second": 78.866, |
|
"eval_steps_per_second": 9.883, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 3.729501962661743, |
|
"learning_rate": 4.25e-05, |
|
"loss": 0.3653, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.8621553884711779, |
|
"eval_f1": 0.8217317487266553, |
|
"eval_loss": 0.3334697186946869, |
|
"eval_precision": 0.853331681065005, |
|
"eval_recall": 0.802464084378978, |
|
"eval_runtime": 5.0519, |
|
"eval_samples_per_second": 78.98, |
|
"eval_steps_per_second": 9.897, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.0170559883117676, |
|
"learning_rate": 4e-05, |
|
"loss": 0.2987, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8246499363520641, |
|
"eval_loss": 0.335675448179245, |
|
"eval_precision": 0.8246499363520641, |
|
"eval_recall": 0.8246499363520641, |
|
"eval_runtime": 5.0598, |
|
"eval_samples_per_second": 78.858, |
|
"eval_steps_per_second": 9.882, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 0.7316147089004517, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.2746, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.8546365914786967, |
|
"eval_f1": 0.8338931955211025, |
|
"eval_loss": 0.3400849401950836, |
|
"eval_precision": 0.8216641926439057, |
|
"eval_recall": 0.8546553918894344, |
|
"eval_runtime": 5.0556, |
|
"eval_samples_per_second": 78.923, |
|
"eval_steps_per_second": 9.89, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.9782907962799072, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.2477, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.849624060150376, |
|
"eval_f1": 0.8308270676691729, |
|
"eval_loss": 0.33233991265296936, |
|
"eval_precision": 0.8176375838926174, |
|
"eval_recall": 0.858610656482997, |
|
"eval_runtime": 5.0483, |
|
"eval_samples_per_second": 79.037, |
|
"eval_steps_per_second": 9.904, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 1.8530523777008057, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 0.24, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.8646616541353384, |
|
"eval_f1": 0.8447157518450185, |
|
"eval_loss": 0.3171246647834778, |
|
"eval_precision": 0.8325401217487549, |
|
"eval_recall": 0.864248045099109, |
|
"eval_runtime": 5.2354, |
|
"eval_samples_per_second": 76.211, |
|
"eval_steps_per_second": 9.55, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 3.7897450923919678, |
|
"learning_rate": 3e-05, |
|
"loss": 0.2069, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8683279483657071, |
|
"eval_loss": 0.27698931097984314, |
|
"eval_precision": 0.873366724738676, |
|
"eval_recall": 0.863747954173486, |
|
"eval_runtime": 5.0519, |
|
"eval_samples_per_second": 78.98, |
|
"eval_steps_per_second": 9.897, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 5.382436275482178, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 0.2197, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.8671679197994987, |
|
"eval_f1": 0.8491192020377148, |
|
"eval_loss": 0.3091033399105072, |
|
"eval_precision": 0.8356209150326797, |
|
"eval_recall": 0.8735224586288416, |
|
"eval_runtime": 5.0573, |
|
"eval_samples_per_second": 78.896, |
|
"eval_steps_per_second": 9.887, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 3.9839751720428467, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.2005, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8811928811928812, |
|
"eval_loss": 0.2552439570426941, |
|
"eval_precision": 0.8842105263157894, |
|
"eval_recall": 0.878341516639389, |
|
"eval_runtime": 5.0478, |
|
"eval_samples_per_second": 79.044, |
|
"eval_steps_per_second": 9.905, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 0.8527041673660278, |
|
"learning_rate": 2.25e-05, |
|
"loss": 0.1867, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9047619047619048, |
|
"eval_f1": 0.8867831541218638, |
|
"eval_loss": 0.2726534903049469, |
|
"eval_precision": 0.88158359844468, |
|
"eval_recall": 0.8926168394253501, |
|
"eval_runtime": 5.0594, |
|
"eval_samples_per_second": 78.863, |
|
"eval_steps_per_second": 9.883, |
|
"step": 1342 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 2.9348244667053223, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1722, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.8922305764411027, |
|
"eval_f1": 0.8727838950061173, |
|
"eval_loss": 0.27391675114631653, |
|
"eval_precision": 0.8656898656898657, |
|
"eval_recall": 0.8812511365702855, |
|
"eval_runtime": 5.0516, |
|
"eval_samples_per_second": 78.984, |
|
"eval_steps_per_second": 9.898, |
|
"step": 1464 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 1.8993942737579346, |
|
"learning_rate": 1.75e-05, |
|
"loss": 0.161, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8765393898137261, |
|
"eval_loss": 0.2714451551437378, |
|
"eval_precision": 0.8852261942423283, |
|
"eval_recall": 0.8690671031096563, |
|
"eval_runtime": 5.0831, |
|
"eval_samples_per_second": 78.495, |
|
"eval_steps_per_second": 9.836, |
|
"step": 1586 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 5.6604695320129395, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.1684, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8781334505389722, |
|
"eval_loss": 0.27736207842826843, |
|
"eval_precision": 0.872316715542522, |
|
"eval_recall": 0.8847972358610656, |
|
"eval_runtime": 5.0631, |
|
"eval_samples_per_second": 78.805, |
|
"eval_steps_per_second": 9.875, |
|
"step": 1708 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"grad_norm": 0.4771318733692169, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.1548, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.8802521008403361, |
|
"eval_loss": 0.27421677112579346, |
|
"eval_precision": 0.8767168083714847, |
|
"eval_recall": 0.8840698308783415, |
|
"eval_runtime": 5.0573, |
|
"eval_samples_per_second": 78.896, |
|
"eval_steps_per_second": 9.887, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"grad_norm": 0.8061890006065369, |
|
"learning_rate": 1e-05, |
|
"loss": 0.1526, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.8872180451127819, |
|
"eval_f1": 0.8703282417939551, |
|
"eval_loss": 0.29698580503463745, |
|
"eval_precision": 0.8574462768615692, |
|
"eval_recall": 0.8902073104200764, |
|
"eval_runtime": 5.0603, |
|
"eval_samples_per_second": 78.848, |
|
"eval_steps_per_second": 9.881, |
|
"step": 1952 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"grad_norm": 2.3830015659332275, |
|
"learning_rate": 7.5e-06, |
|
"loss": 0.1467, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.8897243107769424, |
|
"eval_f1": 0.8707140332272888, |
|
"eval_loss": 0.2729242146015167, |
|
"eval_precision": 0.8618432385874246, |
|
"eval_recall": 0.8819785415530097, |
|
"eval_runtime": 5.0444, |
|
"eval_samples_per_second": 79.097, |
|
"eval_steps_per_second": 9.912, |
|
"step": 2074 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"grad_norm": 4.170547962188721, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1484, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.8972431077694235, |
|
"eval_f1": 0.8781334505389722, |
|
"eval_loss": 0.27389636635780334, |
|
"eval_precision": 0.872316715542522, |
|
"eval_recall": 0.8847972358610656, |
|
"eval_runtime": 5.0888, |
|
"eval_samples_per_second": 78.407, |
|
"eval_steps_per_second": 9.825, |
|
"step": 2196 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"grad_norm": 3.1922006607055664, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.1434, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.899749373433584, |
|
"eval_f1": 0.879667048676036, |
|
"eval_loss": 0.27286645770072937, |
|
"eval_precision": 0.8778361344537815, |
|
"eval_recall": 0.8815693762502272, |
|
"eval_runtime": 5.0573, |
|
"eval_samples_per_second": 78.895, |
|
"eval_steps_per_second": 9.887, |
|
"step": 2318 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 1.1650974750518799, |
|
"learning_rate": 0.0, |
|
"loss": 0.1354, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.9022556390977443, |
|
"eval_f1": 0.8829621606985718, |
|
"eval_loss": 0.2765507400035858, |
|
"eval_precision": 0.8802419354838709, |
|
"eval_recall": 0.8858428805237315, |
|
"eval_runtime": 5.0471, |
|
"eval_samples_per_second": 79.055, |
|
"eval_steps_per_second": 9.907, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"step": 2440, |
|
"total_flos": 8444128359504000.0, |
|
"train_loss": 0.23107414167435442, |
|
"train_runtime": 1893.6576, |
|
"train_samples_per_second": 38.423, |
|
"train_steps_per_second": 1.289 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 2440, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 8444128359504000.0, |
|
"train_batch_size": 30, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|