{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 480, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 2.2837767601013184, "learning_rate": 4e-05, "loss": 0.2529, "step": 96 }, { "epoch": 1.0, "eval_LOCATION_f1": 0.9180327868852459, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.9438202247191011, "eval_LOCATION_recall": 0.8936170212765957, "eval_ORGANIZATION_f1": 0.887608069164265, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8555555555555555, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.966789667896679, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9776119402985075, "eval_PERSON_recall": 0.9562043795620438, "eval_loss": 0.04780818894505501, "eval_overall_accuracy": 0.9850828729281768, "eval_overall_f1": 0.9213483146067416, "eval_overall_precision": 0.9156327543424317, "eval_overall_recall": 0.9271356783919598, "eval_runtime": 0.5324, "eval_samples_per_second": 319.327, "eval_steps_per_second": 5.635, "step": 96 }, { "epoch": 2.0, "grad_norm": 1.9025866985321045, "learning_rate": 3e-05, "loss": 0.0617, "step": 192 }, { "epoch": 2.0, "eval_LOCATION_f1": 0.8969072164948454, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.87, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9005847953216375, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.88, "eval_ORGANIZATION_recall": 0.9221556886227545, "eval_PERSON_f1": 0.9675090252707581, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9571428571428572, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05445471405982971, "eval_overall_accuracy": 0.9814917127071823, "eval_overall_f1": 0.9225092250922509, "eval_overall_precision": 0.9036144578313253, "eval_overall_recall": 0.9422110552763819, "eval_runtime": 0.5469, "eval_samples_per_second": 310.833, "eval_steps_per_second": 5.485, "step": 192 }, { "epoch": 3.0, "grad_norm": 1.029209852218628, "learning_rate": 2e-05, "loss": 0.0309, "step": 288 }, { "epoch": 3.0, "eval_LOCATION_f1": 0.883248730964467, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8446601941747572, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.8650306748466258, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8867924528301887, "eval_ORGANIZATION_recall": 0.844311377245509, "eval_PERSON_f1": 0.9708029197080292, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9708029197080292, "eval_PERSON_recall": 0.9708029197080292, "eval_loss": 0.05393547564744949, "eval_overall_accuracy": 0.9828729281767956, "eval_overall_f1": 0.9058971141781681, "eval_overall_precision": 0.9047619047619048, "eval_overall_recall": 0.907035175879397, "eval_runtime": 0.5535, "eval_samples_per_second": 307.121, "eval_steps_per_second": 5.42, "step": 288 }, { "epoch": 4.0, "grad_norm": 0.655804455280304, "learning_rate": 1e-05, "loss": 0.0178, "step": 384 }, { "epoch": 4.0, "eval_LOCATION_f1": 0.90625, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8877551020408163, "eval_LOCATION_recall": 0.925531914893617, "eval_ORGANIZATION_f1": 0.9020771513353116, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.8941176470588236, "eval_ORGANIZATION_recall": 0.9101796407185628, "eval_PERSON_f1": 0.9816849816849818, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9852941176470589, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05562783032655716, "eval_overall_accuracy": 0.9845303867403314, "eval_overall_f1": 0.9301745635910225, "eval_overall_precision": 0.9232673267326733, "eval_overall_recall": 0.9371859296482412, "eval_runtime": 0.5799, "eval_samples_per_second": 293.171, "eval_steps_per_second": 5.174, "step": 384 }, { "epoch": 5.0, "grad_norm": 3.4944188594818115, "learning_rate": 0.0, "loss": 0.0103, "step": 480 }, { "epoch": 5.0, "eval_LOCATION_f1": 0.8934010152284263, "eval_LOCATION_number": 94, "eval_LOCATION_precision": 0.8543689320388349, "eval_LOCATION_recall": 0.9361702127659575, "eval_ORGANIZATION_f1": 0.8895705521472391, "eval_ORGANIZATION_number": 167, "eval_ORGANIZATION_precision": 0.9119496855345912, "eval_ORGANIZATION_recall": 0.8682634730538922, "eval_PERSON_f1": 0.9852941176470589, "eval_PERSON_number": 137, "eval_PERSON_precision": 0.9925925925925926, "eval_PERSON_recall": 0.9781021897810219, "eval_loss": 0.05397149175405502, "eval_overall_accuracy": 0.9850828729281768, "eval_overall_f1": 0.9232704402515725, "eval_overall_precision": 0.924433249370277, "eval_overall_recall": 0.9221105527638191, "eval_runtime": 0.5969, "eval_samples_per_second": 284.821, "eval_steps_per_second": 5.026, "step": 480 }, { "epoch": 5.0, "step": 480, "total_flos": 192740587739700.0, "train_loss": 0.07472380660474301, "train_runtime": 113.3975, "train_samples_per_second": 67.462, "train_steps_per_second": 4.233 } ], "logging_steps": 500, "max_steps": 480, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 192740587739700.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }