{ "best_metric": 0.8090804377039739, "best_model_checkpoint": "./models/entities/test_v6/checkpoint-7000", "epoch": 19.997926744989634, "eval_steps": 7000, "global_step": 7220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.3842432619212164, "grad_norm": 7.1875, "learning_rate": 0.0002, "loss": 8.369, "step": 500 }, { "epoch": 2.7684865238424328, "grad_norm": 4.84375, "learning_rate": 0.00018511904761904765, "loss": 1.059, "step": 1000 }, { "epoch": 4.1548030407740155, "grad_norm": 5.28125, "learning_rate": 0.00017023809523809523, "loss": 0.6034, "step": 1500 }, { "epoch": 5.5390463026952315, "grad_norm": 3.90625, "learning_rate": 0.00015535714285714287, "loss": 0.3745, "step": 2000 }, { "epoch": 6.923289564616448, "grad_norm": 3.515625, "learning_rate": 0.00014047619047619049, "loss": 0.2708, "step": 2500 }, { "epoch": 8.309606081548031, "grad_norm": 3.203125, "learning_rate": 0.0001255952380952381, "loss": 0.1967, "step": 3000 }, { "epoch": 9.693849343469246, "grad_norm": 2.3125, "learning_rate": 0.00011071428571428572, "loss": 0.1566, "step": 3500 }, { "epoch": 11.080165860400829, "grad_norm": 3.03125, "learning_rate": 9.583333333333334e-05, "loss": 0.1318, "step": 4000 }, { "epoch": 12.464409122322046, "grad_norm": 4.84375, "learning_rate": 8.095238095238096e-05, "loss": 0.1131, "step": 4500 }, { "epoch": 13.848652384243263, "grad_norm": 2.484375, "learning_rate": 6.607142857142857e-05, "loss": 0.1041, "step": 5000 }, { "epoch": 15.234968901174845, "grad_norm": 3.078125, "learning_rate": 5.119047619047619e-05, "loss": 0.0986, "step": 5500 }, { "epoch": 16.619212163096062, "grad_norm": 2.53125, "learning_rate": 3.630952380952381e-05, "loss": 0.0951, "step": 6000 }, { "epoch": 18.005528680027645, "grad_norm": 3.53125, "learning_rate": 2.1428571428571428e-05, "loss": 0.0921, "step": 6500 }, { "epoch": 19.389771941948858, "grad_norm": 1.671875, "learning_rate": 6.547619047619048e-06, "loss": 0.0925, "step": 7000 }, { "epoch": 19.389771941948858, "eval_accuracy": 0.9699217442249749, "eval_f1": 0.8090804377039739, "eval_loss": 0.15359356999397278, "eval_precision": 0.7678083439606486, "eval_recall": 0.8550415905863258, "eval_runtime": 169.5033, "eval_samples_per_second": 29.262, "eval_steps_per_second": 29.262, "step": 7000 }, { "epoch": 19.997926744989634, "step": 7220, "total_flos": 6132708568035504.0, "train_loss": 0.8169754918592458, "train_runtime": 915.3849, "train_samples_per_second": 505.754, "train_steps_per_second": 7.887 } ], "logging_steps": 500, "max_steps": 7220, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 7000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6132708568035504.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }