{ "best_metric": 0.02586853690445423, "best_model_checkpoint": "autotrain-kno3k-fiasf/checkpoint-678", "epoch": 3.0, "eval_steps": 500, "global_step": 678, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "grad_norm": 10.68883228302002, "learning_rate": 3.0882352941176475e-05, "loss": 0.8263, "step": 45 }, { "epoch": 0.4, "grad_norm": 58.380653381347656, "learning_rate": 4.852459016393443e-05, "loss": 0.2512, "step": 90 }, { "epoch": 0.6, "grad_norm": 0.1297217607498169, "learning_rate": 4.491803278688525e-05, "loss": 0.1081, "step": 135 }, { "epoch": 0.8, "grad_norm": 0.23067115247249603, "learning_rate": 4.122950819672131e-05, "loss": 0.1776, "step": 180 }, { "epoch": 1.0, "grad_norm": 0.0700414851307869, "learning_rate": 3.754098360655738e-05, "loss": 0.0775, "step": 225 }, { "epoch": 1.0, "eval_accuracy": 0.9844530816213215, "eval_f1_macro": 0.9790876623736272, "eval_f1_micro": 0.9844530816213215, "eval_f1_weighted": 0.9845629146174618, "eval_loss": 0.05487915500998497, "eval_precision_macro": 0.9728622841711356, "eval_precision_micro": 0.9844530816213215, "eval_precision_weighted": 0.9849390553498074, "eval_recall_macro": 0.9856721822982112, "eval_recall_micro": 0.9844530816213215, "eval_recall_weighted": 0.9844530816213215, "eval_runtime": 10.4593, "eval_samples_per_second": 172.191, "eval_steps_per_second": 10.804, "step": 226 }, { "epoch": 1.19, "grad_norm": 0.03147374466061592, "learning_rate": 3.39344262295082e-05, "loss": 0.1134, "step": 270 }, { "epoch": 1.39, "grad_norm": 0.15661084651947021, "learning_rate": 3.0245901639344264e-05, "loss": 0.069, "step": 315 }, { "epoch": 1.59, "grad_norm": 0.04975114390254021, "learning_rate": 2.6557377049180327e-05, "loss": 0.0821, "step": 360 }, { "epoch": 1.79, "grad_norm": 0.02913733199238777, "learning_rate": 2.2868852459016393e-05, "loss": 0.0636, "step": 405 }, { "epoch": 1.99, "grad_norm": 0.03793076425790787, "learning_rate": 1.918032786885246e-05, "loss": 0.135, "step": 450 }, { "epoch": 2.0, "eval_accuracy": 0.9922265408106608, "eval_f1_macro": 0.9885732001605064, "eval_f1_micro": 0.9922265408106608, "eval_f1_weighted": 0.9922495320181636, "eval_loss": 0.0289547611027956, "eval_precision_macro": 0.9857016924464328, "eval_precision_micro": 0.9922265408106608, "eval_precision_weighted": 0.9923116897185307, "eval_recall_macro": 0.991509527641691, "eval_recall_micro": 0.9922265408106608, "eval_recall_weighted": 0.9922265408106608, "eval_runtime": 10.5141, "eval_samples_per_second": 171.294, "eval_steps_per_second": 10.747, "step": 452 }, { "epoch": 2.19, "grad_norm": 23.56169891357422, "learning_rate": 1.5491803278688525e-05, "loss": 0.0508, "step": 495 }, { "epoch": 2.39, "grad_norm": 0.0823952853679657, "learning_rate": 1.1803278688524591e-05, "loss": 0.0443, "step": 540 }, { "epoch": 2.59, "grad_norm": 0.025019163265824318, "learning_rate": 8.114754098360657e-06, "loss": 0.1144, "step": 585 }, { "epoch": 2.79, "grad_norm": 0.830748975276947, "learning_rate": 4.426229508196722e-06, "loss": 0.0624, "step": 630 }, { "epoch": 2.99, "grad_norm": 1.670173168182373, "learning_rate": 7.377049180327869e-07, "loss": 0.131, "step": 675 }, { "epoch": 3.0, "eval_accuracy": 0.9938922820655192, "eval_f1_macro": 0.990657441189161, "eval_f1_micro": 0.9938922820655192, "eval_f1_weighted": 0.9939035669908675, "eval_loss": 0.02586853690445423, "eval_precision_macro": 0.9889770605232528, "eval_precision_micro": 0.9938922820655192, "eval_precision_weighted": 0.9939283176272281, "eval_recall_macro": 0.9923605914714783, "eval_recall_micro": 0.9938922820655192, "eval_recall_weighted": 0.9938922820655192, "eval_runtime": 10.4887, "eval_samples_per_second": 171.709, "eval_steps_per_second": 10.774, "step": 678 } ], "logging_steps": 45, "max_steps": 678, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 1.3599465039988531e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }