{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20833333333333334, "grad_norm": 0.03092513605952263, "learning_rate": 1.0000000000000002e-06, "loss": 0.4044, "step": 10 }, { "epoch": 0.4166666666666667, "grad_norm": 0.03055603615939617, "learning_rate": 2.0000000000000003e-06, "loss": 0.3836, "step": 20 }, { "epoch": 0.625, "grad_norm": 0.023527173325419426, "learning_rate": 3e-06, "loss": 0.0017, "step": 30 }, { "epoch": 0.8333333333333334, "grad_norm": 0.02501656673848629, "learning_rate": 4.000000000000001e-06, "loss": 0.0008, "step": 40 }, { "epoch": 1.0, "eval_accuracy": 0.968421052631579, "eval_f1": 0.9770992366412214, "eval_loss": 0.23919111490249634, "eval_precision": 0.9696969696969697, "eval_recall": 0.9846153846153847, "eval_runtime": 2.2913, "eval_samples_per_second": 41.461, "eval_steps_per_second": 2.619, "step": 48 }, { "epoch": 1.0416666666666667, "grad_norm": 0.027175450697541237, "learning_rate": 5e-06, "loss": 0.0007, "step": 50 }, { "epoch": 1.25, "grad_norm": 0.017659997567534447, "learning_rate": 6e-06, "loss": 0.0006, "step": 60 }, { "epoch": 1.4583333333333333, "grad_norm": 0.01459481567144394, "learning_rate": 7.000000000000001e-06, "loss": 0.0007, "step": 70 }, { "epoch": 1.6666666666666665, "grad_norm": 0.011688518337905407, "learning_rate": 8.000000000000001e-06, "loss": 0.0028, "step": 80 }, { "epoch": 1.875, "grad_norm": 0.08688002079725266, "learning_rate": 9e-06, "loss": 0.0006, "step": 90 }, { "epoch": 2.0, "eval_accuracy": 0.9052631578947369, "eval_f1": 0.9312977099236641, "eval_loss": 0.7143413424491882, "eval_precision": 0.9242424242424242, "eval_recall": 0.9384615384615385, "eval_runtime": 2.2819, "eval_samples_per_second": 41.632, "eval_steps_per_second": 2.629, "step": 96 }, { "epoch": 2.0833333333333335, "grad_norm": 0.029978642240166664, "learning_rate": 1e-05, "loss": 0.1005, "step": 100 }, { "epoch": 2.2916666666666665, "grad_norm": 0.010506043210625648, "learning_rate": 1.1000000000000001e-05, "loss": 0.0003, "step": 110 }, { "epoch": 2.5, "grad_norm": 413.86444091796875, "learning_rate": 1.2e-05, "loss": 0.1257, "step": 120 }, { "epoch": 2.7083333333333335, "grad_norm": 0.022496210411190987, "learning_rate": 1.3000000000000001e-05, "loss": 0.0018, "step": 130 }, { "epoch": 2.9166666666666665, "grad_norm": 4.833926677703857, "learning_rate": 1.4000000000000001e-05, "loss": 0.0823, "step": 140 }, { "epoch": 3.0, "eval_accuracy": 0.9473684210526315, "eval_f1": 0.962406015037594, "eval_loss": 0.2848361134529114, "eval_precision": 0.9411764705882353, "eval_recall": 0.9846153846153847, "eval_runtime": 2.3284, "eval_samples_per_second": 40.8, "eval_steps_per_second": 2.577, "step": 144 }, { "epoch": 3.125, "grad_norm": 133.06631469726562, "learning_rate": 1.5e-05, "loss": 0.0298, "step": 150 }, { "epoch": 3.3333333333333335, "grad_norm": 5.606410503387451, "learning_rate": 1.6000000000000003e-05, "loss": 0.1977, "step": 160 }, { "epoch": 3.5416666666666665, "grad_norm": 1.3438758850097656, "learning_rate": 1.7000000000000003e-05, "loss": 0.0345, "step": 170 }, { "epoch": 3.75, "grad_norm": 219.62388610839844, "learning_rate": 1.8e-05, "loss": 0.2788, "step": 180 }, { "epoch": 3.9583333333333335, "grad_norm": 0.02319713868200779, "learning_rate": 1.9e-05, "loss": 0.0778, "step": 190 }, { "epoch": 4.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.9285714285714286, "eval_loss": 0.73874431848526, "eval_precision": 0.8666666666666667, "eval_recall": 1.0, "eval_runtime": 2.3292, "eval_samples_per_second": 40.786, "eval_steps_per_second": 2.576, "step": 192 }, { "epoch": 4.166666666666667, "grad_norm": 0.07177528738975525, "learning_rate": 2e-05, "loss": 0.2188, "step": 200 }, { "epoch": 4.375, "grad_norm": 0.025927864015102386, "learning_rate": 2.1e-05, "loss": 0.0018, "step": 210 }, { "epoch": 4.583333333333333, "grad_norm": 0.25771522521972656, "learning_rate": 2.2000000000000003e-05, "loss": 0.2547, "step": 220 }, { "epoch": 4.791666666666667, "grad_norm": 0.1835576444864273, "learning_rate": 2.3000000000000003e-05, "loss": 0.1684, "step": 230 }, { "epoch": 5.0, "grad_norm": 0.051910556852817535, "learning_rate": 2.4e-05, "loss": 0.1683, "step": 240 }, { "epoch": 5.0, "eval_accuracy": 0.9263157894736842, "eval_f1": 0.9465648854961831, "eval_loss": 0.482939213514328, "eval_precision": 0.9393939393939394, "eval_recall": 0.9538461538461539, "eval_runtime": 2.321, "eval_samples_per_second": 40.931, "eval_steps_per_second": 2.585, "step": 240 } ], "logging_steps": 10, "max_steps": 240, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "total_flos": 442667397580800.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }