{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 10560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 1.1915496587753296, "learning_rate": 4.75e-05, "loss": 0.7886, "step": 528 }, { "epoch": 1.0, "eval_accuracy": 0.859733677829673, "eval_f1": 0.13302540415704386, "eval_loss": 0.4607163071632385, "eval_precision": 0.32432432432432434, "eval_recall": 0.08367228355607205, "eval_runtime": 4.523, "eval_samples_per_second": 206.723, "eval_steps_per_second": 3.316, "step": 528 }, { "epoch": 2.0, "grad_norm": 0.8614588975906372, "learning_rate": 4.5e-05, "loss": 0.3911, "step": 1056 }, { "epoch": 2.0, "eval_accuracy": 0.9292954387359634, "eval_f1": 0.6470908102229471, "eval_loss": 0.254240483045578, "eval_precision": 0.6080735820132857, "eval_recall": 0.6914584543869843, "eval_runtime": 4.6489, "eval_samples_per_second": 201.121, "eval_steps_per_second": 3.227, "step": 1056 }, { "epoch": 3.0, "grad_norm": 1.2399094104766846, "learning_rate": 4.25e-05, "loss": 0.2384, "step": 1584 }, { "epoch": 3.0, "eval_accuracy": 0.9376428500447183, "eval_f1": 0.7163083377031987, "eval_loss": 0.19337689876556396, "eval_precision": 0.652651696129957, "eval_recall": 0.7937245787332946, "eval_runtime": 4.6125, "eval_samples_per_second": 202.712, "eval_steps_per_second": 3.252, "step": 1584 }, { "epoch": 4.0, "grad_norm": 1.5993666648864746, "learning_rate": 4e-05, "loss": 0.1934, "step": 2112 }, { "epoch": 4.0, "eval_accuracy": 0.9445990261353473, "eval_f1": 0.7476784292915892, "eval_loss": 0.1678435504436493, "eval_precision": 0.68798828125, "eval_recall": 0.8187100522951772, "eval_runtime": 4.5469, "eval_samples_per_second": 205.634, "eval_steps_per_second": 3.299, "step": 2112 }, { "epoch": 5.0, "grad_norm": 1.2465081214904785, "learning_rate": 3.7500000000000003e-05, "loss": 0.172, "step": 2640 }, { "epoch": 5.0, "eval_accuracy": 0.9467852529066879, "eval_f1": 0.7566290364925177, "eval_loss": 0.1589244157075882, "eval_precision": 0.6901340996168582, "eval_recall": 0.8373038930854154, "eval_runtime": 4.5536, "eval_samples_per_second": 205.333, "eval_steps_per_second": 3.294, "step": 2640 }, { "epoch": 6.0, "grad_norm": 1.3054755926132202, "learning_rate": 3.5e-05, "loss": 0.1602, "step": 3168 }, { "epoch": 6.0, "eval_accuracy": 0.9487727317897248, "eval_f1": 0.7631235309480282, "eval_loss": 0.15331855416297913, "eval_precision": 0.6930740037950665, "eval_recall": 0.8489250435793143, "eval_runtime": 4.6183, "eval_samples_per_second": 202.456, "eval_steps_per_second": 3.248, "step": 3168 }, { "epoch": 7.0, "grad_norm": 3.6129133701324463, "learning_rate": 3.2500000000000004e-05, "loss": 0.1532, "step": 3696 }, { "epoch": 7.0, "eval_accuracy": 0.9497664712312431, "eval_f1": 0.7661898569570872, "eval_loss": 0.15049894154071808, "eval_precision": 0.693502824858757, "eval_recall": 0.8558977338756537, "eval_runtime": 4.5946, "eval_samples_per_second": 203.499, "eval_steps_per_second": 3.265, "step": 3696 }, { "epoch": 8.0, "grad_norm": 0.5060432553291321, "learning_rate": 3e-05, "loss": 0.1457, "step": 4224 }, { "epoch": 8.0, "eval_accuracy": 0.9521514458908874, "eval_f1": 0.7754024808656638, "eval_loss": 0.14558807015419006, "eval_precision": 0.710348162475822, "eval_recall": 0.8535735037768739, "eval_runtime": 4.5124, "eval_samples_per_second": 207.206, "eval_steps_per_second": 3.324, "step": 4224 }, { "epoch": 9.0, "grad_norm": 1.1229195594787598, "learning_rate": 2.7500000000000004e-05, "loss": 0.1401, "step": 4752 }, { "epoch": 9.0, "eval_accuracy": 0.9543376726622279, "eval_f1": 0.7870345566568443, "eval_loss": 0.1418333500623703, "eval_precision": 0.7301192842942346, "eval_recall": 0.8535735037768739, "eval_runtime": 4.662, "eval_samples_per_second": 200.56, "eval_steps_per_second": 3.218, "step": 4752 }, { "epoch": 10.0, "grad_norm": 1.4011176824569702, "learning_rate": 2.5e-05, "loss": 0.1375, "step": 5280 }, { "epoch": 10.0, "eval_accuracy": 0.9550829772433668, "eval_f1": 0.7894174238375201, "eval_loss": 0.13877230882644653, "eval_precision": 0.7308263236021771, "eval_recall": 0.8582219639744335, "eval_runtime": 4.5718, "eval_samples_per_second": 204.514, "eval_steps_per_second": 3.281, "step": 5280 }, { "epoch": 11.0, "grad_norm": 1.9114675521850586, "learning_rate": 2.25e-05, "loss": 0.1331, "step": 5808 }, { "epoch": 11.0, "eval_accuracy": 0.9554804730199742, "eval_f1": 0.7894174238375201, "eval_loss": 0.1359723061323166, "eval_precision": 0.7308263236021771, "eval_recall": 0.8582219639744335, "eval_runtime": 4.5381, "eval_samples_per_second": 206.032, "eval_steps_per_second": 3.305, "step": 5808 }, { "epoch": 12.0, "grad_norm": 1.272605061531067, "learning_rate": 2e-05, "loss": 0.1304, "step": 6336 }, { "epoch": 12.0, "eval_accuracy": 0.954933916327139, "eval_f1": 0.784512683578104, "eval_loss": 0.13654367625713348, "eval_precision": 0.7257905138339921, "eval_recall": 0.8535735037768739, "eval_runtime": 4.5791, "eval_samples_per_second": 204.187, "eval_steps_per_second": 3.276, "step": 6336 }, { "epoch": 13.0, "grad_norm": 1.4920827150344849, "learning_rate": 1.75e-05, "loss": 0.1285, "step": 6864 }, { "epoch": 13.0, "eval_accuracy": 0.9558779687965815, "eval_f1": 0.7906098219104155, "eval_loss": 0.13434641063213348, "eval_precision": 0.7380352644836272, "eval_recall": 0.8512492736780941, "eval_runtime": 4.6498, "eval_samples_per_second": 201.085, "eval_steps_per_second": 3.226, "step": 6864 }, { "epoch": 14.0, "grad_norm": 1.0049793720245361, "learning_rate": 1.5e-05, "loss": 0.1255, "step": 7392 }, { "epoch": 14.0, "eval_accuracy": 0.9558779687965815, "eval_f1": 0.7958087049973133, "eval_loss": 0.13445836305618286, "eval_precision": 0.7401299350324838, "eval_recall": 0.8605461940732132, "eval_runtime": 4.5469, "eval_samples_per_second": 205.636, "eval_steps_per_second": 3.299, "step": 7392 }, { "epoch": 15.0, "grad_norm": 1.0454351902008057, "learning_rate": 1.25e-05, "loss": 0.1249, "step": 7920 }, { "epoch": 15.0, "eval_accuracy": 0.954933916327139, "eval_f1": 0.7917669072440523, "eval_loss": 0.13459959626197815, "eval_precision": 0.7331683168316832, "eval_recall": 0.8605461940732132, "eval_runtime": 4.5061, "eval_samples_per_second": 207.495, "eval_steps_per_second": 3.329, "step": 7920 }, { "epoch": 16.0, "grad_norm": 1.3529750108718872, "learning_rate": 1e-05, "loss": 0.1238, "step": 8448 }, { "epoch": 16.0, "eval_accuracy": 0.9551326642154427, "eval_f1": 0.7883328873427883, "eval_loss": 0.13417887687683105, "eval_precision": 0.7306547619047619, "eval_recall": 0.8558977338756537, "eval_runtime": 4.547, "eval_samples_per_second": 205.632, "eval_steps_per_second": 3.299, "step": 8448 }, { "epoch": 17.0, "grad_norm": 0.9557709693908691, "learning_rate": 7.5e-06, "loss": 0.1232, "step": 8976 }, { "epoch": 17.0, "eval_accuracy": 0.9557289078803538, "eval_f1": 0.7904736419587904, "eval_loss": 0.13424266874790192, "eval_precision": 0.7326388888888888, "eval_recall": 0.8582219639744335, "eval_runtime": 4.6257, "eval_samples_per_second": 202.13, "eval_steps_per_second": 3.243, "step": 8976 }, { "epoch": 18.0, "grad_norm": 1.4014308452606201, "learning_rate": 5e-06, "loss": 0.1215, "step": 9504 }, { "epoch": 18.0, "eval_accuracy": 0.954933916327139, "eval_f1": 0.7909212283044059, "eval_loss": 0.13506156206130981, "eval_precision": 0.7317193675889329, "eval_recall": 0.8605461940732132, "eval_runtime": 4.539, "eval_samples_per_second": 205.994, "eval_steps_per_second": 3.305, "step": 9504 }, { "epoch": 19.0, "grad_norm": 0.8569299578666687, "learning_rate": 2.5e-06, "loss": 0.1209, "step": 10032 }, { "epoch": 19.0, "eval_accuracy": 0.9547351684388353, "eval_f1": 0.786648865153538, "eval_loss": 0.1337263584136963, "eval_precision": 0.7277667984189723, "eval_recall": 0.8558977338756537, "eval_runtime": 4.6671, "eval_samples_per_second": 200.337, "eval_steps_per_second": 3.214, "step": 10032 }, { "epoch": 20.0, "grad_norm": 1.4876703023910522, "learning_rate": 0.0, "loss": 0.1207, "step": 10560 }, { "epoch": 20.0, "eval_accuracy": 0.95553015999205, "eval_f1": 0.7861921327267862, "eval_loss": 0.133191779255867, "eval_precision": 0.7286706349206349, "eval_recall": 0.8535735037768739, "eval_runtime": 4.6265, "eval_samples_per_second": 202.096, "eval_steps_per_second": 3.242, "step": 10560 }, { "epoch": 20.0, "step": 10560, "total_flos": 4541164131293502.0, "train_loss": 0.18862926418131049, "train_runtime": 1256.3234, "train_samples_per_second": 134.313, "train_steps_per_second": 8.405 } ], "logging_steps": 500, "max_steps": 10560, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 4541164131293502.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }