{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.232791900634766, "learning_rate": 4.75e-05, "loss": 0.5568, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6144317942230656, "eval_loss": 0.48217353224754333, "eval_precision": 0.65566534914361, "eval_recall": 0.6074286233860702, "eval_runtime": 1.8124, "eval_samples_per_second": 220.155, "eval_steps_per_second": 27.588, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.6621267795562744, "learning_rate": 4.5e-05, "loss": 0.4661, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7543859649122807, "eval_f1": 0.7304054054054054, "eval_loss": 0.44529902935028076, "eval_precision": 0.7240563585317666, "eval_recall": 0.7612293144208038, "eval_runtime": 1.7908, "eval_samples_per_second": 222.81, "eval_steps_per_second": 27.921, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.223342180252075, "learning_rate": 4.25e-05, "loss": 0.3875, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8238834717707957, "eval_loss": 0.3446956276893616, "eval_precision": 0.8487520627062706, "eval_recall": 0.8074649936352064, "eval_runtime": 1.7891, "eval_samples_per_second": 223.02, "eval_steps_per_second": 27.947, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.7967684268951416, "learning_rate": 4e-05, "loss": 0.318, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8267427049559981, "eval_loss": 0.34423500299453735, "eval_precision": 0.8158466596088483, "eval_recall": 0.8436079287143117, "eval_runtime": 1.786, "eval_samples_per_second": 223.404, "eval_steps_per_second": 27.996, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.4756224453449249, "learning_rate": 3.7500000000000003e-05, "loss": 0.2855, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.826007326007326, "eval_loss": 0.3348763883113861, "eval_precision": 0.8157828282828283, "eval_recall": 0.8411074740861975, "eval_runtime": 1.7834, "eval_samples_per_second": 223.733, "eval_steps_per_second": 28.037, "step": 610 }, { "epoch": 6.0, "grad_norm": 1.1933702230453491, "learning_rate": 3.5e-05, "loss": 0.2638, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8177454831659652, "eval_loss": 0.35479673743247986, "eval_precision": 0.805161943319838, "eval_recall": 0.8472449536279323, "eval_runtime": 1.7837, "eval_samples_per_second": 223.688, "eval_steps_per_second": 28.031, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.3131154775619507, "learning_rate": 3.2500000000000004e-05, "loss": 0.2397, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8434065934065934, "eval_loss": 0.3253527581691742, "eval_precision": 0.8325757575757575, "eval_recall": 0.8592471358428806, "eval_runtime": 1.7938, "eval_samples_per_second": 222.438, "eval_steps_per_second": 27.874, "step": 854 }, { "epoch": 8.0, "grad_norm": 5.750446319580078, "learning_rate": 3e-05, "loss": 0.2428, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8654532336864889, "eval_loss": 0.2798740565776825, "eval_precision": 0.8804269882659713, "eval_recall": 0.8537461356610292, "eval_runtime": 1.7849, "eval_samples_per_second": 223.546, "eval_steps_per_second": 28.013, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.672217845916748, "learning_rate": 2.7500000000000004e-05, "loss": 0.2229, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8484099018899409, "eval_loss": 0.29030779004096985, "eval_precision": 0.8430645161290322, "eval_recall": 0.8545644662665939, "eval_runtime": 1.7837, "eval_samples_per_second": 223.689, "eval_steps_per_second": 28.031, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.1360011100769043, "learning_rate": 2.5e-05, "loss": 0.2144, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8769602202215754, "eval_loss": 0.2583388686180115, "eval_precision": 0.8742831541218639, "eval_recall": 0.8797963266048372, "eval_runtime": 1.7944, "eval_samples_per_second": 222.362, "eval_steps_per_second": 27.865, "step": 1220 }, { "epoch": 11.0, "grad_norm": 4.677872657775879, "learning_rate": 2.25e-05, "loss": 0.1967, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.27431806921958923, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 1.7823, "eval_samples_per_second": 223.874, "eval_steps_per_second": 28.054, "step": 1342 }, { "epoch": 12.0, "grad_norm": 2.564518451690674, "learning_rate": 2e-05, "loss": 0.1855, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8563451067988272, "eval_loss": 0.29132312536239624, "eval_precision": 0.8473119816985988, "eval_recall": 0.8681123840698308, "eval_runtime": 1.7831, "eval_samples_per_second": 223.773, "eval_steps_per_second": 28.042, "step": 1464 }, { "epoch": 13.0, "grad_norm": 2.314499855041504, "learning_rate": 1.75e-05, "loss": 0.1761, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8786430103333984, "eval_loss": 0.26596176624298096, "eval_precision": 0.8913001481099878, "eval_recall": 0.8683396981269322, "eval_runtime": 1.7906, "eval_samples_per_second": 222.829, "eval_steps_per_second": 27.923, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.584296226501465, "learning_rate": 1.5e-05, "loss": 0.1733, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.28683483600616455, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 1.7857, "eval_samples_per_second": 223.438, "eval_steps_per_second": 28.0, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.435178518295288, "learning_rate": 1.25e-05, "loss": 0.1582, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8847117794486216, "eval_f1": 0.8648373983739837, "eval_loss": 0.28010857105255127, "eval_precision": 0.8561154177433248, "eval_recall": 0.8759319876341153, "eval_runtime": 1.788, "eval_samples_per_second": 223.159, "eval_steps_per_second": 27.965, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.7755215167999268, "learning_rate": 1e-05, "loss": 0.1537, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8550061050061051, "eval_loss": 0.30731528997421265, "eval_precision": 0.8437710437710437, "eval_recall": 0.8713402436806692, "eval_runtime": 1.7883, "eval_samples_per_second": 223.118, "eval_steps_per_second": 27.96, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.1951849460601807, "learning_rate": 7.5e-06, "loss": 0.1537, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8781334505389722, "eval_loss": 0.2702249586582184, "eval_precision": 0.872316715542522, "eval_recall": 0.8847972358610656, "eval_runtime": 1.7845, "eval_samples_per_second": 223.597, "eval_steps_per_second": 28.02, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.639573097229004, "learning_rate": 5e-06, "loss": 0.1461, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8760282890453928, "eval_loss": 0.29228049516677856, "eval_precision": 0.8682260305697083, "eval_recall": 0.8855246408437898, "eval_runtime": 1.8077, "eval_samples_per_second": 220.726, "eval_steps_per_second": 27.66, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.177137613296509, "learning_rate": 2.5e-06, "loss": 0.1449, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8754533563232059, "eval_loss": 0.27906104922294617, "eval_precision": 0.8689781021897811, "eval_recall": 0.8830241862156756, "eval_runtime": 1.7931, "eval_samples_per_second": 222.52, "eval_steps_per_second": 27.885, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.6333515048027039, "learning_rate": 0.0, "loss": 0.1502, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8781334505389722, "eval_loss": 0.2797949016094208, "eval_precision": 0.872316715542522, "eval_recall": 0.8847972358610656, "eval_runtime": 1.7912, "eval_samples_per_second": 222.75, "eval_steps_per_second": 27.914, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.24180538537072355, "train_runtime": 620.8783, "train_samples_per_second": 117.189, "train_steps_per_second": 3.93 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }