{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 6.223746299743652, "learning_rate": 4.75e-05, "loss": 0.5424, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7418546365914787, "eval_f1": 0.6575164379109477, "eval_loss": 0.4762427806854248, "eval_precision": 0.6837301587301587, "eval_recall": 0.647344971813057, "eval_runtime": 1.7978, "eval_samples_per_second": 221.935, "eval_steps_per_second": 27.811, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.059021949768066, "learning_rate": 4.5e-05, "loss": 0.4345, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7894736842105263, "eval_f1": 0.7673663168415792, "eval_loss": 0.41568055748939514, "eval_precision": 0.7581367924528302, "eval_recall": 0.7985542825968357, "eval_runtime": 1.7989, "eval_samples_per_second": 221.802, "eval_steps_per_second": 27.795, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.354827404022217, "learning_rate": 4.25e-05, "loss": 0.3391, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8180088078011953, "eval_loss": 0.33879804611206055, "eval_precision": 0.8323930726843348, "eval_recall": 0.8071467539552646, "eval_runtime": 1.8074, "eval_samples_per_second": 220.763, "eval_steps_per_second": 27.665, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.747511863708496, "learning_rate": 4e-05, "loss": 0.2837, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8621553884711779, "eval_f1": 0.8333016825553572, "eval_loss": 0.32792460918426514, "eval_precision": 0.8341507249908615, "eval_recall": 0.8324695399163484, "eval_runtime": 1.804, "eval_samples_per_second": 221.178, "eval_steps_per_second": 27.717, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.42053157091140747, "learning_rate": 3.7500000000000003e-05, "loss": 0.2761, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8391129032258065, "eval_loss": 0.31322285532951355, "eval_precision": 0.8345705196182396, "eval_recall": 0.8442444080741953, "eval_runtime": 1.8022, "eval_samples_per_second": 221.397, "eval_steps_per_second": 27.744, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.2977254390716553, "learning_rate": 3.5e-05, "loss": 0.2459, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8543795620437956, "eval_loss": 0.3032587468624115, "eval_precision": 0.843984962406015, "eval_recall": 0.868839789052555, "eval_runtime": 1.8072, "eval_samples_per_second": 220.78, "eval_steps_per_second": 27.667, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.8183882236480713, "learning_rate": 3.2500000000000004e-05, "loss": 0.2321, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.2870577275753021, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 1.8055, "eval_samples_per_second": 220.996, "eval_steps_per_second": 27.694, "step": 854 }, { "epoch": 8.0, "grad_norm": 3.4162003993988037, "learning_rate": 3e-05, "loss": 0.2206, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.2634139955043793, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 1.8055, "eval_samples_per_second": 220.985, "eval_steps_per_second": 27.692, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.009228229522705, "learning_rate": 2.7500000000000004e-05, "loss": 0.2067, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8703223612108386, "eval_loss": 0.2633897066116333, "eval_precision": 0.8694131129742446, "eval_recall": 0.8712493180578287, "eval_runtime": 1.8057, "eval_samples_per_second": 220.963, "eval_steps_per_second": 27.69, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.9459621906280518, "learning_rate": 2.5e-05, "loss": 0.192, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8631217838765008, "eval_loss": 0.2696186900138855, "eval_precision": 0.8872804935927859, "eval_recall": 0.8462447717766868, "eval_runtime": 1.8049, "eval_samples_per_second": 221.061, "eval_steps_per_second": 27.702, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.9607306122779846, "learning_rate": 2.25e-05, "loss": 0.1866, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8808495451466529, "eval_loss": 0.2752375304698944, "eval_precision": 0.8691495353421572, "eval_recall": 0.8972995090016367, "eval_runtime": 1.8092, "eval_samples_per_second": 220.542, "eval_steps_per_second": 27.637, "step": 1342 }, { "epoch": 12.0, "grad_norm": 4.809903621673584, "learning_rate": 2e-05, "loss": 0.1786, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8792560061999484, "eval_loss": 0.2651856243610382, "eval_precision": 0.8707622232472325, "eval_recall": 0.889798145117294, "eval_runtime": 1.8065, "eval_samples_per_second": 220.87, "eval_steps_per_second": 27.678, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.7058310508728027, "learning_rate": 1.75e-05, "loss": 0.1695, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8867007927797945, "eval_loss": 0.25362077355384827, "eval_precision": 0.89198606271777, "eval_recall": 0.8818876159301692, "eval_runtime": 1.82, "eval_samples_per_second": 219.234, "eval_steps_per_second": 27.473, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.428104877471924, "learning_rate": 1.5e-05, "loss": 0.1664, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8680720368560659, "eval_loss": 0.2736993730068207, "eval_precision": 0.8587217615098657, "eval_recall": 0.8802054919076197, "eval_runtime": 1.8089, "eval_samples_per_second": 220.576, "eval_steps_per_second": 27.641, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.6353508234024048, "learning_rate": 1.25e-05, "loss": 0.1521, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8829621606985718, "eval_loss": 0.26195329427719116, "eval_precision": 0.8802419354838709, "eval_recall": 0.8858428805237315, "eval_runtime": 1.8126, "eval_samples_per_second": 220.122, "eval_steps_per_second": 27.584, "step": 1830 }, { "epoch": 16.0, "grad_norm": 1.580483317375183, "learning_rate": 1e-05, "loss": 0.1494, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8760914310475572, "eval_loss": 0.30298247933387756, "eval_precision": 0.8629851740796268, "eval_recall": 0.8962538643389707, "eval_runtime": 1.8105, "eval_samples_per_second": 220.383, "eval_steps_per_second": 27.617, "step": 1952 }, { "epoch": 17.0, "grad_norm": 3.5637781620025635, "learning_rate": 7.5e-06, "loss": 0.1487, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8733660552828726, "eval_loss": 0.2702126204967499, "eval_precision": 0.8650109547970479, "eval_recall": 0.8837515911983997, "eval_runtime": 1.8055, "eval_samples_per_second": 220.99, "eval_steps_per_second": 27.693, "step": 2074 }, { "epoch": 18.0, "grad_norm": 3.6467041969299316, "learning_rate": 5e-06, "loss": 0.1494, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8765906680805938, "eval_loss": 0.2763027548789978, "eval_precision": 0.8675710594315245, "eval_recall": 0.888025095471904, "eval_runtime": 1.8075, "eval_samples_per_second": 220.753, "eval_steps_per_second": 27.663, "step": 2196 }, { "epoch": 19.0, "grad_norm": 4.27400541305542, "learning_rate": 2.5e-06, "loss": 0.1334, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8733660552828726, "eval_loss": 0.28261518478393555, "eval_precision": 0.8650109547970479, "eval_recall": 0.8837515911983997, "eval_runtime": 1.8039, "eval_samples_per_second": 221.182, "eval_steps_per_second": 27.717, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.5643185377120972, "learning_rate": 0.0, "loss": 0.1325, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8922305764411027, "eval_f1": 0.8721887408091659, "eval_loss": 0.27931535243988037, "eval_precision": 0.866466275659824, "eval_recall": 0.8787506819421713, "eval_runtime": 1.804, "eval_samples_per_second": 221.175, "eval_steps_per_second": 27.716, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.2269888150887411, "train_runtime": 635.9179, "train_samples_per_second": 114.417, "train_steps_per_second": 3.837 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }