{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 5.144454002380371, "learning_rate": 4.75e-05, "loss": 0.5655, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7243107769423559, "eval_f1": 0.6548442906574394, "eval_loss": 0.5178584456443787, "eval_precision": 0.6622605615324062, "eval_recall": 0.6499363520640116, "eval_runtime": 5.1252, "eval_samples_per_second": 77.851, "eval_steps_per_second": 9.756, "step": 122 }, { "epoch": 2.0, "grad_norm": 4.57472562789917, "learning_rate": 4.5e-05, "loss": 0.5048, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7518796992481203, "eval_f1": 0.7147221319467014, "eval_loss": 0.49262356758117676, "eval_precision": 0.707896051974013, "eval_recall": 0.7269503546099291, "eval_runtime": 5.1022, "eval_samples_per_second": 78.201, "eval_steps_per_second": 9.8, "step": 244 }, { "epoch": 3.0, "grad_norm": 3.7757699489593506, "learning_rate": 4.25e-05, "loss": 0.4529, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7994987468671679, "eval_f1": 0.7593340973520719, "eval_loss": 0.430127888917923, "eval_precision": 0.7580882352941176, "eval_recall": 0.7606382978723404, "eval_runtime": 5.1095, "eval_samples_per_second": 78.089, "eval_steps_per_second": 9.786, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.7683963775634766, "learning_rate": 4e-05, "loss": 0.393, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8220551378446115, "eval_f1": 0.781440706080221, "eval_loss": 0.3863331973552704, "eval_precision": 0.787071406102548, "eval_recall": 0.776595744680851, "eval_runtime": 5.0737, "eval_samples_per_second": 78.641, "eval_steps_per_second": 9.855, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.220449924468994, "learning_rate": 3.7500000000000003e-05, "loss": 0.3754, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8245614035087719, "eval_f1": 0.800328853302831, "eval_loss": 0.38681742548942566, "eval_precision": 0.7891990821678322, "eval_recall": 0.8208765230041826, "eval_runtime": 5.0481, "eval_samples_per_second": 79.039, "eval_steps_per_second": 9.905, "step": 610 }, { "epoch": 6.0, "grad_norm": 3.7665016651153564, "learning_rate": 3.5e-05, "loss": 0.3455, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8446115288220551, "eval_f1": 0.8125568285142754, "eval_loss": 0.36048319935798645, "eval_precision": 0.8125568285142754, "eval_recall": 0.8125568285142754, "eval_runtime": 5.1019, "eval_samples_per_second": 78.207, "eval_steps_per_second": 9.8, "step": 732 }, { "epoch": 7.0, "grad_norm": 1.0729649066925049, "learning_rate": 3.2500000000000004e-05, "loss": 0.3344, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8228567054500919, "eval_loss": 0.33959081768989563, "eval_precision": 0.8263351692555232, "eval_recall": 0.8196490270958356, "eval_runtime": 5.0803, "eval_samples_per_second": 78.538, "eval_steps_per_second": 9.842, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.983180522918701, "learning_rate": 3e-05, "loss": 0.3157, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8368501045387564, "eval_loss": 0.3319353461265564, "eval_precision": 0.8435514312676942, "eval_recall": 0.8310147299509002, "eval_runtime": 5.0858, "eval_samples_per_second": 78.454, "eval_steps_per_second": 9.831, "step": 976 }, { "epoch": 9.0, "grad_norm": 8.283629417419434, "learning_rate": 2.7500000000000004e-05, "loss": 0.3076, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8209821152299028, "eval_loss": 0.32728075981140137, "eval_precision": 0.8284245491932933, "eval_recall": 0.8146481178396072, "eval_runtime": 5.0499, "eval_samples_per_second": 79.011, "eval_steps_per_second": 9.901, "step": 1098 }, { "epoch": 10.0, "grad_norm": 5.000823020935059, "learning_rate": 2.5e-05, "loss": 0.2948, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8448388501742161, "eval_loss": 0.3238257169723511, "eval_precision": 0.8551721930610677, "eval_recall": 0.8363338788870704, "eval_runtime": 5.0547, "eval_samples_per_second": 78.937, "eval_steps_per_second": 9.892, "step": 1220 }, { "epoch": 11.0, "grad_norm": 3.291066884994507, "learning_rate": 2.25e-05, "loss": 0.2737, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8395012067578439, "eval_loss": 0.31985798478126526, "eval_precision": 0.8473584308763049, "eval_recall": 0.8327877795962902, "eval_runtime": 5.0949, "eval_samples_per_second": 78.314, "eval_steps_per_second": 9.814, "step": 1342 }, { "epoch": 12.0, "grad_norm": 9.703360557556152, "learning_rate": 2e-05, "loss": 0.2741, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.8315338681464504, "eval_loss": 0.31903842091560364, "eval_precision": 0.8299369747899159, "eval_recall": 0.8331969448990726, "eval_runtime": 5.0527, "eval_samples_per_second": 78.968, "eval_steps_per_second": 9.896, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.6231120824813843, "learning_rate": 1.75e-05, "loss": 0.275, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8458135188208294, "eval_loss": 0.3146449625492096, "eval_precision": 0.8627946127946129, "eval_recall": 0.8331060192762321, "eval_runtime": 5.1105, "eval_samples_per_second": 78.075, "eval_steps_per_second": 9.784, "step": 1586 }, { "epoch": 14.0, "grad_norm": 7.953589916229248, "learning_rate": 1.5e-05, "loss": 0.2736, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8403508771929824, "eval_loss": 0.3103943169116974, "eval_precision": 0.8460491741741742, "eval_recall": 0.8352882342244045, "eval_runtime": 5.0663, "eval_samples_per_second": 78.756, "eval_steps_per_second": 9.869, "step": 1708 }, { "epoch": 15.0, "grad_norm": 1.1334174871444702, "learning_rate": 1.25e-05, "loss": 0.263, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8671679197994987, "eval_f1": 0.8401647707947546, "eval_loss": 0.3111773729324341, "eval_precision": 0.8393298751432535, "eval_recall": 0.8410165484633569, "eval_runtime": 5.0642, "eval_samples_per_second": 78.788, "eval_steps_per_second": 9.873, "step": 1830 }, { "epoch": 16.0, "grad_norm": 6.466490745544434, "learning_rate": 1e-05, "loss": 0.2583, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8461962888779714, "eval_loss": 0.3085690140724182, "eval_precision": 0.8453465227094517, "eval_recall": 0.8470631023822512, "eval_runtime": 5.0762, "eval_samples_per_second": 78.602, "eval_steps_per_second": 9.85, "step": 1952 }, { "epoch": 17.0, "grad_norm": 7.930349826812744, "learning_rate": 7.5e-06, "loss": 0.2544, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8421640488656195, "eval_loss": 0.30654484033584595, "eval_precision": 0.8512313860252005, "eval_recall": 0.8345608292416803, "eval_runtime": 5.1322, "eval_samples_per_second": 77.745, "eval_steps_per_second": 9.742, "step": 2074 }, { "epoch": 18.0, "grad_norm": 6.3030524253845215, "learning_rate": 5e-06, "loss": 0.2594, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8411818738518064, "eval_loss": 0.30556628108024597, "eval_precision": 0.8448542607834644, "eval_recall": 0.8377886888525186, "eval_runtime": 5.0496, "eval_samples_per_second": 79.016, "eval_steps_per_second": 9.902, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.6230645179748535, "learning_rate": 2.5e-06, "loss": 0.256, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8721804511278195, "eval_f1": 0.8421640488656195, "eval_loss": 0.3042590618133545, "eval_precision": 0.8512313860252005, "eval_recall": 0.8345608292416803, "eval_runtime": 5.0911, "eval_samples_per_second": 78.372, "eval_steps_per_second": 9.821, "step": 2318 }, { "epoch": 20.0, "grad_norm": 8.38916301727295, "learning_rate": 0.0, "loss": 0.2515, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8464912280701754, "eval_loss": 0.3035424053668976, "eval_precision": 0.8522897897897899, "eval_recall": 0.8413347881432988, "eval_runtime": 5.0606, "eval_samples_per_second": 78.845, "eval_steps_per_second": 9.88, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7609911792720000.0, "train_loss": 0.32642708059217107, "train_runtime": 1954.5439, "train_samples_per_second": 37.226, "train_steps_per_second": 1.248 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7609911792720000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }