{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.732887268066406, "learning_rate": 4.75e-05, "loss": 0.56, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.6992481203007519, "eval_f1": 0.5969696969696969, "eval_loss": 0.5053137540817261, "eval_precision": 0.6180992313067785, "eval_recall": 0.5921985815602837, "eval_runtime": 1.7058, "eval_samples_per_second": 233.909, "eval_steps_per_second": 29.312, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.830270767211914, "learning_rate": 4.5e-05, "loss": 0.5012, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7393483709273183, "eval_f1": 0.7129496402877697, "eval_loss": 0.49953827261924744, "eval_precision": 0.7071611253196931, "eval_recall": 0.7405891980360065, "eval_runtime": 1.7056, "eval_samples_per_second": 233.93, "eval_steps_per_second": 29.315, "step": 244 }, { "epoch": 3.0, "grad_norm": 4.50347375869751, "learning_rate": 4.25e-05, "loss": 0.4698, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.7794486215538847, "eval_f1": 0.7425806451612903, "eval_loss": 0.447434663772583, "eval_precision": 0.736313660704113, "eval_recall": 0.7514548099654483, "eval_runtime": 1.7066, "eval_samples_per_second": 233.795, "eval_steps_per_second": 29.298, "step": 366 }, { "epoch": 4.0, "grad_norm": 3.343970537185669, "learning_rate": 4e-05, "loss": 0.43, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8145363408521303, "eval_f1": 0.7728070175438597, "eval_loss": 0.4072956144809723, "eval_precision": 0.7774024024024024, "eval_recall": 0.7687761411165667, "eval_runtime": 1.7059, "eval_samples_per_second": 233.891, "eval_steps_per_second": 29.31, "step": 488 }, { "epoch": 5.0, "grad_norm": 4.232522964477539, "learning_rate": 3.7500000000000003e-05, "loss": 0.4124, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.7819548872180451, "eval_f1": 0.7560593390067533, "eval_loss": 0.431627094745636, "eval_precision": 0.7467105263157895, "eval_recall": 0.7807328605200945, "eval_runtime": 1.7055, "eval_samples_per_second": 233.956, "eval_steps_per_second": 29.318, "step": 610 }, { "epoch": 6.0, "grad_norm": 2.718388795852661, "learning_rate": 3.5e-05, "loss": 0.3812, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7655350631880775, "eval_loss": 0.41591039299964905, "eval_precision": 0.7556396037471735, "eval_recall": 0.7878250591016549, "eval_runtime": 1.7074, "eval_samples_per_second": 233.691, "eval_steps_per_second": 29.285, "step": 732 }, { "epoch": 7.0, "grad_norm": 4.499834060668945, "learning_rate": 3.2500000000000004e-05, "loss": 0.3634, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8039756622954537, "eval_loss": 0.37424883246421814, "eval_precision": 0.8032299897460643, "eval_recall": 0.8047372249499909, "eval_runtime": 1.7061, "eval_samples_per_second": 233.862, "eval_steps_per_second": 29.306, "step": 854 }, { "epoch": 8.0, "grad_norm": 4.036494255065918, "learning_rate": 3e-05, "loss": 0.3458, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8049369344976196, "eval_loss": 0.37001675367355347, "eval_precision": 0.8027777777777778, "eval_recall": 0.8072376795781051, "eval_runtime": 1.7057, "eval_samples_per_second": 233.925, "eval_steps_per_second": 29.314, "step": 976 }, { "epoch": 9.0, "grad_norm": 5.362171649932861, "learning_rate": 2.7500000000000004e-05, "loss": 0.3346, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8170426065162907, "eval_f1": 0.7887587482322225, "eval_loss": 0.3915169835090637, "eval_precision": 0.779551554462089, "eval_recall": 0.8030551009274414, "eval_runtime": 1.7034, "eval_samples_per_second": 234.243, "eval_steps_per_second": 29.354, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.311065435409546, "learning_rate": 2.5e-05, "loss": 0.3173, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.8004637206764866, "eval_loss": 0.367042601108551, "eval_precision": 0.8004637206764866, "eval_recall": 0.8004637206764866, "eval_runtime": 1.7057, "eval_samples_per_second": 233.92, "eval_steps_per_second": 29.313, "step": 1220 }, { "epoch": 11.0, "grad_norm": 10.555017471313477, "learning_rate": 2.25e-05, "loss": 0.3133, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8141734808401475, "eval_loss": 0.35967516899108887, "eval_precision": 0.8166666666666667, "eval_recall": 0.8118294235315512, "eval_runtime": 1.7056, "eval_samples_per_second": 233.93, "eval_steps_per_second": 29.315, "step": 1342 }, { "epoch": 12.0, "grad_norm": 8.858575820922852, "learning_rate": 2e-05, "loss": 0.3174, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8067969337812972, "eval_loss": 0.36401695013046265, "eval_precision": 0.8021114369501466, "eval_recall": 0.8122385888343335, "eval_runtime": 1.7058, "eval_samples_per_second": 233.905, "eval_steps_per_second": 29.311, "step": 1464 }, { "epoch": 13.0, "grad_norm": 4.911335468292236, "learning_rate": 1.75e-05, "loss": 0.3056, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8546365914786967, "eval_f1": 0.8209821152299028, "eval_loss": 0.3489120304584503, "eval_precision": 0.8284245491932933, "eval_recall": 0.8146481178396072, "eval_runtime": 1.705, "eval_samples_per_second": 234.02, "eval_steps_per_second": 29.326, "step": 1586 }, { "epoch": 14.0, "grad_norm": 13.05285358428955, "learning_rate": 1.5e-05, "loss": 0.3018, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8521303258145363, "eval_f1": 0.8193336965948074, "eval_loss": 0.3513885736465454, "eval_precision": 0.8237151567944251, "eval_recall": 0.8153755228223314, "eval_runtime": 1.7068, "eval_samples_per_second": 233.777, "eval_steps_per_second": 29.295, "step": 1708 }, { "epoch": 15.0, "grad_norm": 2.7146189212799072, "learning_rate": 1.25e-05, "loss": 0.2979, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8395989974937343, "eval_f1": 0.8074672778816575, "eval_loss": 0.35171884298324585, "eval_precision": 0.8059873949579832, "eval_recall": 0.8090107292234952, "eval_runtime": 1.7044, "eval_samples_per_second": 234.104, "eval_steps_per_second": 29.336, "step": 1830 }, { "epoch": 16.0, "grad_norm": 2.740528106689453, "learning_rate": 1e-05, "loss": 0.2818, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8320802005012531, "eval_f1": 0.801779557335113, "eval_loss": 0.3599066436290741, "eval_precision": 0.7960927960927962, "eval_recall": 0.8086924895435534, "eval_runtime": 1.7041, "eval_samples_per_second": 234.142, "eval_steps_per_second": 29.341, "step": 1952 }, { "epoch": 17.0, "grad_norm": 5.430470943450928, "learning_rate": 7.5e-06, "loss": 0.2918, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8160386984618873, "eval_loss": 0.349627822637558, "eval_precision": 0.8152632848784607, "eval_recall": 0.8168303327877796, "eval_runtime": 1.7058, "eval_samples_per_second": 233.905, "eval_steps_per_second": 29.311, "step": 2074 }, { "epoch": 18.0, "grad_norm": 5.947146415710449, "learning_rate": 5e-06, "loss": 0.2921, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8370927318295739, "eval_f1": 0.8076965854743632, "eval_loss": 0.35576099157333374, "eval_precision": 0.8018925518925519, "eval_recall": 0.8147390434624477, "eval_runtime": 1.7044, "eval_samples_per_second": 234.095, "eval_steps_per_second": 29.335, "step": 2196 }, { "epoch": 19.0, "grad_norm": 3.248528480529785, "learning_rate": 2.5e-06, "loss": 0.2807, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.849624060150376, "eval_f1": 0.8195005730140539, "eval_loss": 0.34564533829689026, "eval_precision": 0.8179621848739496, "eval_recall": 0.8211038370612839, "eval_runtime": 1.7042, "eval_samples_per_second": 234.132, "eval_steps_per_second": 29.34, "step": 2318 }, { "epoch": 20.0, "grad_norm": 3.228414297103882, "learning_rate": 0.0, "loss": 0.2793, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8471177944862155, "eval_f1": 0.8169408154516122, "eval_loss": 0.34686279296875, "eval_precision": 0.814695340501792, "eval_recall": 0.8193307874158937, "eval_runtime": 1.7037, "eval_samples_per_second": 234.195, "eval_steps_per_second": 29.348, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7604291693904000.0, "train_loss": 0.35385844902914076, "train_runtime": 612.688, "train_samples_per_second": 118.755, "train_steps_per_second": 3.982 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7604291693904000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }