{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.0522332191467285, "learning_rate": 4.75e-05, "loss": 0.5524, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.7167919799498746, "eval_f1": 0.5962731806907421, "eval_loss": 0.5142865777015686, "eval_precision": 0.6417444029850746, "eval_recall": 0.5921076559374432, "eval_runtime": 1.7987, "eval_samples_per_second": 221.826, "eval_steps_per_second": 27.798, "step": 122 }, { "epoch": 2.0, "grad_norm": 3.862292528152466, "learning_rate": 4.5e-05, "loss": 0.468, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.7919799498746867, "eval_f1": 0.7577413479052824, "eval_loss": 0.427168607711792, "eval_precision": 0.7507235274089207, "eval_recall": 0.7678214220767412, "eval_runtime": 1.8042, "eval_samples_per_second": 221.147, "eval_steps_per_second": 27.713, "step": 244 }, { "epoch": 3.0, "grad_norm": 2.0498220920562744, "learning_rate": 4.25e-05, "loss": 0.3759, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8345864661654135, "eval_f1": 0.7840625819994752, "eval_loss": 0.3480204641819, "eval_precision": 0.8174519753533889, "eval_recall": 0.7654573558828878, "eval_runtime": 1.8451, "eval_samples_per_second": 216.245, "eval_steps_per_second": 27.098, "step": 366 }, { "epoch": 4.0, "grad_norm": 2.6740951538085938, "learning_rate": 4e-05, "loss": 0.3116, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8646616541353384, "eval_f1": 0.8315033783783784, "eval_loss": 0.308014452457428, "eval_precision": 0.8438775510204082, "eval_recall": 0.8217403164211674, "eval_runtime": 1.8424, "eval_samples_per_second": 216.565, "eval_steps_per_second": 27.138, "step": 488 }, { "epoch": 5.0, "grad_norm": 3.3190958499908447, "learning_rate": 3.7500000000000003e-05, "loss": 0.2812, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.8368354828562441, "eval_loss": 0.3000350892543793, "eval_precision": 0.8520237470480189, "eval_recall": 0.8252864157119476, "eval_runtime": 1.8448, "eval_samples_per_second": 216.28, "eval_steps_per_second": 27.103, "step": 610 }, { "epoch": 6.0, "grad_norm": 6.869318962097168, "learning_rate": 3.5e-05, "loss": 0.2692, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8563451067988272, "eval_loss": 0.2969658374786377, "eval_precision": 0.8473119816985988, "eval_recall": 0.8681123840698308, "eval_runtime": 1.8412, "eval_samples_per_second": 216.704, "eval_steps_per_second": 27.156, "step": 732 }, { "epoch": 7.0, "grad_norm": 2.0313923358917236, "learning_rate": 3.2500000000000004e-05, "loss": 0.2603, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8771929824561403, "eval_f1": 0.8543546116197471, "eval_loss": 0.29286739230155945, "eval_precision": 0.8489149560117302, "eval_recall": 0.8606110201854882, "eval_runtime": 1.8427, "eval_samples_per_second": 216.531, "eval_steps_per_second": 27.134, "step": 854 }, { "epoch": 8.0, "grad_norm": 2.5121915340423584, "learning_rate": 3e-05, "loss": 0.231, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8596491228070176, "eval_f1": 0.819047619047619, "eval_loss": 0.3082924783229828, "eval_precision": 0.848619017499473, "eval_recall": 0.8006910347335879, "eval_runtime": 1.8502, "eval_samples_per_second": 215.65, "eval_steps_per_second": 27.024, "step": 976 }, { "epoch": 9.0, "grad_norm": 6.5755615234375, "learning_rate": 2.7500000000000004e-05, "loss": 0.2278, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.8696741854636592, "eval_f1": 0.842789598108747, "eval_loss": 0.29386115074157715, "eval_precision": 0.842789598108747, "eval_recall": 0.842789598108747, "eval_runtime": 1.8446, "eval_samples_per_second": 216.311, "eval_steps_per_second": 27.107, "step": 1098 }, { "epoch": 10.0, "grad_norm": 2.957425117492676, "learning_rate": 2.5e-05, "loss": 0.2117, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.87468671679198, "eval_f1": 0.8403693509153758, "eval_loss": 0.32400935888290405, "eval_precision": 0.8647333925035843, "eval_recall": 0.8238316057464994, "eval_runtime": 1.8502, "eval_samples_per_second": 215.651, "eval_steps_per_second": 27.024, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.6018930673599243, "learning_rate": 2.25e-05, "loss": 0.2014, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8533986527862829, "eval_loss": 0.2902269959449768, "eval_precision": 0.8572003218020917, "eval_recall": 0.8498817966903074, "eval_runtime": 1.8555, "eval_samples_per_second": 215.034, "eval_steps_per_second": 26.947, "step": 1342 }, { "epoch": 12.0, "grad_norm": 5.241063594818115, "learning_rate": 2e-05, "loss": 0.1869, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8748655913978494, "eval_loss": 0.27595847845077515, "eval_precision": 0.86983032873807, "eval_recall": 0.8805237315875614, "eval_runtime": 1.849, "eval_samples_per_second": 215.789, "eval_steps_per_second": 27.041, "step": 1464 }, { "epoch": 13.0, "grad_norm": 1.832000970840454, "learning_rate": 1.75e-05, "loss": 0.1685, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8560793854229822, "eval_loss": 0.3015528619289398, "eval_precision": 0.8609538327526132, "eval_recall": 0.8516548463356974, "eval_runtime": 1.8472, "eval_samples_per_second": 216.004, "eval_steps_per_second": 27.068, "step": 1586 }, { "epoch": 14.0, "grad_norm": 4.6754584312438965, "learning_rate": 1.5e-05, "loss": 0.1703, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8695225637671682, "eval_loss": 0.30271315574645996, "eval_precision": 0.8631532846715328, "eval_recall": 0.8769776322967813, "eval_runtime": 1.8487, "eval_samples_per_second": 215.823, "eval_steps_per_second": 27.045, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.3340052366256714, "learning_rate": 1.25e-05, "loss": 0.1617, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8695225637671682, "eval_loss": 0.30203503370285034, "eval_precision": 0.8631532846715328, "eval_recall": 0.8769776322967813, "eval_runtime": 1.8491, "eval_samples_per_second": 215.78, "eval_steps_per_second": 27.04, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.8184535503387451, "learning_rate": 1e-05, "loss": 0.1524, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.8822055137844611, "eval_f1": 0.8622085718274466, "eval_loss": 0.31774768233299255, "eval_precision": 0.8530168716042322, "eval_recall": 0.8741589379887251, "eval_runtime": 1.8434, "eval_samples_per_second": 216.442, "eval_steps_per_second": 27.123, "step": 1952 }, { "epoch": 17.0, "grad_norm": 1.1876635551452637, "learning_rate": 7.5e-06, "loss": 0.1356, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8682773109243698, "eval_loss": 0.3291165828704834, "eval_precision": 0.864855223259409, "eval_recall": 0.8719767230405528, "eval_runtime": 1.8462, "eval_samples_per_second": 216.117, "eval_steps_per_second": 27.082, "step": 2074 }, { "epoch": 18.0, "grad_norm": 4.472387313842773, "learning_rate": 5e-06, "loss": 0.1474, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8682773109243698, "eval_loss": 0.3268108665943146, "eval_precision": 0.864855223259409, "eval_recall": 0.8719767230405528, "eval_runtime": 1.8536, "eval_samples_per_second": 215.257, "eval_steps_per_second": 26.975, "step": 2196 }, { "epoch": 19.0, "grad_norm": 6.7281928062438965, "learning_rate": 2.5e-06, "loss": 0.145, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8656072933585827, "eval_loss": 0.3314986526966095, "eval_precision": 0.8614399005740664, "eval_recall": 0.8702036733951628, "eval_runtime": 1.8468, "eval_samples_per_second": 216.052, "eval_steps_per_second": 27.074, "step": 2318 }, { "epoch": 20.0, "grad_norm": 1.0261917114257812, "learning_rate": 0.0, "loss": 0.1466, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8649563392675828, "eval_loss": 0.32958927750587463, "eval_precision": 0.8623655913978494, "eval_recall": 0.8677032187670486, "eval_runtime": 1.8152, "eval_samples_per_second": 219.809, "eval_steps_per_second": 27.545, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 8444128359504000.0, "train_loss": 0.24024685015443895, "train_runtime": 634.5102, "train_samples_per_second": 114.671, "train_steps_per_second": 3.845 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 8444128359504000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }