{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 2440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 78.255126953125, "learning_rate": 4.75e-05, "loss": 0.3942, "step": 122 }, { "epoch": 1.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8419489007724301, "eval_loss": 0.3128369450569153, "eval_precision": 0.8857758620689655, "eval_recall": 0.8173758865248226, "eval_runtime": 1.6299, "eval_samples_per_second": 244.801, "eval_steps_per_second": 30.677, "step": 122 }, { "epoch": 2.0, "grad_norm": 42.82415771484375, "learning_rate": 4.5e-05, "loss": 0.2168, "step": 244 }, { "epoch": 2.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8676337535436396, "eval_loss": 0.3043781518936157, "eval_precision": 0.8658613445378152, "eval_recall": 0.8694762684124386, "eval_runtime": 1.6375, "eval_samples_per_second": 243.661, "eval_steps_per_second": 30.534, "step": 244 }, { "epoch": 3.0, "grad_norm": 0.2970781624317169, "learning_rate": 4.25e-05, "loss": 0.1372, "step": 366 }, { "epoch": 3.0, "eval_accuracy": 0.8897243107769424, "eval_f1": 0.8595250288055307, "eval_loss": 0.5317866802215576, "eval_precision": 0.885164197446576, "eval_recall": 0.8419712675031824, "eval_runtime": 1.6412, "eval_samples_per_second": 243.114, "eval_steps_per_second": 30.465, "step": 366 }, { "epoch": 4.0, "grad_norm": 0.16418644785881042, "learning_rate": 4e-05, "loss": 0.0957, "step": 488 }, { "epoch": 4.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8765906680805938, "eval_loss": 0.47654101252555847, "eval_precision": 0.8675710594315245, "eval_recall": 0.888025095471904, "eval_runtime": 1.6551, "eval_samples_per_second": 241.073, "eval_steps_per_second": 30.21, "step": 488 }, { "epoch": 5.0, "grad_norm": 0.4955180287361145, "learning_rate": 3.7500000000000003e-05, "loss": 0.0674, "step": 610 }, { "epoch": 5.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8728804559453431, "eval_loss": 0.552257239818573, "eval_precision": 0.8576773985140519, "eval_recall": 0.9027095835606473, "eval_runtime": 1.6807, "eval_samples_per_second": 237.402, "eval_steps_per_second": 29.75, "step": 610 }, { "epoch": 6.0, "grad_norm": 0.03946012258529663, "learning_rate": 3.5e-05, "loss": 0.0535, "step": 732 }, { "epoch": 6.0, "eval_accuracy": 0.9072681704260651, "eval_f1": 0.8878574955372402, "eval_loss": 0.5158531069755554, "eval_precision": 0.8888448885098087, "eval_recall": 0.8868885251863976, "eval_runtime": 1.6465, "eval_samples_per_second": 242.338, "eval_steps_per_second": 30.368, "step": 732 }, { "epoch": 7.0, "grad_norm": 0.008608223870396614, "learning_rate": 3.2500000000000004e-05, "loss": 0.027, "step": 854 }, { "epoch": 7.0, "eval_accuracy": 0.8872180451127819, "eval_f1": 0.8642908431276217, "eval_loss": 0.5940884351730347, "eval_precision": 0.8633964654080464, "eval_recall": 0.8652027641389344, "eval_runtime": 1.6485, "eval_samples_per_second": 242.043, "eval_steps_per_second": 30.331, "step": 854 }, { "epoch": 8.0, "grad_norm": 0.010127891786396503, "learning_rate": 3e-05, "loss": 0.0223, "step": 976 }, { "epoch": 8.0, "eval_accuracy": 0.8796992481203008, "eval_f1": 0.8548827059465357, "eval_loss": 0.7166243195533752, "eval_precision": 0.8548827059465357, "eval_recall": 0.8548827059465357, "eval_runtime": 1.6562, "eval_samples_per_second": 240.913, "eval_steps_per_second": 30.19, "step": 976 }, { "epoch": 9.0, "grad_norm": 0.005933025386184454, "learning_rate": 2.7500000000000004e-05, "loss": 0.0145, "step": 1098 }, { "epoch": 9.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8829621606985718, "eval_loss": 0.7022837996482849, "eval_precision": 0.8802419354838709, "eval_recall": 0.8858428805237315, "eval_runtime": 1.6595, "eval_samples_per_second": 240.429, "eval_steps_per_second": 30.129, "step": 1098 }, { "epoch": 10.0, "grad_norm": 0.02505210041999817, "learning_rate": 2.5e-05, "loss": 0.0106, "step": 1220 }, { "epoch": 10.0, "eval_accuracy": 0.9047619047619048, "eval_f1": 0.8839406001224739, "eval_loss": 0.699307918548584, "eval_precision": 0.8880654743486602, "eval_recall": 0.880114566284779, "eval_runtime": 1.6551, "eval_samples_per_second": 241.07, "eval_steps_per_second": 30.209, "step": 1220 }, { "epoch": 11.0, "grad_norm": 0.002501419745385647, "learning_rate": 2.25e-05, "loss": 0.0093, "step": 1342 }, { "epoch": 11.0, "eval_accuracy": 0.8947368421052632, "eval_f1": 0.8703663593044124, "eval_loss": 0.8273664712905884, "eval_precision": 0.8789149003479912, "eval_recall": 0.8630205491907619, "eval_runtime": 1.6583, "eval_samples_per_second": 240.615, "eval_steps_per_second": 30.152, "step": 1342 }, { "epoch": 12.0, "grad_norm": 0.012166227214038372, "learning_rate": 2e-05, "loss": 0.0086, "step": 1464 }, { "epoch": 12.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8744522298370696, "eval_loss": 0.7971612215042114, "eval_precision": 0.8795731707317074, "eval_recall": 0.8697945080923805, "eval_runtime": 1.6712, "eval_samples_per_second": 238.744, "eval_steps_per_second": 29.918, "step": 1464 }, { "epoch": 13.0, "grad_norm": 0.00197013420984149, "learning_rate": 1.75e-05, "loss": 0.0106, "step": 1586 }, { "epoch": 13.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8787009231453675, "eval_loss": 0.7591652870178223, "eval_precision": 0.8714896214896215, "eval_recall": 0.8872976904891798, "eval_runtime": 1.6672, "eval_samples_per_second": 239.329, "eval_steps_per_second": 29.991, "step": 1586 }, { "epoch": 14.0, "grad_norm": 0.0050615849904716015, "learning_rate": 1.5e-05, "loss": 0.0072, "step": 1708 }, { "epoch": 14.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8813841488792438, "eval_loss": 0.7834069728851318, "eval_precision": 0.8748029197080291, "eval_recall": 0.8890707401345699, "eval_runtime": 1.6555, "eval_samples_per_second": 241.019, "eval_steps_per_second": 30.203, "step": 1708 }, { "epoch": 15.0, "grad_norm": 0.002086851978674531, "learning_rate": 1.25e-05, "loss": 0.0098, "step": 1830 }, { "epoch": 15.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8802521008403361, "eval_loss": 0.8048883676528931, "eval_precision": 0.8767168083714847, "eval_recall": 0.8840698308783415, "eval_runtime": 1.6591, "eval_samples_per_second": 240.488, "eval_steps_per_second": 30.136, "step": 1830 }, { "epoch": 16.0, "grad_norm": 0.0012473827227950096, "learning_rate": 1e-05, "loss": 0.0058, "step": 1952 }, { "epoch": 16.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8802521008403361, "eval_loss": 0.7670984268188477, "eval_precision": 0.8767168083714847, "eval_recall": 0.8840698308783415, "eval_runtime": 1.659, "eval_samples_per_second": 240.503, "eval_steps_per_second": 30.138, "step": 1952 }, { "epoch": 17.0, "grad_norm": 0.00188881263602525, "learning_rate": 7.5e-06, "loss": 0.0035, "step": 2074 }, { "epoch": 17.0, "eval_accuracy": 0.9022556390977443, "eval_f1": 0.8856624319419237, "eval_loss": 0.8084732294082642, "eval_precision": 0.8758364312267658, "eval_recall": 0.8983451536643026, "eval_runtime": 1.6569, "eval_samples_per_second": 240.816, "eval_steps_per_second": 30.177, "step": 2074 }, { "epoch": 18.0, "grad_norm": 0.0014366944087669253, "learning_rate": 5e-06, "loss": 0.0052, "step": 2196 }, { "epoch": 18.0, "eval_accuracy": 0.899749373433584, "eval_f1": 0.8808243727598566, "eval_loss": 0.7721081972122192, "eval_precision": 0.875706963591375, "eval_recall": 0.8865702855064557, "eval_runtime": 1.6546, "eval_samples_per_second": 241.143, "eval_steps_per_second": 30.218, "step": 2196 }, { "epoch": 19.0, "grad_norm": 0.0011094665387645364, "learning_rate": 2.5e-06, "loss": 0.0028, "step": 2318 }, { "epoch": 19.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8792560061999484, "eval_loss": 0.8358559608459473, "eval_precision": 0.8707622232472325, "eval_recall": 0.889798145117294, "eval_runtime": 1.6584, "eval_samples_per_second": 240.592, "eval_steps_per_second": 30.149, "step": 2318 }, { "epoch": 20.0, "grad_norm": 0.0015741140814498067, "learning_rate": 0.0, "loss": 0.0033, "step": 2440 }, { "epoch": 20.0, "eval_accuracy": 0.8972431077694235, "eval_f1": 0.8792560061999484, "eval_loss": 0.8335620164871216, "eval_precision": 0.8707622232472325, "eval_recall": 0.889798145117294, "eval_runtime": 1.6776, "eval_samples_per_second": 237.834, "eval_steps_per_second": 29.804, "step": 2440 }, { "epoch": 20.0, "step": 2440, "total_flos": 7584162436176000.0, "train_loss": 0.05526667458356404, "train_runtime": 862.9394, "train_samples_per_second": 84.316, "train_steps_per_second": 2.828 } ], "logging_steps": 500, "max_steps": 2440, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 7584162436176000.0, "train_batch_size": 30, "trial_name": null, "trial_params": null }