{ "best_metric": 0.8011033681765389, "best_model_checkpoint": "./output_4/checkpoint-270", "epoch": 16.451612903225808, "eval_steps": 30, "global_step": 510, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.97, "learning_rate": 9.758064516129034e-06, "loss": 0.4653, "step": 30 }, { "epoch": 0.97, "eval_f1": 0.47104247104247104, "eval_loss": 0.34549078345298767, "eval_precision": 0.44525547445255476, "eval_recall": 0.5, "eval_runtime": 0.8351, "eval_samples_per_second": 164.048, "eval_steps_per_second": 5.987, "step": 30 }, { "epoch": 1.94, "learning_rate": 9.516129032258065e-06, "loss": 0.3365, "step": 60 }, { "epoch": 1.94, "eval_f1": 0.47104247104247104, "eval_loss": 0.33970290422439575, "eval_precision": 0.44525547445255476, "eval_recall": 0.5, "eval_runtime": 0.8593, "eval_samples_per_second": 159.439, "eval_steps_per_second": 5.819, "step": 60 }, { "epoch": 2.9, "learning_rate": 9.274193548387097e-06, "loss": 0.3174, "step": 90 }, { "epoch": 2.9, "eval_f1": 0.47104247104247104, "eval_loss": 0.3178149461746216, "eval_precision": 0.44525547445255476, "eval_recall": 0.5, "eval_runtime": 0.8461, "eval_samples_per_second": 161.924, "eval_steps_per_second": 5.91, "step": 90 }, { "epoch": 3.87, "learning_rate": 9.03225806451613e-06, "loss": 0.3154, "step": 120 }, { "epoch": 3.87, "eval_f1": 0.787531017369727, "eval_loss": 0.27127787470817566, "eval_precision": 0.8358585858585859, "eval_recall": 0.7543715846994535, "eval_runtime": 0.8477, "eval_samples_per_second": 161.61, "eval_steps_per_second": 5.898, "step": 120 }, { "epoch": 4.84, "learning_rate": 8.790322580645163e-06, "loss": 0.2477, "step": 150 }, { "epoch": 4.84, "eval_f1": 0.7579116465863454, "eval_loss": 0.2217591255903244, "eval_precision": 0.8185039370078739, "eval_recall": 0.7210382513661202, "eval_runtime": 0.8342, "eval_samples_per_second": 164.232, "eval_steps_per_second": 5.994, "step": 150 }, { "epoch": 5.81, "learning_rate": 8.548387096774194e-06, "loss": 0.1818, "step": 180 }, { "epoch": 5.81, "eval_f1": 0.7754098360655737, "eval_loss": 0.2334146499633789, "eval_precision": 0.7754098360655737, "eval_recall": 0.7754098360655737, "eval_runtime": 0.833, "eval_samples_per_second": 164.466, "eval_steps_per_second": 6.002, "step": 180 }, { "epoch": 6.77, "learning_rate": 8.306451612903227e-06, "loss": 0.1681, "step": 210 }, { "epoch": 6.77, "eval_f1": 0.6914948900051966, "eval_loss": 0.28801918029785156, "eval_precision": 0.7737403100775193, "eval_recall": 0.6543715846994536, "eval_runtime": 0.8432, "eval_samples_per_second": 162.485, "eval_steps_per_second": 5.93, "step": 210 }, { "epoch": 7.74, "learning_rate": 8.064516129032258e-06, "loss": 0.1128, "step": 240 }, { "epoch": 7.74, "eval_f1": 0.726, "eval_loss": 0.2648162841796875, "eval_precision": 0.7981770833333333, "eval_recall": 0.6877049180327869, "eval_runtime": 0.8446, "eval_samples_per_second": 162.204, "eval_steps_per_second": 5.92, "step": 240 }, { "epoch": 8.71, "learning_rate": 7.822580645161291e-06, "loss": 0.0989, "step": 270 }, { "epoch": 8.71, "eval_f1": 0.8011033681765389, "eval_loss": 0.2688542306423187, "eval_precision": 0.8219602977667494, "eval_recall": 0.7836065573770492, "eval_runtime": 0.8386, "eval_samples_per_second": 163.369, "eval_steps_per_second": 5.962, "step": 270 }, { "epoch": 9.68, "learning_rate": 7.580645161290323e-06, "loss": 0.09, "step": 300 }, { "epoch": 9.68, "eval_f1": 0.7716666666666667, "eval_loss": 0.27322623133659363, "eval_precision": 0.8576388888888888, "eval_recall": 0.7251366120218579, "eval_runtime": 0.8354, "eval_samples_per_second": 163.993, "eval_steps_per_second": 5.985, "step": 300 }, { "epoch": 10.65, "learning_rate": 7.338709677419356e-06, "loss": 0.0772, "step": 330 }, { "epoch": 10.65, "eval_f1": 0.7740290898185634, "eval_loss": 0.30010420083999634, "eval_precision": 0.8053333333333332, "eval_recall": 0.7502732240437158, "eval_runtime": 0.8358, "eval_samples_per_second": 163.909, "eval_steps_per_second": 5.982, "step": 330 }, { "epoch": 11.61, "learning_rate": 7.096774193548388e-06, "loss": 0.0616, "step": 360 }, { "epoch": 11.61, "eval_f1": 0.726, "eval_loss": 0.3184939920902252, "eval_precision": 0.7981770833333333, "eval_recall": 0.6877049180327869, "eval_runtime": 0.8427, "eval_samples_per_second": 162.575, "eval_steps_per_second": 5.933, "step": 360 }, { "epoch": 12.58, "learning_rate": 6.854838709677419e-06, "loss": 0.0476, "step": 390 }, { "epoch": 12.58, "eval_f1": 0.7215447154471545, "eval_loss": 0.36287298798561096, "eval_precision": 0.7369727047146402, "eval_recall": 0.708743169398907, "eval_runtime": 0.8413, "eval_samples_per_second": 162.849, "eval_steps_per_second": 5.943, "step": 390 }, { "epoch": 13.55, "learning_rate": 6.612903225806452e-06, "loss": 0.0331, "step": 420 }, { "epoch": 13.55, "eval_f1": 0.7878958479943701, "eval_loss": 0.3389006555080414, "eval_precision": 0.7970383275261324, "eval_recall": 0.7795081967213114, "eval_runtime": 0.8333, "eval_samples_per_second": 164.406, "eval_steps_per_second": 6.0, "step": 420 }, { "epoch": 14.52, "learning_rate": 6.370967741935485e-06, "loss": 0.0473, "step": 450 }, { "epoch": 14.52, "eval_f1": 0.7389572146197818, "eval_loss": 0.3224264681339264, "eval_precision": 0.8401162790697674, "eval_recall": 0.6918032786885246, "eval_runtime": 0.8344, "eval_samples_per_second": 164.2, "eval_steps_per_second": 5.993, "step": 450 }, { "epoch": 15.48, "learning_rate": 6.129032258064517e-06, "loss": 0.0342, "step": 480 }, { "epoch": 15.48, "eval_f1": 0.787531017369727, "eval_loss": 0.3013747036457062, "eval_precision": 0.8358585858585859, "eval_recall": 0.7543715846994535, "eval_runtime": 0.8339, "eval_samples_per_second": 164.295, "eval_steps_per_second": 5.996, "step": 480 }, { "epoch": 16.45, "learning_rate": 5.887096774193549e-06, "loss": 0.0374, "step": 510 }, { "epoch": 16.45, "eval_f1": 0.726, "eval_loss": 0.34248507022857666, "eval_precision": 0.7981770833333333, "eval_recall": 0.6877049180327869, "eval_runtime": 0.8368, "eval_samples_per_second": 163.722, "eval_steps_per_second": 5.975, "step": 510 } ], "logging_steps": 30, "max_steps": 1240, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 30, "total_flos": 415559245279680.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }