{ "best_metric": 0.9092560719066743, "best_model_checkpoint": "./output_1/checkpoint-1408", "epoch": 29.53846153846154, "eval_steps": 64, "global_step": 1920, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.98, "learning_rate": 9.753846153846154e-06, "loss": 0.5876, "step": 64 }, { "epoch": 0.98, "eval_f1": 0.8264750469953308, "eval_loss": 0.426924467086792, "eval_precision": 0.8384115884115884, "eval_recall": 0.8222047843249156, "eval_runtime": 1.7863, "eval_samples_per_second": 163.47, "eval_steps_per_second": 5.598, "step": 64 }, { "epoch": 1.97, "learning_rate": 9.50769230769231e-06, "loss": 0.331, "step": 128 }, { "epoch": 1.97, "eval_f1": 0.8797480620155038, "eval_loss": 0.3329169452190399, "eval_precision": 0.8921407595440745, "eval_recall": 0.8746849289009369, "eval_runtime": 1.7913, "eval_samples_per_second": 163.008, "eval_steps_per_second": 5.582, "step": 128 }, { "epoch": 2.95, "learning_rate": 9.261538461538461e-06, "loss": 0.2585, "step": 192 }, { "epoch": 2.95, "eval_f1": 0.8914823472996463, "eval_loss": 0.31825584173202515, "eval_precision": 0.8960271317829458, "eval_recall": 0.8887382888666951, "eval_runtime": 1.7885, "eval_samples_per_second": 163.262, "eval_steps_per_second": 5.591, "step": 192 }, { "epoch": 3.94, "learning_rate": 9.015384615384616e-06, "loss": 0.2016, "step": 256 }, { "epoch": 3.94, "eval_f1": 0.8988811005098933, "eval_loss": 0.3036338686943054, "eval_precision": 0.9010176651305684, "eval_recall": 0.8972987111808627, "eval_runtime": 1.7803, "eval_samples_per_second": 164.015, "eval_steps_per_second": 5.617, "step": 256 }, { "epoch": 4.92, "learning_rate": 8.76923076923077e-06, "loss": 0.1733, "step": 320 }, { "epoch": 4.92, "eval_f1": 0.8919073833036791, "eval_loss": 0.31887006759643555, "eval_precision": 0.8940092165898618, "eval_recall": 0.890355257526038, "eval_runtime": 1.7908, "eval_samples_per_second": 163.059, "eval_steps_per_second": 5.584, "step": 320 }, { "epoch": 5.91, "learning_rate": 8.523076923076923e-06, "loss": 0.1627, "step": 384 }, { "epoch": 5.91, "eval_f1": 0.8975599724181316, "eval_loss": 0.3417821526527405, "eval_precision": 0.9093253968253968, "eval_recall": 0.8924478052028344, "eval_runtime": 1.7807, "eval_samples_per_second": 163.98, "eval_steps_per_second": 5.616, "step": 384 }, { "epoch": 6.89, "learning_rate": 8.276923076923078e-06, "loss": 0.1268, "step": 448 }, { "epoch": 6.89, "eval_f1": 0.8907594463150018, "eval_loss": 0.33714038133621216, "eval_precision": 0.9003055391287207, "eval_recall": 0.8863128358776811, "eval_runtime": 1.7763, "eval_samples_per_second": 164.386, "eval_steps_per_second": 5.63, "step": 448 }, { "epoch": 7.88, "learning_rate": 8.03076923076923e-06, "loss": 0.1059, "step": 512 }, { "epoch": 7.88, "eval_f1": 0.8910119800132443, "eval_loss": 0.36000651121139526, "eval_precision": 0.8987068965517242, "eval_recall": 0.8871213202073525, "eval_runtime": 1.809, "eval_samples_per_second": 161.417, "eval_steps_per_second": 5.528, "step": 512 }, { "epoch": 8.86, "learning_rate": 7.784615384615385e-06, "loss": 0.1036, "step": 576 }, { "epoch": 8.86, "eval_f1": 0.9018817204301075, "eval_loss": 0.3717869520187378, "eval_precision": 0.9072715791518919, "eval_recall": 0.8987492271840967, "eval_runtime": 1.7837, "eval_samples_per_second": 163.704, "eval_steps_per_second": 5.606, "step": 576 }, { "epoch": 9.85, "learning_rate": 7.538461538461539e-06, "loss": 0.0901, "step": 640 }, { "epoch": 9.85, "eval_f1": 0.8978072239720984, "eval_loss": 0.3917124569416046, "eval_precision": 0.9075004928050463, "eval_recall": 0.8932562895325058, "eval_runtime": 1.816, "eval_samples_per_second": 160.796, "eval_steps_per_second": 5.507, "step": 640 }, { "epoch": 10.83, "learning_rate": 7.292307692307693e-06, "loss": 0.0859, "step": 704 }, { "epoch": 10.83, "eval_f1": 0.8944069431051109, "eval_loss": 0.3997272551059723, "eval_precision": 0.9030704986489806, "eval_recall": 0.8901888048699291, "eval_runtime": 1.7905, "eval_samples_per_second": 163.086, "eval_steps_per_second": 5.585, "step": 704 }, { "epoch": 11.82, "learning_rate": 7.046153846153847e-06, "loss": 0.0559, "step": 768 }, { "epoch": 11.82, "eval_f1": 0.9027916488324536, "eval_loss": 0.4531518518924713, "eval_precision": 0.9027916488324536, "eval_recall": 0.9027916488324536, "eval_runtime": 1.7898, "eval_samples_per_second": 163.144, "eval_steps_per_second": 5.587, "step": 768 }, { "epoch": 12.8, "learning_rate": 6.800000000000001e-06, "loss": 0.0593, "step": 832 }, { "epoch": 12.8, "eval_f1": 0.8808563748079878, "eval_loss": 0.47470441460609436, "eval_precision": 0.8859960169038714, "eval_recall": 0.8779188662196225, "eval_runtime": 1.7809, "eval_samples_per_second": 163.966, "eval_steps_per_second": 5.615, "step": 832 }, { "epoch": 13.78, "learning_rate": 6.553846153846154e-06, "loss": 0.0448, "step": 896 }, { "epoch": 13.78, "eval_f1": 0.8958481951776287, "eval_loss": 0.4653804302215576, "eval_precision": 0.8958481951776287, "eval_recall": 0.8958481951776287, "eval_runtime": 1.7817, "eval_samples_per_second": 163.887, "eval_steps_per_second": 5.613, "step": 896 }, { "epoch": 14.77, "learning_rate": 6.307692307692308e-06, "loss": 0.035, "step": 960 }, { "epoch": 14.77, "eval_f1": 0.9024576473395372, "eval_loss": 0.4710945188999176, "eval_precision": 0.9040958083832336, "eval_recall": 0.9011746801731108, "eval_runtime": 1.8119, "eval_samples_per_second": 161.155, "eval_steps_per_second": 5.519, "step": 960 }, { "epoch": 15.75, "learning_rate": 6.061538461538462e-06, "loss": 0.0372, "step": 1024 }, { "epoch": 15.75, "eval_f1": 0.8839804948528084, "eval_loss": 0.5761662125587463, "eval_precision": 0.8915556426332288, "eval_recall": 0.8801778665525277, "eval_runtime": 1.7874, "eval_samples_per_second": 163.369, "eval_steps_per_second": 5.595, "step": 1024 }, { "epoch": 16.74, "learning_rate": 5.815384615384616e-06, "loss": 0.0341, "step": 1088 }, { "epoch": 16.74, "eval_f1": 0.9020838323353293, "eval_loss": 0.5446054339408875, "eval_precision": 0.9060461070030448, "eval_recall": 0.8995577115137681, "eval_runtime": 1.7858, "eval_samples_per_second": 163.512, "eval_steps_per_second": 5.6, "step": 1088 }, { "epoch": 17.72, "learning_rate": 5.56923076923077e-06, "loss": 0.0321, "step": 1152 }, { "epoch": 17.72, "eval_f1": 0.9058548177161077, "eval_loss": 0.5152109265327454, "eval_precision": 0.908026113671275, "eval_recall": 0.9042421648356874, "eval_runtime": 1.7852, "eval_samples_per_second": 163.569, "eval_steps_per_second": 5.602, "step": 1152 }, { "epoch": 18.71, "learning_rate": 5.323076923076923e-06, "loss": 0.0171, "step": 1216 }, { "epoch": 18.71, "eval_f1": 0.887100671789667, "eval_loss": 0.6121218204498291, "eval_precision": 0.897562663766253, "eval_recall": 0.882436866885433, "eval_runtime": 1.7977, "eval_samples_per_second": 162.431, "eval_steps_per_second": 5.563, "step": 1216 }, { "epoch": 19.69, "learning_rate": 5.076923076923077e-06, "loss": 0.0261, "step": 1280 }, { "epoch": 19.69, "eval_f1": 0.9024576473395372, "eval_loss": 0.5516932010650635, "eval_precision": 0.9040958083832336, "eval_recall": 0.9011746801731108, "eval_runtime": 1.7934, "eval_samples_per_second": 162.821, "eval_steps_per_second": 5.576, "step": 1280 }, { "epoch": 20.68, "learning_rate": 4.830769230769231e-06, "loss": 0.0153, "step": 1344 }, { "epoch": 20.68, "eval_f1": 0.89848348618354, "eval_loss": 0.5832306146621704, "eval_precision": 0.9031007751937985, "eval_recall": 0.89568174252152, "eval_runtime": 1.7807, "eval_samples_per_second": 163.978, "eval_steps_per_second": 5.616, "step": 1344 }, { "epoch": 21.66, "learning_rate": 4.5846153846153855e-06, "loss": 0.0183, "step": 1408 }, { "epoch": 21.66, "eval_f1": 0.9092560719066743, "eval_loss": 0.5685573220252991, "eval_precision": 0.9120123153894261, "eval_recall": 0.9073096494982642, "eval_runtime": 1.8067, "eval_samples_per_second": 161.617, "eval_steps_per_second": 5.535, "step": 1408 }, { "epoch": 22.65, "learning_rate": 4.338461538461539e-06, "loss": 0.0098, "step": 1472 }, { "epoch": 22.65, "eval_f1": 0.9058548177161077, "eval_loss": 0.5791997909545898, "eval_precision": 0.908026113671275, "eval_recall": 0.9042421648356874, "eval_runtime": 1.7948, "eval_samples_per_second": 162.694, "eval_steps_per_second": 5.572, "step": 1472 }, { "epoch": 23.63, "learning_rate": 4.0923076923076925e-06, "loss": 0.0216, "step": 1536 }, { "epoch": 23.63, "eval_f1": 0.8948732718894008, "eval_loss": 0.6078387498855591, "eval_precision": 0.9001797250692184, "eval_recall": 0.8918057735292719, "eval_runtime": 1.7897, "eval_samples_per_second": 163.158, "eval_steps_per_second": 5.588, "step": 1536 }, { "epoch": 24.62, "learning_rate": 3.846153846153847e-06, "loss": 0.0076, "step": 1600 }, { "epoch": 24.62, "eval_f1": 0.8950898203592814, "eval_loss": 0.6126357913017273, "eval_precision": 0.898989898989899, "eval_recall": 0.8926142578589432, "eval_runtime": 1.7831, "eval_samples_per_second": 163.755, "eval_steps_per_second": 5.608, "step": 1600 }, { "epoch": 25.6, "learning_rate": 3.6000000000000003e-06, "loss": 0.0073, "step": 1664 }, { "epoch": 25.6, "eval_f1": 0.9058548177161077, "eval_loss": 0.6077719926834106, "eval_precision": 0.908026113671275, "eval_recall": 0.9042421648356874, "eval_runtime": 1.7913, "eval_samples_per_second": 163.011, "eval_steps_per_second": 5.583, "step": 1664 }, { "epoch": 26.58, "learning_rate": 3.353846153846154e-06, "loss": 0.0257, "step": 1728 }, { "epoch": 26.58, "eval_f1": 0.9020838323353293, "eval_loss": 0.6265017986297607, "eval_precision": 0.9060461070030448, "eval_recall": 0.8995577115137681, "eval_runtime": 1.8318, "eval_samples_per_second": 159.409, "eval_steps_per_second": 5.459, "step": 1728 }, { "epoch": 27.57, "learning_rate": 3.1076923076923076e-06, "loss": 0.0047, "step": 1792 }, { "epoch": 27.57, "eval_f1": 0.891252898280854, "eval_loss": 0.6706699728965759, "eval_precision": 0.8972822910578608, "eval_recall": 0.8879298045370239, "eval_runtime": 1.7836, "eval_samples_per_second": 163.716, "eval_steps_per_second": 5.607, "step": 1792 }, { "epoch": 28.55, "learning_rate": 2.8615384615384615e-06, "loss": 0.0063, "step": 1856 }, { "epoch": 28.55, "eval_f1": 0.89848348618354, "eval_loss": 0.6434349417686462, "eval_precision": 0.9031007751937985, "eval_recall": 0.89568174252152, "eval_runtime": 1.8097, "eval_samples_per_second": 161.353, "eval_steps_per_second": 5.526, "step": 1856 }, { "epoch": 29.54, "learning_rate": 2.615384615384616e-06, "loss": 0.0045, "step": 1920 }, { "epoch": 29.54, "eval_f1": 0.9092560719066743, "eval_loss": 0.6284749507904053, "eval_precision": 0.9120123153894261, "eval_recall": 0.9073096494982642, "eval_runtime": 1.7895, "eval_samples_per_second": 163.173, "eval_steps_per_second": 5.588, "step": 1920 } ], "logging_steps": 64, "max_steps": 2600, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 64, "total_flos": 2249673523312320.0, "train_batch_size": 32, "trial_name": null, "trial_params": null }