{
  "best_global_step": 1890,
  "best_metric": 0.7751064362634611,
  "best_model_checkpoint": "./my_unified_model_classification_final/checkpoint-1890",
  "epoch": 2.0,
  "eval_steps": 315,
  "global_step": 2108,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04743833017077799,
      "grad_norm": 60.25,
      "learning_rate": 2.3333333333333336e-06,
      "loss": 1.0421,
      "step": 50
    },
    {
      "epoch": 0.09487666034155598,
      "grad_norm": 59.75,
      "learning_rate": 4.714285714285715e-06,
      "loss": 0.8211,
      "step": 100
    },
    {
      "epoch": 0.14231499051233396,
      "grad_norm": 41.0,
      "learning_rate": 7.095238095238096e-06,
      "loss": 0.7358,
      "step": 150
    },
    {
      "epoch": 0.18975332068311196,
      "grad_norm": 53.0,
      "learning_rate": 9.476190476190476e-06,
      "loss": 0.7224,
      "step": 200
    },
    {
      "epoch": 0.23719165085388993,
      "grad_norm": 109.5,
      "learning_rate": 9.794520547945206e-06,
      "loss": 0.6881,
      "step": 250
    },
    {
      "epoch": 0.2846299810246679,
      "grad_norm": 67.0,
      "learning_rate": 9.531085353003163e-06,
      "loss": 0.6706,
      "step": 300
    },
    {
      "epoch": 0.29886148007590135,
      "eval_accuracy": 0.6481514450180998,
      "eval_auc": 0.7337608189235229,
      "eval_f1": 0.6774035584090539,
      "eval_false_negatives": 1756,
      "eval_false_positives": 4173,
      "eval_loss": 0.6601030230522156,
      "eval_precision": 0.5986728216964801,
      "eval_recall": 0.7799774464352838,
      "eval_runtime": 565.4848,
      "eval_samples_per_second": 29.799,
      "eval_specificity": 0.5295377677564825,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 4697,
      "eval_true_positives": 6225,
      "step": 315
    },
    {
      "epoch": 0.29886148007590135,
      "step": 315,
      "train_accuracy": 0.689,
      "train_auc": 0.7696650342487676,
      "train_f1": 0.7210762331838565,
      "train_false_negatives": 106,
      "train_false_positives": 205,
      "train_loss": 0.6076448559761047,
      "train_precision": 0.6622734761120264,
      "train_recall": 0.7913385826771654,
      "train_runtime": 33.5457,
      "train_samples_per_second": 29.81,
      "train_specificity": 0.5833333333333334,
      "train_steps_per_second": 0.238,
      "train_true_negatives": 287,
      "train_true_positives": 402
    },
    {
      "epoch": 0.33206831119544594,
      "grad_norm": 27.25,
      "learning_rate": 9.267650158061118e-06,
      "loss": 0.637,
      "step": 350
    },
    {
      "epoch": 0.3795066413662239,
      "grad_norm": 32.25,
      "learning_rate": 9.004214963119073e-06,
      "loss": 0.6261,
      "step": 400
    },
    {
      "epoch": 0.4269449715370019,
      "grad_norm": 12.6875,
      "learning_rate": 8.74077976817703e-06,
      "loss": 0.6213,
      "step": 450
    },
    {
      "epoch": 0.47438330170777987,
      "grad_norm": 10.6875,
      "learning_rate": 8.477344573234985e-06,
      "loss": 0.6218,
      "step": 500
    },
    {
      "epoch": 0.5218216318785579,
      "grad_norm": 9.75,
      "learning_rate": 8.21390937829294e-06,
      "loss": 0.6033,
      "step": 550
    },
    {
      "epoch": 0.5692599620493358,
      "grad_norm": 17.75,
      "learning_rate": 7.950474183350897e-06,
      "loss": 0.6017,
      "step": 600
    },
    {
      "epoch": 0.5977229601518027,
      "eval_accuracy": 0.6978814313690582,
      "eval_auc": 0.7703106885617717,
      "eval_f1": 0.6362272240085745,
      "eval_false_negatives": 3529,
      "eval_false_positives": 1562,
      "eval_loss": 0.589336097240448,
      "eval_precision": 0.7402726970402395,
      "eval_recall": 0.5578248339807041,
      "eval_runtime": 566.1112,
      "eval_samples_per_second": 29.766,
      "eval_specificity": 0.8239007891770012,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 7308,
      "eval_true_positives": 4452,
      "step": 630
    },
    {
      "epoch": 0.5977229601518027,
      "step": 630,
      "train_accuracy": 0.699,
      "train_auc": 0.7801548075728242,
      "train_f1": 0.6403823178016727,
      "train_false_negatives": 209,
      "train_false_positives": 92,
      "train_loss": 0.5804136991500854,
      "train_precision": 0.7444444444444445,
      "train_recall": 0.5618448637316562,
      "train_runtime": 33.5615,
      "train_samples_per_second": 29.796,
      "train_specificity": 0.8240917782026769,
      "train_steps_per_second": 0.238,
      "train_true_negatives": 431,
      "train_true_positives": 268
    },
    {
      "epoch": 0.6166982922201139,
      "grad_norm": 65.5,
      "learning_rate": 7.687038988408853e-06,
      "loss": 0.5989,
      "step": 650
    },
    {
      "epoch": 0.6641366223908919,
      "grad_norm": 38.75,
      "learning_rate": 7.423603793466808e-06,
      "loss": 0.5827,
      "step": 700
    },
    {
      "epoch": 0.7115749525616698,
      "grad_norm": 15.25,
      "learning_rate": 7.1601685985247635e-06,
      "loss": 0.5679,
      "step": 750
    },
    {
      "epoch": 0.7590132827324478,
      "grad_norm": 31.875,
      "learning_rate": 6.896733403582719e-06,
      "loss": 0.561,
      "step": 800
    },
    {
      "epoch": 0.8064516129032258,
      "grad_norm": 24.25,
      "learning_rate": 6.633298208640675e-06,
      "loss": 0.5675,
      "step": 850
    },
    {
      "epoch": 0.8538899430740038,
      "grad_norm": 16.0,
      "learning_rate": 6.36986301369863e-06,
      "loss": 0.544,
      "step": 900
    },
    {
      "epoch": 0.896584440227704,
      "eval_accuracy": 0.7435167052400451,
      "eval_auc": 0.8127202542905241,
      "eval_f1": 0.7221293557927221,
      "eval_false_negatives": 2365,
      "eval_false_positives": 1957,
      "eval_loss": 0.5301145911216736,
      "eval_precision": 0.7415819358246402,
      "eval_recall": 0.7036712191454705,
      "eval_runtime": 565.3739,
      "eval_samples_per_second": 29.805,
      "eval_specificity": 0.7793686583990981,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 6913,
      "eval_true_positives": 5616,
      "step": 945
    },
    {
      "epoch": 0.896584440227704,
      "step": 945,
      "train_accuracy": 0.769,
      "train_auc": 0.8395073580294321,
      "train_f1": 0.7621009268795057,
      "train_false_negatives": 129,
      "train_false_positives": 102,
      "train_loss": 0.498809278011322,
      "train_precision": 0.7838983050847458,
      "train_recall": 0.7414829659318637,
      "train_runtime": 33.5647,
      "train_samples_per_second": 29.793,
      "train_specificity": 0.7964071856287425,
      "train_steps_per_second": 0.238,
      "train_true_negatives": 399,
      "train_true_positives": 370
    },
    {
      "epoch": 0.9013282732447818,
      "grad_norm": 21.375,
      "learning_rate": 6.106427818756586e-06,
      "loss": 0.5341,
      "step": 950
    },
    {
      "epoch": 0.9487666034155597,
      "grad_norm": 17.625,
      "learning_rate": 5.8429926238145414e-06,
      "loss": 0.5227,
      "step": 1000
    },
    {
      "epoch": 0.9962049335863378,
      "grad_norm": 17.5,
      "learning_rate": 5.579557428872497e-06,
      "loss": 0.5105,
      "step": 1050
    },
    {
      "epoch": 1.0436432637571158,
      "grad_norm": 15.8125,
      "learning_rate": 5.316122233930453e-06,
      "loss": 0.4476,
      "step": 1100
    },
    {
      "epoch": 1.0910815939278937,
      "grad_norm": 36.5,
      "learning_rate": 5.05268703898841e-06,
      "loss": 0.432,
      "step": 1150
    },
    {
      "epoch": 1.1385199240986716,
      "grad_norm": 20.625,
      "learning_rate": 4.789251844046365e-06,
      "loss": 0.4303,
      "step": 1200
    },
    {
      "epoch": 1.1859582542694498,
      "grad_norm": 15.0,
      "learning_rate": 4.525816649104321e-06,
      "loss": 0.431,
      "step": 1250
    },
    {
      "epoch": 1.1954459203036052,
      "eval_accuracy": 0.7668981069372738,
      "eval_auc": 0.8454007170637932,
      "eval_f1": 0.7403146899378553,
      "eval_false_negatives": 2382,
      "eval_false_positives": 1546,
      "eval_loss": 0.49584120512008667,
      "eval_precision": 0.7836249125262421,
      "eval_recall": 0.7015411602556071,
      "eval_runtime": 565.3929,
      "eval_samples_per_second": 29.804,
      "eval_specificity": 0.8257046223224351,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 7324,
      "eval_true_positives": 5599,
      "step": 1260
    },
    {
      "epoch": 1.1954459203036052,
      "step": 1260,
      "train_accuracy": 0.816,
      "train_auc": 0.8946033653846154,
      "train_f1": 0.7960088691796009,
      "train_false_negatives": 121,
      "train_false_positives": 63,
      "train_loss": 0.4144395887851715,
      "train_precision": 0.8507109004739336,
      "train_recall": 0.7479166666666667,
      "train_runtime": 33.5611,
      "train_samples_per_second": 29.796,
      "train_specificity": 0.8788461538461538,
      "train_steps_per_second": 0.238,
      "train_true_negatives": 457,
      "train_true_positives": 359
    },
    {
      "epoch": 1.2333965844402277,
      "grad_norm": 26.375,
      "learning_rate": 4.262381454162276e-06,
      "loss": 0.4144,
      "step": 1300
    },
    {
      "epoch": 1.2808349146110056,
      "grad_norm": 27.375,
      "learning_rate": 3.998946259220232e-06,
      "loss": 0.4234,
      "step": 1350
    },
    {
      "epoch": 1.3282732447817835,
      "grad_norm": 37.0,
      "learning_rate": 3.7355110642781876e-06,
      "loss": 0.4121,
      "step": 1400
    },
    {
      "epoch": 1.3757115749525617,
      "grad_norm": 22.0,
      "learning_rate": 3.4720758693361435e-06,
      "loss": 0.4019,
      "step": 1450
    },
    {
      "epoch": 1.4231499051233396,
      "grad_norm": 22.75,
      "learning_rate": 3.2086406743940995e-06,
      "loss": 0.3934,
      "step": 1500
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 22.75,
      "learning_rate": 2.945205479452055e-06,
      "loss": 0.3826,
      "step": 1550
    },
    {
      "epoch": 1.4943074003795067,
      "eval_accuracy": 0.7803691175597888,
      "eval_auc": 0.8628874001345077,
      "eval_f1": 0.7676272995542162,
      "eval_false_negatives": 1868,
      "eval_false_positives": 1833,
      "eval_loss": 0.4711809456348419,
      "eval_precision": 0.7693178957966272,
      "eval_recall": 0.7659441172785365,
      "eval_runtime": 565.811,
      "eval_samples_per_second": 29.782,
      "eval_specificity": 0.793348365276212,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 7037,
      "eval_true_positives": 6113,
      "step": 1575
    },
    {
      "epoch": 1.4943074003795067,
      "step": 1575,
      "train_accuracy": 0.864,
      "train_auc": 0.936523558617489,
      "train_f1": 0.859504132231405,
      "train_false_negatives": 77,
      "train_false_positives": 59,
      "train_loss": 0.32817962765693665,
      "train_precision": 0.8757894736842106,
      "train_recall": 0.8438133874239351,
      "train_runtime": 33.5625,
      "train_samples_per_second": 29.795,
      "train_specificity": 0.883629191321499,
      "train_steps_per_second": 0.238,
      "train_true_negatives": 448,
      "train_true_positives": 416
    },
    {
      "epoch": 1.5180265654648957,
      "grad_norm": 26.25,
      "learning_rate": 2.681770284510011e-06,
      "loss": 0.3818,
      "step": 1600
    },
    {
      "epoch": 1.5654648956356736,
      "grad_norm": 23.75,
      "learning_rate": 2.4183350895679664e-06,
      "loss": 0.4057,
      "step": 1650
    },
    {
      "epoch": 1.6129032258064515,
      "grad_norm": 31.375,
      "learning_rate": 2.1548998946259223e-06,
      "loss": 0.3949,
      "step": 1700
    },
    {
      "epoch": 1.6603415559772297,
      "grad_norm": 22.625,
      "learning_rate": 1.8914646996838779e-06,
      "loss": 0.3875,
      "step": 1750
    },
    {
      "epoch": 1.7077798861480076,
      "grad_norm": 26.375,
      "learning_rate": 1.6280295047418338e-06,
      "loss": 0.38,
      "step": 1800
    },
    {
      "epoch": 1.7552182163187857,
      "grad_norm": 26.75,
      "learning_rate": 1.3645943097997893e-06,
      "loss": 0.3754,
      "step": 1850
    },
    {
      "epoch": 1.793168880455408,
      "eval_accuracy": 0.7868375764049611,
      "eval_auc": 0.8702927979882322,
      "eval_f1": 0.7751064362634611,
      "eval_false_negatives": 1791,
      "eval_false_positives": 1801,
      "eval_loss": 0.45739424228668213,
      "eval_precision": 0.7746214491302715,
      "eval_recall": 0.7755920310738003,
      "eval_runtime": 565.5123,
      "eval_samples_per_second": 29.798,
      "eval_specificity": 0.7969560315670801,
      "eval_steps_per_second": 0.233,
      "eval_true_negatives": 7069,
      "eval_true_positives": 6190,
      "step": 1890
    },
    {
      "epoch": 1.793168880455408,
      "step": 1890,
      "train_accuracy": 0.869,
      "train_auc": 0.9383460241118122,
      "train_f1": 0.8659160696008188,
      "train_false_negatives": 68,
      "train_false_positives": 63,
      "train_loss": 0.32318422198295593,
      "train_precision": 0.8703703703703703,
      "train_recall": 0.8615071283095723,
      "train_runtime": 33.5363,
      "train_samples_per_second": 29.818,
      "train_specificity": 0.8762278978388998,
      "train_steps_per_second": 0.239,
      "train_true_negatives": 446,
      "train_true_positives": 423
    },
    {
      "epoch": 1.8026565464895636,
      "grad_norm": 29.0,
      "learning_rate": 1.101159114857745e-06,
      "loss": 0.3655,
      "step": 1900
    },
    {
      "epoch": 1.8500948766603416,
      "grad_norm": 25.625,
      "learning_rate": 8.377239199157008e-07,
      "loss": 0.3777,
      "step": 1950
    },
    {
      "epoch": 1.8975332068311195,
      "grad_norm": 26.25,
      "learning_rate": 5.742887249736566e-07,
      "loss": 0.371,
      "step": 2000
    },
    {
      "epoch": 1.9449715370018974,
      "grad_norm": 34.25,
      "learning_rate": 3.108535300316122e-07,
      "loss": 0.3917,
      "step": 2050
    },
    {
      "epoch": 1.9924098671726755,
      "grad_norm": 24.75,
      "learning_rate": 4.741833508956797e-08,
      "loss": 0.3776,
      "step": 2100
    }
  ],
  "logging_steps": 50,
  "max_steps": 2108,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 315,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1560301981608182e+19,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}