{ "best_metric": 0.3162839710712433, "best_model_checkpoint": "autotrain-ytgys-osuer/checkpoint-1326", "epoch": 3.0, "eval_steps": 500, "global_step": 1326, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.049773755656108594, "grad_norm": 9.081209182739258, "learning_rate": 8.270676691729324e-06, "loss": 0.6296, "step": 22 }, { "epoch": 0.09954751131221719, "grad_norm": 1.96213698387146, "learning_rate": 1.6541353383458648e-05, "loss": 0.4877, "step": 44 }, { "epoch": 0.1493212669683258, "grad_norm": 2.1971828937530518, "learning_rate": 2.4812030075187968e-05, "loss": 0.5465, "step": 66 }, { "epoch": 0.19909502262443438, "grad_norm": 5.049612998962402, "learning_rate": 3.3082706766917295e-05, "loss": 0.5186, "step": 88 }, { "epoch": 0.248868778280543, "grad_norm": 0.6077613830566406, "learning_rate": 4.135338345864662e-05, "loss": 0.2962, "step": 110 }, { "epoch": 0.2986425339366516, "grad_norm": 5.124961853027344, "learning_rate": 4.9624060150375936e-05, "loss": 0.5159, "step": 132 }, { "epoch": 0.34841628959276016, "grad_norm": 1.2383034229278564, "learning_rate": 4.9119865884325234e-05, "loss": 0.5115, "step": 154 }, { "epoch": 0.39819004524886875, "grad_norm": 4.597978115081787, "learning_rate": 4.8197820620285e-05, "loss": 0.4524, "step": 176 }, { "epoch": 0.4479638009049774, "grad_norm": 4.115572452545166, "learning_rate": 4.727577535624476e-05, "loss": 0.4217, "step": 198 }, { "epoch": 0.497737556561086, "grad_norm": 1.8618402481079102, "learning_rate": 4.635373009220453e-05, "loss": 0.325, "step": 220 }, { "epoch": 0.5475113122171946, "grad_norm": 4.649389743804932, "learning_rate": 4.5431684828164296e-05, "loss": 0.4603, "step": 242 }, { "epoch": 0.5972850678733032, "grad_norm": 2.3018507957458496, "learning_rate": 4.450963956412406e-05, "loss": 0.4817, "step": 264 }, { "epoch": 0.6470588235294118, "grad_norm": 1.6560252904891968, "learning_rate": 4.358759430008382e-05, "loss": 0.4588, "step": 286 }, { "epoch": 0.6968325791855203, "grad_norm": 1.248030185699463, "learning_rate": 4.266554903604359e-05, "loss": 0.4533, "step": 308 }, { "epoch": 0.746606334841629, "grad_norm": 7.337639808654785, "learning_rate": 4.174350377200336e-05, "loss": 0.5258, "step": 330 }, { "epoch": 0.7963800904977375, "grad_norm": 3.778733253479004, "learning_rate": 4.0821458507963125e-05, "loss": 0.4715, "step": 352 }, { "epoch": 0.8461538461538461, "grad_norm": 2.055952787399292, "learning_rate": 3.9899413243922885e-05, "loss": 0.5279, "step": 374 }, { "epoch": 0.8959276018099548, "grad_norm": 2.640718460083008, "learning_rate": 3.897736797988265e-05, "loss": 0.4142, "step": 396 }, { "epoch": 0.9457013574660633, "grad_norm": 2.906071424484253, "learning_rate": 3.805532271584242e-05, "loss": 0.618, "step": 418 }, { "epoch": 0.995475113122172, "grad_norm": 7.490321159362793, "learning_rate": 3.713327745180218e-05, "loss": 0.4642, "step": 440 }, { "epoch": 1.0, "eval_accuracy": 0.8301245753114382, "eval_auc": 0.6658117326057298, "eval_f1": 0.0, "eval_loss": 0.468678742647171, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 155.3554, "eval_samples_per_second": 5.684, "eval_steps_per_second": 0.36, "step": 442 }, { "epoch": 1.0452488687782806, "grad_norm": 1.7145849466323853, "learning_rate": 3.6211232187761947e-05, "loss": 0.5339, "step": 462 }, { "epoch": 1.0950226244343892, "grad_norm": 7.355587959289551, "learning_rate": 3.5289186923721714e-05, "loss": 0.4688, "step": 484 }, { "epoch": 1.1447963800904977, "grad_norm": 
8.240862846374512, "learning_rate": 3.436714165968148e-05, "loss": 0.413, "step": 506 }, { "epoch": 1.1945701357466063, "grad_norm": 4.566345691680908, "learning_rate": 3.344509639564124e-05, "loss": 0.4214, "step": 528 }, { "epoch": 1.244343891402715, "grad_norm": 7.886547088623047, "learning_rate": 3.252305113160101e-05, "loss": 0.5584, "step": 550 }, { "epoch": 1.2941176470588236, "grad_norm": 4.851104259490967, "learning_rate": 3.1601005867560775e-05, "loss": 0.5674, "step": 572 }, { "epoch": 1.3438914027149322, "grad_norm": 2.095370054244995, "learning_rate": 3.067896060352054e-05, "loss": 0.4522, "step": 594 }, { "epoch": 1.3936651583710407, "grad_norm": 1.348547339439392, "learning_rate": 2.9756915339480303e-05, "loss": 0.4711, "step": 616 }, { "epoch": 1.4434389140271493, "grad_norm": 1.514244556427002, "learning_rate": 2.8834870075440066e-05, "loss": 0.3652, "step": 638 }, { "epoch": 1.493212669683258, "grad_norm": 2.2231717109680176, "learning_rate": 2.7912824811399834e-05, "loss": 0.4884, "step": 660 }, { "epoch": 1.5429864253393664, "grad_norm": 4.4252777099609375, "learning_rate": 2.69907795473596e-05, "loss": 0.4066, "step": 682 }, { "epoch": 1.5927601809954752, "grad_norm": 2.0143589973449707, "learning_rate": 2.606873428331936e-05, "loss": 0.4817, "step": 704 }, { "epoch": 1.6425339366515836, "grad_norm": 1.2555855512619019, "learning_rate": 2.5146689019279128e-05, "loss": 0.4505, "step": 726 }, { "epoch": 1.6923076923076923, "grad_norm": 1.5008816719055176, "learning_rate": 2.4224643755238895e-05, "loss": 0.4987, "step": 748 }, { "epoch": 1.742081447963801, "grad_norm": 1.0928298234939575, "learning_rate": 2.330259849119866e-05, "loss": 0.4491, "step": 770 }, { "epoch": 1.7918552036199094, "grad_norm": 2.131342887878418, "learning_rate": 2.2380553227158423e-05, "loss": 0.5099, "step": 792 }, { "epoch": 1.8416289592760182, "grad_norm": 2.0319790840148926, "learning_rate": 2.145850796311819e-05, "loss": 0.4312, "step": 814 }, { "epoch": 1.8914027149321266, "grad_norm": 3.668442726135254, "learning_rate": 2.0536462699077953e-05, "loss": 0.414, "step": 836 }, { "epoch": 1.9411764705882353, "grad_norm": 1.9720642566680908, "learning_rate": 1.961441743503772e-05, "loss": 0.3894, "step": 858 }, { "epoch": 1.990950226244344, "grad_norm": 4.421242713928223, "learning_rate": 1.8692372170997484e-05, "loss": 0.4847, "step": 880 }, { "epoch": 2.0, "eval_accuracy": 0.8301245753114382, "eval_auc": 0.7693769895407002, "eval_f1": 0.0, "eval_loss": 0.45319485664367676, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 156.7435, "eval_samples_per_second": 5.633, "eval_steps_per_second": 0.357, "step": 884 }, { "epoch": 2.0407239819004523, "grad_norm": 4.845108985900879, "learning_rate": 1.777032690695725e-05, "loss": 0.426, "step": 902 }, { "epoch": 2.090497737556561, "grad_norm": 4.742054462432861, "learning_rate": 1.6848281642917015e-05, "loss": 0.4253, "step": 924 }, { "epoch": 2.1402714932126696, "grad_norm": 27.354101181030273, "learning_rate": 1.5926236378876782e-05, "loss": 0.4168, "step": 946 }, { "epoch": 2.1900452488687785, "grad_norm": 9.501100540161133, "learning_rate": 1.5004191114836546e-05, "loss": 0.4337, "step": 968 }, { "epoch": 2.239819004524887, "grad_norm": 2.986358165740967, "learning_rate": 1.4082145850796313e-05, "loss": 0.3713, "step": 990 }, { "epoch": 2.2895927601809953, "grad_norm": 13.785974502563477, "learning_rate": 1.3160100586756077e-05, "loss": 0.3517, "step": 1012 }, { "epoch": 2.339366515837104, "grad_norm": 6.597299098968506, "learning_rate": 
1.2238055322715842e-05, "loss": 0.362, "step": 1034 }, { "epoch": 2.3891402714932126, "grad_norm": 1.9302808046340942, "learning_rate": 1.1316010058675607e-05, "loss": 0.3733, "step": 1056 }, { "epoch": 2.4389140271493215, "grad_norm": 1.2917982339859009, "learning_rate": 1.0393964794635373e-05, "loss": 0.3184, "step": 1078 }, { "epoch": 2.48868778280543, "grad_norm": 2.898386240005493, "learning_rate": 9.471919530595138e-06, "loss": 0.4959, "step": 1100 }, { "epoch": 2.5384615384615383, "grad_norm": 5.883040904998779, "learning_rate": 8.549874266554904e-06, "loss": 0.3014, "step": 1122 }, { "epoch": 2.588235294117647, "grad_norm": 10.006911277770996, "learning_rate": 7.627829002514669e-06, "loss": 0.3047, "step": 1144 }, { "epoch": 2.6380090497737556, "grad_norm": 3.732818365097046, "learning_rate": 6.7057837384744345e-06, "loss": 0.382, "step": 1166 }, { "epoch": 2.6877828054298645, "grad_norm": 4.408326148986816, "learning_rate": 5.7837384744342e-06, "loss": 0.3173, "step": 1188 }, { "epoch": 2.737556561085973, "grad_norm": 7.149359226226807, "learning_rate": 4.861693210393965e-06, "loss": 0.4387, "step": 1210 }, { "epoch": 2.7873303167420813, "grad_norm": 9.934762001037598, "learning_rate": 3.939647946353731e-06, "loss": 0.2751, "step": 1232 }, { "epoch": 2.83710407239819, "grad_norm": 5.656704902648926, "learning_rate": 3.0176026823134957e-06, "loss": 0.3792, "step": 1254 }, { "epoch": 2.8868778280542986, "grad_norm": 7.494544506072998, "learning_rate": 2.095557418273261e-06, "loss": 0.3882, "step": 1276 }, { "epoch": 2.9366515837104075, "grad_norm": 12.555413246154785, "learning_rate": 1.173512154233026e-06, "loss": 0.3562, "step": 1298 }, { "epoch": 2.986425339366516, "grad_norm": 8.668551445007324, "learning_rate": 2.5146689019279127e-07, "loss": 0.2171, "step": 1320 }, { "epoch": 3.0, "eval_accuracy": 0.8754246885617214, "eval_auc": 0.8692314688494771, "eval_f1": 0.6180555555555556, "eval_loss": 0.3162839710712433, "eval_precision": 0.644927536231884, "eval_recall": 0.5933333333333334, "eval_runtime": 161.1074, "eval_samples_per_second": 5.481, "eval_steps_per_second": 0.348, "step": 1326 } ], "logging_steps": 22, "max_steps": 1326, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 696981185648640.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }