{ "best_metric": 0.1894965022802353, "best_model_checkpoint": "autotrain-8s6z2-1g2hg/checkpoint-32895", "epoch": 3.0, "eval_steps": 500, "global_step": 32895, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 3.4229037761688232, "learning_rate": 8.282674772036476e-06, "loss": 0.655, "step": 548 }, { "epoch": 0.1, "grad_norm": 2.956129789352417, "learning_rate": 1.661094224924012e-05, "loss": 0.5579, "step": 1096 }, { "epoch": 0.15, "grad_norm": 7.492773056030273, "learning_rate": 2.493920972644377e-05, "loss": 0.4886, "step": 1644 }, { "epoch": 0.2, "grad_norm": 9.447515487670898, "learning_rate": 3.326747720364742e-05, "loss": 0.4206, "step": 2192 }, { "epoch": 0.25, "grad_norm": 10.904797554016113, "learning_rate": 4.158054711246201e-05, "loss": 0.4038, "step": 2740 }, { "epoch": 0.3, "grad_norm": 8.469663619995117, "learning_rate": 4.9908814589665656e-05, "loss": 0.3769, "step": 3288 }, { "epoch": 0.35, "grad_norm": 6.261867046356201, "learning_rate": 4.9086302989359904e-05, "loss": 0.3841, "step": 3836 }, { "epoch": 0.4, "grad_norm": 4.032571792602539, "learning_rate": 4.8160783651410236e-05, "loss": 0.3391, "step": 4384 }, { "epoch": 0.45, "grad_norm": 48.33228302001953, "learning_rate": 4.723526431346057e-05, "loss": 0.3192, "step": 4932 }, { "epoch": 0.5, "grad_norm": 4.632198810577393, "learning_rate": 4.630974497551089e-05, "loss": 0.3239, "step": 5480 }, { "epoch": 0.55, "grad_norm": 2.5730295181274414, "learning_rate": 4.5384225637561225e-05, "loss": 0.3143, "step": 6028 }, { "epoch": 0.6, "grad_norm": 3.378683090209961, "learning_rate": 4.446208410741429e-05, "loss": 0.3059, "step": 6576 }, { "epoch": 0.65, "grad_norm": 10.585208892822266, "learning_rate": 4.353656476946462e-05, "loss": 0.2987, "step": 7124 }, { "epoch": 0.7, "grad_norm": 16.021188735961914, "learning_rate": 4.261104543151495e-05, "loss": 0.2935, "step": 7672 }, { "epoch": 0.75, "grad_norm": 2.585524797439575, "learning_rate": 4.168552609356528e-05, "loss": 0.292, "step": 8220 }, { "epoch": 0.8, "grad_norm": 0.8300177454948425, "learning_rate": 4.0761695659516976e-05, "loss": 0.2746, "step": 8768 }, { "epoch": 0.85, "grad_norm": 1.8167496919631958, "learning_rate": 3.9837865225468676e-05, "loss": 0.288, "step": 9316 }, { "epoch": 0.9, "grad_norm": 14.431105613708496, "learning_rate": 3.8912345887519e-05, "loss": 0.3442, "step": 9864 }, { "epoch": 0.95, "grad_norm": 4.341527462005615, "learning_rate": 3.798682654956933e-05, "loss": 0.2964, "step": 10412 }, { "epoch": 1.0, "grad_norm": 0.9708638191223145, "learning_rate": 3.706130721161966e-05, "loss": 0.2836, "step": 10960 }, { "epoch": 1.0, "eval_accuracy": 0.9091862559565882, "eval_auc": 0.9327717478336053, "eval_f1": 0.9394671651544857, "eval_loss": 0.26499995589256287, "eval_precision": 0.9356703902158447, "eval_recall": 0.9432948788378197, "eval_runtime": 90.2174, "eval_samples_per_second": 486.148, "eval_steps_per_second": 15.197, "step": 10965 }, { "epoch": 1.05, "grad_norm": 10.155410766601562, "learning_rate": 3.613578787366999e-05, "loss": 0.2915, "step": 11508 }, { "epoch": 1.1, "grad_norm": 13.481185913085938, "learning_rate": 3.521026853572032e-05, "loss": 0.2706, "step": 12056 }, { "epoch": 1.15, "grad_norm": 11.898757934570312, "learning_rate": 3.428474919777065e-05, "loss": 0.2577, "step": 12604 }, { "epoch": 1.2, "grad_norm": 102.29252624511719, "learning_rate": 3.336091876372234e-05, "loss": 0.2867, "step": 13152 }, { "epoch": 1.25, "grad_norm": 0.8772763609886169, "learning_rate": 3.243539942577267e-05, "loss": 0.2543, "step": 13700 }, { "epoch": 1.3, "grad_norm": 3.079983711242676, "learning_rate": 3.1509880087823005e-05, "loss": 0.2747, "step": 14248 }, { "epoch": 1.35, "grad_norm": 2.654982805252075, "learning_rate": 3.058436074987334e-05, "loss": 0.2637, "step": 14796 }, { "epoch": 1.4, "grad_norm": 8.28964900970459, "learning_rate": 2.9658841411923666e-05, "loss": 0.2433, "step": 15344 }, { "epoch": 1.45, "grad_norm": 0.606247067451477, "learning_rate": 2.873669988177673e-05, "loss": 0.2496, "step": 15892 }, { "epoch": 1.5, "grad_norm": 4.818925857543945, "learning_rate": 2.781118054382706e-05, "loss": 0.2344, "step": 16440 }, { "epoch": 1.55, "grad_norm": 9.206971168518066, "learning_rate": 2.6885661205877388e-05, "loss": 0.224, "step": 16988 }, { "epoch": 1.6, "grad_norm": 1.9517682790756226, "learning_rate": 2.5960141867927716e-05, "loss": 0.2115, "step": 17536 }, { "epoch": 1.65, "grad_norm": 10.609549522399902, "learning_rate": 2.503462252997805e-05, "loss": 0.2102, "step": 18084 }, { "epoch": 1.7, "grad_norm": 20.039274215698242, "learning_rate": 2.4109103192028377e-05, "loss": 0.2027, "step": 18632 }, { "epoch": 1.75, "grad_norm": 2.285228729248047, "learning_rate": 2.3183583854078706e-05, "loss": 0.1995, "step": 19180 }, { "epoch": 1.8, "grad_norm": 9.45280647277832, "learning_rate": 2.2258064516129034e-05, "loss": 0.2076, "step": 19728 }, { "epoch": 1.85, "grad_norm": 8.692055702209473, "learning_rate": 2.1332545178179363e-05, "loss": 0.2075, "step": 20276 }, { "epoch": 1.9, "grad_norm": 8.51143741607666, "learning_rate": 2.040871474413106e-05, "loss": 0.2097, "step": 20824 }, { "epoch": 1.95, "grad_norm": 4.226066589355469, "learning_rate": 1.948319540618139e-05, "loss": 0.2153, "step": 21372 }, { "epoch": 2.0, "grad_norm": 3.73645281791687, "learning_rate": 1.855767606823172e-05, "loss": 0.2082, "step": 21920 }, { "epoch": 2.0, "eval_accuracy": 0.9314621856403474, "eval_auc": 0.96886043230506, "eval_f1": 0.953925385487876, "eval_loss": 0.2151412069797516, "eval_precision": 0.9581845054809706, "eval_recall": 0.9497039614234267, "eval_runtime": 90.5052, "eval_samples_per_second": 484.602, "eval_steps_per_second": 15.148, "step": 21930 }, { "epoch": 2.05, "grad_norm": 2.4432873725891113, "learning_rate": 1.763215673028205e-05, "loss": 0.1786, "step": 22468 }, { "epoch": 2.1, "grad_norm": 0.6836100816726685, "learning_rate": 1.6708326296233746e-05, "loss": 0.1842, "step": 23016 }, { "epoch": 2.15, "grad_norm": 2.6022326946258545, "learning_rate": 1.5784495862185442e-05, "loss": 0.1801, "step": 23564 }, { "epoch": 2.2, "grad_norm": 1.9158451557159424, "learning_rate": 1.4858976524235771e-05, "loss": 0.1731, "step": 24112 }, { "epoch": 2.25, "grad_norm": 7.503026485443115, "learning_rate": 1.39334571862861e-05, "loss": 0.1726, "step": 24660 }, { "epoch": 2.3, "grad_norm": 3.1770987510681152, "learning_rate": 1.300793784833643e-05, "loss": 0.1771, "step": 25208 }, { "epoch": 2.35, "grad_norm": 4.539430141448975, "learning_rate": 1.208241851038676e-05, "loss": 0.1551, "step": 25756 }, { "epoch": 2.4, "grad_norm": 1.772599458694458, "learning_rate": 1.1156899172437089e-05, "loss": 0.1735, "step": 26304 }, { "epoch": 2.45, "grad_norm": 3.809593439102173, "learning_rate": 1.023137983448742e-05, "loss": 0.1683, "step": 26852 }, { "epoch": 2.5, "grad_norm": 10.1038179397583, "learning_rate": 9.307549400439116e-06, "loss": 0.1617, "step": 27400 }, { "epoch": 2.55, "grad_norm": 18.696857452392578, "learning_rate": 8.382030062489445e-06, "loss": 0.1589, "step": 27948 }, { "epoch": 2.6, "grad_norm": 15.82597827911377, "learning_rate": 7.4565107245397735e-06, "loss": 0.1636, "step": 28496 }, { "epoch": 2.65, "grad_norm": 1.0566959381103516, "learning_rate": 6.530991386590103e-06, "loss": 0.1553, "step": 29044 }, { "epoch": 2.7, "grad_norm": 13.043928146362305, "learning_rate": 5.6054720486404325e-06, "loss": 0.1532, "step": 29592 }, { "epoch": 2.75, "grad_norm": 19.499807357788086, "learning_rate": 4.679952710690762e-06, "loss": 0.1426, "step": 30140 }, { "epoch": 2.8, "grad_norm": 24.73866081237793, "learning_rate": 3.756122276642459e-06, "loss": 0.1514, "step": 30688 }, { "epoch": 2.85, "grad_norm": 6.256073474884033, "learning_rate": 2.8306029386927886e-06, "loss": 0.1554, "step": 31236 }, { "epoch": 2.9, "grad_norm": 0.34589943289756775, "learning_rate": 1.9050836007431179e-06, "loss": 0.1459, "step": 31784 }, { "epoch": 2.95, "grad_norm": 2.303668737411499, "learning_rate": 9.795642627934472e-07, "loss": 0.1584, "step": 32332 }, { "epoch": 3.0, "grad_norm": 4.459900379180908, "learning_rate": 5.404492484377639e-08, "loss": 0.152, "step": 32880 }, { "epoch": 3.0, "eval_accuracy": 0.945028386420119, "eval_auc": 0.9812675754686511, "eval_f1": 0.9633213150168105, "eval_loss": 0.1894965022802353, "eval_precision": 0.9603846270512937, "eval_recall": 0.9662760178233535, "eval_runtime": 90.4032, "eval_samples_per_second": 485.149, "eval_steps_per_second": 15.165, "step": 32895 } ], "logging_steps": 548, "max_steps": 32895, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "total_flos": 3.46191659978112e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }