{ "best_metric": 0.9745301568430197, "best_model_checkpoint": "models/pos_final_xlm_fr/checkpoint-448", "epoch": 39.94915254237288, "global_step": 560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.95, "eval_accuracy": 0.0025780833283944762, "eval_f1": 0.0, "eval_loss": 3.5537023544311523, "eval_precision": 0.0, "eval_recall": 0.0, "eval_runtime": 2.1805, "eval_samples_per_second": 760.825, "eval_steps_per_second": 3.21, "step": 14 }, { "epoch": 1.95, "eval_accuracy": 0.004948734664849168, "eval_f1": 0.004161179878851726, "eval_loss": 3.453566551208496, "eval_precision": 0.01528046421663443, "eval_recall": 0.0024085365853658536, "eval_runtime": 2.2293, "eval_samples_per_second": 744.195, "eval_steps_per_second": 3.14, "step": 28 }, { "epoch": 2.95, "eval_accuracy": 0.2843003615243288, "eval_f1": 0.2065520889335946, "eval_loss": 3.1247434616088867, "eval_precision": 0.23953179679015324, "eval_recall": 0.18155487804878048, "eval_runtime": 2.0115, "eval_samples_per_second": 824.764, "eval_steps_per_second": 3.48, "step": 42 }, { "epoch": 3.95, "eval_accuracy": 0.45430569548983585, "eval_f1": 0.3899751394208157, "eval_loss": 2.598811149597168, "eval_precision": 0.4342361215023193, "eval_recall": 0.3539024390243902, "eval_runtime": 2.3441, "eval_samples_per_second": 707.74, "eval_steps_per_second": 2.986, "step": 56 }, { "epoch": 4.95, "eval_accuracy": 0.5148462039945475, "eval_f1": 0.454724342663274, "eval_loss": 2.0168328285217285, "eval_precision": 0.5125430210325048, "eval_recall": 0.4086280487804878, "eval_runtime": 2.0095, "eval_samples_per_second": 825.594, "eval_steps_per_second": 3.484, "step": 70 }, { "epoch": 5.95, "eval_accuracy": 0.6085758312096249, "eval_f1": 0.5542561693660192, "eval_loss": 1.4837758541107178, "eval_precision": 0.5959454245729718, "eval_recall": 0.5180182926829269, "eval_runtime": 2.0097, "eval_samples_per_second": 825.481, "eval_steps_per_second": 3.483, "step": 84 }, { "epoch": 6.95, "eval_accuracy": 0.7980501392757661, "eval_f1": 0.7759372816890475, "eval_loss": 0.9300474524497986, "eval_precision": 0.7904792029100111, "eval_recall": 0.7619207317073171, "eval_runtime": 2.0866, "eval_samples_per_second": 795.072, "eval_steps_per_second": 3.355, "step": 98 }, { "epoch": 7.95, "eval_accuracy": 0.9146565518876312, "eval_f1": 0.909426874150543, "eval_loss": 0.4873865842819214, "eval_precision": 0.911054676743261, "eval_recall": 0.9078048780487805, "eval_runtime": 2.0506, "eval_samples_per_second": 809.029, "eval_steps_per_second": 3.414, "step": 112 }, { "epoch": 8.95, "eval_accuracy": 0.9395780240621111, "eval_f1": 0.9370149572323785, "eval_loss": 0.2940390408039093, "eval_precision": 0.9372007197974808, "eval_recall": 0.936829268292683, "eval_runtime": 2.4026, "eval_samples_per_second": 690.513, "eval_steps_per_second": 2.914, "step": 126 }, { "epoch": 9.95, "eval_accuracy": 0.9490309962662241, "eval_f1": 0.9476378372613843, "eval_loss": 0.20862668752670288, "eval_precision": 0.9470751240902585, "eval_recall": 0.9482012195121952, "eval_runtime": 2.1173, "eval_samples_per_second": 783.547, "eval_steps_per_second": 3.306, "step": 140 }, { "epoch": 10.95, "eval_accuracy": 0.96272150773425, "eval_f1": 0.9601852133914156, "eval_loss": 0.16879160702228546, "eval_precision": 0.9593961161502405, "eval_recall": 0.9609756097560975, "eval_runtime": 2.6297, "eval_samples_per_second": 630.882, "eval_steps_per_second": 2.662, "step": 154 }, { "epoch": 11.95, "eval_accuracy": 0.9659218870384638, "eval_f1": 0.9632472774350773, "eval_loss": 0.1449553668498993, "eval_precision": 0.9624410287627454, "eval_recall": 0.9640548780487805, "eval_runtime": 2.1659, "eval_samples_per_second": 765.976, "eval_steps_per_second": 3.232, "step": 168 }, { "epoch": 12.95, "eval_accuracy": 0.9685592366502697, "eval_f1": 0.9659930249912432, "eval_loss": 0.13338540494441986, "eval_precision": 0.9650670967349299, "eval_recall": 0.9669207317073171, "eval_runtime": 2.0965, "eval_samples_per_second": 791.329, "eval_steps_per_second": 3.339, "step": 182 }, { "epoch": 13.95, "eval_accuracy": 0.9702483257274936, "eval_f1": 0.9679444207447133, "eval_loss": 0.12125352025032043, "eval_precision": 0.9674138141064685, "eval_recall": 0.9684756097560976, "eval_runtime": 2.0848, "eval_samples_per_second": 795.757, "eval_steps_per_second": 3.358, "step": 196 }, { "epoch": 14.95, "eval_accuracy": 0.9718188822378948, "eval_f1": 0.9689970901445788, "eval_loss": 0.11550796031951904, "eval_precision": 0.9684216937178355, "eval_recall": 0.9695731707317073, "eval_runtime": 2.1294, "eval_samples_per_second": 779.097, "eval_steps_per_second": 3.287, "step": 210 }, { "epoch": 15.95, "eval_accuracy": 0.9733598056065904, "eval_f1": 0.9709382953108094, "eval_loss": 0.10925151407718658, "eval_precision": 0.9706572412322131, "eval_recall": 0.9712195121951219, "eval_runtime": 2.0792, "eval_samples_per_second": 797.884, "eval_steps_per_second": 3.367, "step": 224 }, { "epoch": 16.95, "eval_accuracy": 0.9739228352989984, "eval_f1": 0.9713353956812605, "eval_loss": 0.10589804500341415, "eval_precision": 0.9710246488528685, "eval_recall": 0.9716463414634147, "eval_runtime": 3.3331, "eval_samples_per_second": 497.732, "eval_steps_per_second": 2.1, "step": 238 }, { "epoch": 17.95, "eval_accuracy": 0.973952468440704, "eval_f1": 0.9713650009905667, "eval_loss": 0.10459830611944199, "eval_precision": 0.9710838233949846, "eval_recall": 0.9716463414634147, "eval_runtime": 2.0698, "eval_samples_per_second": 801.518, "eval_steps_per_second": 3.382, "step": 252 }, { "epoch": 18.95, "eval_accuracy": 0.9745154981331121, "eval_f1": 0.9721574543958305, "eval_loss": 0.10144730657339096, "eval_precision": 0.9718760474115604, "eval_recall": 0.9724390243902439, "eval_runtime": 2.3781, "eval_samples_per_second": 697.618, "eval_steps_per_second": 2.944, "step": 266 }, { "epoch": 19.95, "eval_accuracy": 0.9741599004326439, "eval_f1": 0.971824761904762, "eval_loss": 0.1002810001373291, "eval_precision": 0.9714546839299315, "eval_recall": 0.9721951219512195, "eval_runtime": 2.3329, "eval_samples_per_second": 711.141, "eval_steps_per_second": 3.001, "step": 280 }, { "epoch": 20.95, "eval_accuracy": 0.9750785278255201, "eval_f1": 0.9727077396644468, "eval_loss": 0.09868494421243668, "eval_precision": 0.9723669378179934, "eval_recall": 0.9730487804878049, "eval_runtime": 2.094, "eval_samples_per_second": 792.279, "eval_steps_per_second": 3.343, "step": 294 }, { "epoch": 21.95, "eval_accuracy": 0.9750192615421087, "eval_f1": 0.9725075436618001, "eval_loss": 0.09709486365318298, "eval_precision": 0.9722408434395758, "eval_recall": 0.9727743902439024, "eval_runtime": 2.3309, "eval_samples_per_second": 711.747, "eval_steps_per_second": 3.003, "step": 308 }, { "epoch": 22.95, "eval_accuracy": 0.9753748592425769, "eval_f1": 0.9729877965172083, "eval_loss": 0.09679476916790009, "eval_precision": 0.9724396260316107, "eval_recall": 0.9735365853658536, "eval_runtime": 2.3344, "eval_samples_per_second": 710.679, "eval_steps_per_second": 2.999, "step": 322 }, { "epoch": 23.95, "eval_accuracy": 0.975641557517928, "eval_f1": 0.9732274825910068, "eval_loss": 0.09535854309797287, "eval_precision": 0.9728272458646846, "eval_recall": 0.9736280487804878, "eval_runtime": 3.076, "eval_samples_per_second": 539.342, "eval_steps_per_second": 2.276, "step": 336 }, { "epoch": 24.95, "eval_accuracy": 0.9751970603923428, "eval_f1": 0.9726641067836899, "eval_loss": 0.09671631455421448, "eval_precision": 0.9722492993785793, "eval_recall": 0.9730792682926829, "eval_runtime": 2.2902, "eval_samples_per_second": 724.376, "eval_steps_per_second": 3.056, "step": 350 }, { "epoch": 25.95, "eval_accuracy": 0.9762638534937474, "eval_f1": 0.9739440482720789, "eval_loss": 0.09651771187782288, "eval_precision": 0.9735286950164493, "eval_recall": 0.974359756097561, "eval_runtime": 2.3507, "eval_samples_per_second": 705.738, "eval_steps_per_second": 2.978, "step": 364 }, { "epoch": 26.95, "eval_accuracy": 0.9756711906596337, "eval_f1": 0.973000975134081, "eval_loss": 0.09634628146886826, "eval_precision": 0.9725268031189084, "eval_recall": 0.9734756097560976, "eval_runtime": 2.1161, "eval_samples_per_second": 783.981, "eval_steps_per_second": 3.308, "step": 378 }, { "epoch": 27.95, "eval_accuracy": 0.9759082557932792, "eval_f1": 0.9732752315943443, "eval_loss": 0.09717196971178055, "eval_precision": 0.9728009259259259, "eval_recall": 0.97375, "eval_runtime": 2.4757, "eval_samples_per_second": 670.113, "eval_steps_per_second": 2.827, "step": 392 }, { "epoch": 28.95, "eval_accuracy": 0.9765601849108042, "eval_f1": 0.9740214228032484, "eval_loss": 0.09867348521947861, "eval_precision": 0.9735615729036581, "eval_recall": 0.9744817073170732, "eval_runtime": 2.2832, "eval_samples_per_second": 726.617, "eval_steps_per_second": 3.066, "step": 406 }, { "epoch": 29.95, "eval_accuracy": 0.9764416523439815, "eval_f1": 0.9739541263430618, "eval_loss": 0.09944748878479004, "eval_precision": 0.9737315252171264, "eval_recall": 0.9741768292682926, "eval_runtime": 2.2486, "eval_samples_per_second": 737.785, "eval_steps_per_second": 3.113, "step": 420 }, { "epoch": 30.95, "eval_accuracy": 0.9763527529188645, "eval_f1": 0.9738618871260268, "eval_loss": 0.09847575426101685, "eval_precision": 0.9736689726632737, "eval_recall": 0.9740548780487804, "eval_runtime": 2.8613, "eval_samples_per_second": 579.816, "eval_steps_per_second": 2.446, "step": 434 }, { "epoch": 31.95, "eval_accuracy": 0.9768565163278611, "eval_f1": 0.9745301568430197, "eval_loss": 0.10217323899269104, "eval_precision": 0.9744261895327216, "eval_recall": 0.9746341463414634, "eval_runtime": 2.2672, "eval_samples_per_second": 731.743, "eval_steps_per_second": 3.088, "step": 448 }, { "epoch": 32.95, "eval_accuracy": 0.9767379837610384, "eval_f1": 0.9742120343839542, "eval_loss": 0.10195796191692352, "eval_precision": 0.9740338900402292, "eval_recall": 0.974390243902439, "eval_runtime": 2.0866, "eval_samples_per_second": 795.073, "eval_steps_per_second": 3.355, "step": 462 }, { "epoch": 33.95, "eval_accuracy": 0.9757897232264565, "eval_f1": 0.9734095238095238, "eval_loss": 0.10545694828033447, "eval_precision": 0.9730388423457731, "eval_recall": 0.973780487804878, "eval_runtime": 2.3249, "eval_samples_per_second": 713.591, "eval_steps_per_second": 3.011, "step": 476 }, { "epoch": 34.95, "eval_accuracy": 0.9760267883601019, "eval_f1": 0.9736721821007404, "eval_loss": 0.10677994042634964, "eval_precision": 0.9731680575013705, "eval_recall": 0.9741768292682926, "eval_runtime": 2.2489, "eval_samples_per_second": 737.696, "eval_steps_per_second": 3.113, "step": 490 }, { "epoch": 35.68, "learning_rate": 4.9800000000000004e-05, "loss": 0.6768, "step": 500 }, { "epoch": 35.95, "eval_accuracy": 0.9764416523439815, "eval_f1": 0.9738759335467154, "eval_loss": 0.10853772610425949, "eval_precision": 0.9737275220969217, "eval_recall": 0.9740243902439024, "eval_runtime": 2.1342, "eval_samples_per_second": 777.355, "eval_steps_per_second": 3.28, "step": 504 }, { "epoch": 36.95, "eval_accuracy": 0.9763527529188645, "eval_f1": 0.9738987337914641, "eval_loss": 0.1088031679391861, "eval_precision": 0.9734685796094916, "eval_recall": 0.974329268292683, "eval_runtime": 2.3204, "eval_samples_per_second": 714.977, "eval_steps_per_second": 3.017, "step": 518 }, { "epoch": 37.95, "eval_accuracy": 0.976767616902744, "eval_f1": 0.9741534335090981, "eval_loss": 0.10998602956533432, "eval_precision": 0.9738862819184594, "eval_recall": 0.974420731707317, "eval_runtime": 2.2182, "eval_samples_per_second": 747.919, "eval_steps_per_second": 3.156, "step": 532 }, { "epoch": 38.95, "eval_accuracy": 0.9767379837610384, "eval_f1": 0.9741690668861153, "eval_loss": 0.11069974303245544, "eval_precision": 0.9738870776074835, "eval_recall": 0.9744512195121952, "eval_runtime": 2.083, "eval_samples_per_second": 796.451, "eval_steps_per_second": 3.361, "step": 546 }, { "epoch": 39.95, "eval_accuracy": 0.9768861494695668, "eval_f1": 0.9743683520770474, "eval_loss": 0.11153056472539902, "eval_precision": 0.9740418012308817, "eval_recall": 0.9746951219512195, "eval_runtime": 2.295, "eval_samples_per_second": 722.863, "eval_steps_per_second": 3.05, "step": 560 }, { "epoch": 39.95, "step": 560, "total_flos": 4.190909533741901e+16, "train_loss": 0.6079375518219812, "train_runtime": 701.2451, "train_samples_per_second": 851.514, "train_steps_per_second": 0.799 } ], "max_steps": 560, "num_train_epochs": 40, "total_flos": 4.190909533741901e+16, "trial_name": null, "trial_params": null }