{ "best_metric": null, "best_model_checkpoint": null, "epoch": 17.856, "global_step": 558, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 8.928571428571428e-07, "loss": 2.9786, "step": 1 }, { "epoch": 0.26, "learning_rate": 7.142857142857143e-06, "loss": 3.0172, "step": 8 }, { "epoch": 0.51, "learning_rate": 1.4285714285714285e-05, "loss": 2.9016, "step": 16 }, { "epoch": 0.77, "learning_rate": 2.1428571428571428e-05, "loss": 2.7266, "step": 24 }, { "epoch": 0.99, "eval_Macro F1": 0.18274435256625565, "eval_Macro Precision": 0.22464899203701116, "eval_Macro Recall": 0.21014375581515982, "eval_Micro F1": 0.208, "eval_Micro Precision": 0.208, "eval_Micro Recall": 0.208, "eval_Weighted F1": 0.18112534226726865, "eval_Weighted Precision": 0.21432408746391318, "eval_Weighted Recall": 0.208, "eval_accuracy": 0.208, "eval_loss": 2.473806381225586, "eval_runtime": 340.1389, "eval_samples_per_second": 2.94, "eval_steps_per_second": 0.094, "step": 31 }, { "epoch": 1.02, "learning_rate": 2.857142857142857e-05, "loss": 2.5835, "step": 32 }, { "epoch": 1.28, "learning_rate": 3.571428571428572e-05, "loss": 2.4181, "step": 40 }, { "epoch": 1.54, "learning_rate": 4.2857142857142856e-05, "loss": 2.2732, "step": 48 }, { "epoch": 1.79, "learning_rate": 5e-05, "loss": 2.171, "step": 56 }, { "epoch": 1.98, "eval_Macro F1": 0.39250727765144267, "eval_Macro Precision": 0.4446397468859132, "eval_Macro Recall": 0.42427064875651993, "eval_Micro F1": 0.42299999999999993, "eval_Micro Precision": 0.423, "eval_Micro Recall": 0.423, "eval_Weighted F1": 0.39356324128346765, "eval_Weighted Precision": 0.45034474867370766, "eval_Weighted Recall": 0.423, "eval_accuracy": 0.423, "eval_loss": 1.8510247468948364, "eval_runtime": 23.3293, "eval_samples_per_second": 42.864, "eval_steps_per_second": 1.372, "step": 62 }, { "epoch": 2.05, "learning_rate": 4.9203187250996016e-05, "loss": 2.0057, "step": 64 }, { "epoch": 2.3, "learning_rate": 4.840637450199204e-05, "loss": 1.8883, "step": 72 }, { "epoch": 2.56, "learning_rate": 4.760956175298805e-05, "loss": 1.7538, "step": 80 }, { "epoch": 2.82, "learning_rate": 4.6812749003984064e-05, "loss": 1.6525, "step": 88 }, { "epoch": 2.98, "eval_Macro F1": 0.5854998981328875, "eval_Macro Precision": 0.6283041062692261, "eval_Macro Recall": 0.6124154907646651, "eval_Micro F1": 0.61, "eval_Micro Precision": 0.61, "eval_Micro Recall": 0.61, "eval_Weighted F1": 0.5883778569595041, "eval_Weighted Precision": 0.6376755054054996, "eval_Weighted Recall": 0.61, "eval_accuracy": 0.61, "eval_loss": 1.2633185386657715, "eval_runtime": 23.4866, "eval_samples_per_second": 42.577, "eval_steps_per_second": 1.362, "step": 93 }, { "epoch": 3.07, "learning_rate": 4.601593625498008e-05, "loss": 1.4741, "step": 96 }, { "epoch": 3.33, "learning_rate": 4.52191235059761e-05, "loss": 1.4995, "step": 104 }, { "epoch": 3.58, "learning_rate": 4.442231075697211e-05, "loss": 1.3476, "step": 112 }, { "epoch": 3.84, "learning_rate": 4.362549800796813e-05, "loss": 1.346, "step": 120 }, { "epoch": 4.0, "eval_Macro F1": 0.6991540885965197, "eval_Macro Precision": 0.7033970309397422, "eval_Macro Recall": 0.705813786350264, "eval_Micro F1": 0.706, "eval_Micro Precision": 0.706, "eval_Micro Recall": 0.706, "eval_Weighted F1": 0.7023066954415087, "eval_Weighted Precision": 0.7095418750091215, "eval_Weighted Recall": 0.706, "eval_accuracy": 0.706, "eval_loss": 1.025865077972412, "eval_runtime": 25.2258, "eval_samples_per_second": 39.642, "eval_steps_per_second": 1.269, "step": 125 }, { "epoch": 4.1, "learning_rate": 4.2828685258964146e-05, "loss": 1.361, "step": 128 }, { "epoch": 4.35, "learning_rate": 4.203187250996016e-05, "loss": 1.2724, "step": 136 }, { "epoch": 4.61, "learning_rate": 4.123505976095618e-05, "loss": 1.2634, "step": 144 }, { "epoch": 4.86, "learning_rate": 4.043824701195219e-05, "loss": 1.253, "step": 152 }, { "epoch": 4.99, "eval_Macro F1": 0.7239315752163233, "eval_Macro Precision": 0.7261038012066161, "eval_Macro Recall": 0.7290773391581917, "eval_Micro F1": 0.729, "eval_Micro Precision": 0.729, "eval_Micro Recall": 0.729, "eval_Weighted F1": 0.7276898829930567, "eval_Weighted Precision": 0.7340103739336716, "eval_Weighted Recall": 0.729, "eval_accuracy": 0.729, "eval_loss": 0.9180329442024231, "eval_runtime": 24.244, "eval_samples_per_second": 41.247, "eval_steps_per_second": 1.32, "step": 156 }, { "epoch": 5.12, "learning_rate": 3.964143426294821e-05, "loss": 1.1127, "step": 160 }, { "epoch": 5.38, "learning_rate": 3.884462151394422e-05, "loss": 1.2072, "step": 168 }, { "epoch": 5.63, "learning_rate": 3.804780876494024e-05, "loss": 1.1051, "step": 176 }, { "epoch": 5.89, "learning_rate": 3.7250996015936256e-05, "loss": 1.0975, "step": 184 }, { "epoch": 5.98, "eval_Macro F1": 0.7437283226344332, "eval_Macro Precision": 0.7525965488125351, "eval_Macro Recall": 0.7471731443855207, "eval_Micro F1": 0.747, "eval_Micro Precision": 0.747, "eval_Micro Recall": 0.747, "eval_Weighted F1": 0.7479974803294792, "eval_Weighted Precision": 0.7608663358741853, "eval_Weighted Recall": 0.747, "eval_accuracy": 0.747, "eval_loss": 0.8858795166015625, "eval_runtime": 24.5565, "eval_samples_per_second": 40.722, "eval_steps_per_second": 1.303, "step": 187 }, { "epoch": 6.14, "learning_rate": 3.6454183266932277e-05, "loss": 1.0922, "step": 192 }, { "epoch": 6.4, "learning_rate": 3.565737051792829e-05, "loss": 1.0797, "step": 200 }, { "epoch": 6.66, "learning_rate": 3.4860557768924304e-05, "loss": 1.0066, "step": 208 }, { "epoch": 6.91, "learning_rate": 3.406374501992032e-05, "loss": 1.1122, "step": 216 }, { "epoch": 6.98, "eval_Macro F1": 0.7577762006111012, "eval_Macro Precision": 0.7726630864911376, "eval_Macro Recall": 0.7594437988791525, "eval_Micro F1": 0.76, "eval_Micro Precision": 0.76, "eval_Micro Recall": 0.76, "eval_Weighted F1": 0.7606134776349917, "eval_Weighted Precision": 0.7772442524935343, "eval_Weighted Recall": 0.76, "eval_accuracy": 0.76, "eval_loss": 0.8269779086112976, "eval_runtime": 24.3399, "eval_samples_per_second": 41.085, "eval_steps_per_second": 1.315, "step": 218 }, { "epoch": 7.17, "learning_rate": 3.326693227091633e-05, "loss": 0.988, "step": 224 }, { "epoch": 7.42, "learning_rate": 3.247011952191235e-05, "loss": 1.0301, "step": 232 }, { "epoch": 7.68, "learning_rate": 3.1673306772908366e-05, "loss": 0.9724, "step": 240 }, { "epoch": 7.94, "learning_rate": 3.0876494023904386e-05, "loss": 1.0365, "step": 248 }, { "epoch": 8.0, "eval_Macro F1": 0.7730173451661722, "eval_Macro Precision": 0.7919871344293006, "eval_Macro Recall": 0.7734992733449892, "eval_Micro F1": 0.775, "eval_Micro Precision": 0.775, "eval_Micro Recall": 0.775, "eval_Weighted F1": 0.7759067917731501, "eval_Weighted Precision": 0.7957122736768526, "eval_Weighted Recall": 0.775, "eval_accuracy": 0.775, "eval_loss": 0.7806075215339661, "eval_runtime": 23.5653, "eval_samples_per_second": 42.435, "eval_steps_per_second": 1.358, "step": 250 }, { "epoch": 8.19, "learning_rate": 3.00796812749004e-05, "loss": 0.9482, "step": 256 }, { "epoch": 8.45, "learning_rate": 2.9282868525896417e-05, "loss": 0.9364, "step": 264 }, { "epoch": 8.7, "learning_rate": 2.8486055776892434e-05, "loss": 0.9417, "step": 272 }, { "epoch": 8.96, "learning_rate": 2.7689243027888445e-05, "loss": 1.004, "step": 280 }, { "epoch": 8.99, "eval_Macro F1": 0.7956663948657638, "eval_Macro Precision": 0.8150981558104353, "eval_Macro Recall": 0.7955573765342054, "eval_Micro F1": 0.796, "eval_Micro Precision": 0.796, "eval_Micro Recall": 0.796, "eval_Weighted F1": 0.7977477337842563, "eval_Weighted Precision": 0.8193174046095921, "eval_Weighted Recall": 0.796, "eval_accuracy": 0.796, "eval_loss": 0.7471871972084045, "eval_runtime": 24.2779, "eval_samples_per_second": 41.19, "eval_steps_per_second": 1.318, "step": 281 }, { "epoch": 9.22, "learning_rate": 2.6892430278884462e-05, "loss": 0.8964, "step": 288 }, { "epoch": 9.47, "learning_rate": 2.609561752988048e-05, "loss": 0.9098, "step": 296 }, { "epoch": 9.73, "learning_rate": 2.5298804780876496e-05, "loss": 0.9576, "step": 304 }, { "epoch": 9.98, "learning_rate": 2.4501992031872513e-05, "loss": 0.9278, "step": 312 }, { "epoch": 9.98, "eval_Macro F1": 0.7956928280629123, "eval_Macro Precision": 0.8114507473411248, "eval_Macro Recall": 0.7953090431389529, "eval_Micro F1": 0.795, "eval_Micro Precision": 0.795, "eval_Micro Recall": 0.795, "eval_Weighted F1": 0.7974177696936208, "eval_Weighted Precision": 0.8157219938612603, "eval_Weighted Recall": 0.795, "eval_accuracy": 0.795, "eval_loss": 0.7296148538589478, "eval_runtime": 24.012, "eval_samples_per_second": 41.646, "eval_steps_per_second": 1.333, "step": 312 }, { "epoch": 10.24, "learning_rate": 2.3705179282868527e-05, "loss": 0.8623, "step": 320 }, { "epoch": 10.5, "learning_rate": 2.290836653386454e-05, "loss": 0.9, "step": 328 }, { "epoch": 10.75, "learning_rate": 2.2111553784860558e-05, "loss": 0.8767, "step": 336 }, { "epoch": 10.98, "eval_Macro F1": 0.8077597575155505, "eval_Macro Precision": 0.8135983545887402, "eval_Macro Recall": 0.8090640618814349, "eval_Micro F1": 0.809, "eval_Micro Precision": 0.809, "eval_Micro Recall": 0.809, "eval_Weighted F1": 0.8101031910520137, "eval_Weighted Precision": 0.8181958157506283, "eval_Weighted Recall": 0.809, "eval_accuracy": 0.809, "eval_loss": 0.7256603240966797, "eval_runtime": 23.8159, "eval_samples_per_second": 41.989, "eval_steps_per_second": 1.344, "step": 343 }, { "epoch": 11.01, "learning_rate": 2.1314741035856575e-05, "loss": 0.9239, "step": 344 }, { "epoch": 11.26, "learning_rate": 2.0517928286852592e-05, "loss": 0.8971, "step": 352 }, { "epoch": 11.52, "learning_rate": 1.9721115537848606e-05, "loss": 0.9387, "step": 360 }, { "epoch": 11.78, "learning_rate": 1.8924302788844623e-05, "loss": 0.8656, "step": 368 }, { "epoch": 12.0, "eval_Macro F1": 0.8106317572025369, "eval_Macro Precision": 0.8164411111931356, "eval_Macro Recall": 0.812159053433045, "eval_Micro F1": 0.8140000000000001, "eval_Micro Precision": 0.814, "eval_Micro Recall": 0.814, "eval_Weighted F1": 0.813675273019787, "eval_Weighted Precision": 0.8206734023674667, "eval_Weighted Recall": 0.814, "eval_accuracy": 0.814, "eval_loss": 0.687544584274292, "eval_runtime": 24.187, "eval_samples_per_second": 41.344, "eval_steps_per_second": 1.323, "step": 375 }, { "epoch": 12.03, "learning_rate": 1.812749003984064e-05, "loss": 0.8568, "step": 376 }, { "epoch": 12.29, "learning_rate": 1.7330677290836657e-05, "loss": 0.9105, "step": 384 }, { "epoch": 12.54, "learning_rate": 1.653386454183267e-05, "loss": 0.7687, "step": 392 }, { "epoch": 12.8, "learning_rate": 1.5737051792828685e-05, "loss": 0.7905, "step": 400 }, { "epoch": 12.99, "eval_Macro F1": 0.807144557531445, "eval_Macro Precision": 0.8145226206650988, "eval_Macro Recall": 0.8067641953560594, "eval_Micro F1": 0.808, "eval_Micro Precision": 0.808, "eval_Micro Recall": 0.808, "eval_Weighted F1": 0.8093388838073848, "eval_Weighted Precision": 0.8181875581570741, "eval_Weighted Recall": 0.808, "eval_accuracy": 0.808, "eval_loss": 0.7060463428497314, "eval_runtime": 23.4383, "eval_samples_per_second": 42.665, "eval_steps_per_second": 1.365, "step": 406 }, { "epoch": 13.06, "learning_rate": 1.4940239043824702e-05, "loss": 0.8453, "step": 408 }, { "epoch": 13.31, "learning_rate": 1.4143426294820719e-05, "loss": 0.7926, "step": 416 }, { "epoch": 13.57, "learning_rate": 1.3346613545816733e-05, "loss": 0.8721, "step": 424 }, { "epoch": 13.82, "learning_rate": 1.254980079681275e-05, "loss": 0.8804, "step": 432 }, { "epoch": 13.98, "eval_Macro F1": 0.8182926573190057, "eval_Macro Precision": 0.8215050680984892, "eval_Macro Recall": 0.8183483781309105, "eval_Micro F1": 0.82, "eval_Micro Precision": 0.82, "eval_Micro Recall": 0.82, "eval_Weighted F1": 0.8213528638428045, "eval_Weighted Precision": 0.8260224129157536, "eval_Weighted Recall": 0.82, "eval_accuracy": 0.82, "eval_loss": 0.6848881244659424, "eval_runtime": 24.2686, "eval_samples_per_second": 41.206, "eval_steps_per_second": 1.319, "step": 437 }, { "epoch": 14.08, "learning_rate": 1.1752988047808767e-05, "loss": 0.8041, "step": 440 }, { "epoch": 14.34, "learning_rate": 1.095617529880478e-05, "loss": 0.8291, "step": 448 }, { "epoch": 14.59, "learning_rate": 1.0159362549800798e-05, "loss": 0.8109, "step": 456 }, { "epoch": 14.85, "learning_rate": 9.362549800796813e-06, "loss": 0.8265, "step": 464 }, { "epoch": 14.98, "eval_Macro F1": 0.814299562240979, "eval_Macro Precision": 0.8205669101031257, "eval_Macro Recall": 0.8141933591904861, "eval_Micro F1": 0.816, "eval_Micro Precision": 0.816, "eval_Micro Recall": 0.816, "eval_Weighted F1": 0.8171172793618807, "eval_Weighted Precision": 0.8242012281620977, "eval_Weighted Recall": 0.816, "eval_accuracy": 0.816, "eval_loss": 0.6820688247680664, "eval_runtime": 24.1434, "eval_samples_per_second": 41.419, "eval_steps_per_second": 1.325, "step": 468 }, { "epoch": 15.1, "learning_rate": 8.565737051792829e-06, "loss": 0.7845, "step": 472 }, { "epoch": 15.36, "learning_rate": 7.768924302788846e-06, "loss": 0.8545, "step": 480 }, { "epoch": 15.62, "learning_rate": 6.97211155378486e-06, "loss": 0.7685, "step": 488 }, { "epoch": 15.87, "learning_rate": 6.175298804780877e-06, "loss": 0.7929, "step": 496 }, { "epoch": 16.0, "eval_Macro F1": 0.8151613063095204, "eval_Macro Precision": 0.818634892770315, "eval_Macro Recall": 0.8167488060127654, "eval_Micro F1": 0.818, "eval_Micro Precision": 0.818, "eval_Micro Recall": 0.818, "eval_Weighted F1": 0.8184268497331145, "eval_Weighted Precision": 0.8239745058078853, "eval_Weighted Recall": 0.818, "eval_accuracy": 0.818, "eval_loss": 0.6877326369285583, "eval_runtime": 23.4065, "eval_samples_per_second": 42.723, "eval_steps_per_second": 1.367, "step": 500 }, { "epoch": 16.13, "learning_rate": 5.378486055776893e-06, "loss": 0.8338, "step": 504 }, { "epoch": 16.38, "learning_rate": 4.581673306772908e-06, "loss": 0.8154, "step": 512 }, { "epoch": 16.64, "learning_rate": 3.7848605577689246e-06, "loss": 0.8054, "step": 520 }, { "epoch": 16.9, "learning_rate": 2.9880478087649404e-06, "loss": 0.7993, "step": 528 }, { "epoch": 16.99, "eval_Macro F1": 0.8233893661888394, "eval_Macro Precision": 0.8281793690152945, "eval_Macro Recall": 0.822655931109436, "eval_Micro F1": 0.825, "eval_Micro Precision": 0.825, "eval_Micro Recall": 0.825, "eval_Weighted F1": 0.8258766455781327, "eval_Weighted Precision": 0.8305915821251276, "eval_Weighted Recall": 0.825, "eval_accuracy": 0.825, "eval_loss": 0.6717957854270935, "eval_runtime": 24.415, "eval_samples_per_second": 40.958, "eval_steps_per_second": 1.311, "step": 531 }, { "epoch": 17.15, "learning_rate": 2.1912350597609563e-06, "loss": 0.7572, "step": 536 }, { "epoch": 17.41, "learning_rate": 1.3944223107569721e-06, "loss": 0.8022, "step": 544 }, { "epoch": 17.66, "learning_rate": 5.976095617529881e-07, "loss": 0.7954, "step": 552 }, { "epoch": 17.86, "eval_Macro F1": 0.8242217019056312, "eval_Macro Precision": 0.8293094252297472, "eval_Macro Recall": 0.8236754312981294, "eval_Micro F1": 0.826, "eval_Micro Precision": 0.826, "eval_Micro Recall": 0.826, "eval_Weighted F1": 0.827162008102242, "eval_Weighted Precision": 0.8326500761444383, "eval_Weighted Recall": 0.826, "eval_accuracy": 0.826, "eval_loss": 0.6715443134307861, "eval_runtime": 24.1692, "eval_samples_per_second": 41.375, "eval_steps_per_second": 1.324, "step": 558 }, { "epoch": 17.86, "step": 558, "total_flos": 5.536126404560683e+18, "train_loss": 1.1784635154149865, "train_runtime": 5483.2083, "train_samples_per_second": 13.131, "train_steps_per_second": 0.102 } ], "max_steps": 558, "num_train_epochs": 18, "total_flos": 5.536126404560683e+18, "trial_name": null, "trial_params": null }