{ "best_metric": 0.3449392712550607, "best_model_checkpoint": "xlmr-base-cont-is-bsz160k-steps225k-hf-finetuned-iec-token-errors-uniq-42/checkpoint-17080", "epoch": 5.0, "global_step": 17080, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 1.9414519906323187e-05, "loss": 0.2543, "step": 500 }, { "epoch": 0.29, "learning_rate": 1.8829039812646373e-05, "loss": 0.1982, "step": 1000 }, { "epoch": 0.44, "learning_rate": 1.8243559718969555e-05, "loss": 0.1951, "step": 1500 }, { "epoch": 0.59, "learning_rate": 1.765807962529274e-05, "loss": 0.1787, "step": 2000 }, { "epoch": 0.73, "learning_rate": 1.7072599531615927e-05, "loss": 0.1804, "step": 2500 }, { "epoch": 0.88, "learning_rate": 1.6487119437939112e-05, "loss": 0.1833, "step": 3000 }, { "epoch": 1.0, "eval_accuracy": 0.9657446996355014, "eval_f1": 0.2873458792991564, "eval_loss": 0.15018649399280548, "eval_precision": 0.6944792973651192, "eval_recall": 0.18114874815905743, "eval_runtime": 5.7645, "eval_samples_per_second": 857.316, "eval_steps_per_second": 53.604, "step": 3416 }, { "epoch": 1.02, "learning_rate": 1.5901639344262295e-05, "loss": 0.1703, "step": 3500 }, { "epoch": 1.17, "learning_rate": 1.531615925058548e-05, "loss": 0.1605, "step": 4000 }, { "epoch": 1.32, "learning_rate": 1.4730679156908668e-05, "loss": 0.1577, "step": 4500 }, { "epoch": 1.46, "learning_rate": 1.4145199063231852e-05, "loss": 0.1578, "step": 5000 }, { "epoch": 1.61, "learning_rate": 1.3559718969555036e-05, "loss": 0.1526, "step": 5500 }, { "epoch": 1.76, "learning_rate": 1.2974238875878221e-05, "loss": 0.1513, "step": 6000 }, { "epoch": 1.9, "learning_rate": 1.2388758782201407e-05, "loss": 0.1557, "step": 6500 }, { "epoch": 2.0, "eval_accuracy": 0.9658519051042416, "eval_f1": 0.2957278076234331, "eval_loss": 0.15432876348495483, "eval_precision": 0.677211482132396, "eval_recall": 0.1891670757650139, "eval_runtime": 5.6853, "eval_samples_per_second": 869.262, "eval_steps_per_second": 54.351, "step": 6832 }, { "epoch": 2.05, "learning_rate": 1.1803278688524591e-05, "loss": 0.1516, "step": 7000 }, { "epoch": 2.2, "learning_rate": 1.1217798594847775e-05, "loss": 0.1301, "step": 7500 }, { "epoch": 2.34, "learning_rate": 1.0632318501170963e-05, "loss": 0.1321, "step": 8000 }, { "epoch": 2.49, "learning_rate": 1.0046838407494147e-05, "loss": 0.1262, "step": 8500 }, { "epoch": 2.63, "learning_rate": 9.461358313817332e-06, "loss": 0.1365, "step": 9000 }, { "epoch": 2.78, "learning_rate": 8.875878220140516e-06, "loss": 0.1339, "step": 9500 }, { "epoch": 2.93, "learning_rate": 8.290398126463702e-06, "loss": 0.1267, "step": 10000 }, { "epoch": 3.0, "eval_accuracy": 0.9658077616759367, "eval_f1": 0.32741425443930916, "eval_loss": 0.15831299126148224, "eval_precision": 0.6376125059213643, "eval_recall": 0.22025855015545737, "eval_runtime": 5.7385, "eval_samples_per_second": 861.2, "eval_steps_per_second": 53.847, "step": 10248 }, { "epoch": 3.07, "learning_rate": 7.704918032786886e-06, "loss": 0.1247, "step": 10500 }, { "epoch": 3.22, "learning_rate": 7.119437939110071e-06, "loss": 0.1106, "step": 11000 }, { "epoch": 3.37, "learning_rate": 6.5339578454332556e-06, "loss": 0.1079, "step": 11500 }, { "epoch": 3.51, "learning_rate": 5.94847775175644e-06, "loss": 0.1124, "step": 12000 }, { "epoch": 3.66, "learning_rate": 5.362997658079626e-06, "loss": 0.1091, "step": 12500 }, { "epoch": 3.81, "learning_rate": 4.77751756440281e-06, "loss": 0.1141, "step": 13000 }, { "epoch": 3.95, "learning_rate": 4.192037470725996e-06, "loss": 0.1085, "step": 13500 }, { "epoch": 4.0, "eval_accuracy": 0.9649879551502768, "eval_f1": 0.3396051103368177, "eval_loss": 0.16941741108894348, "eval_precision": 0.5850340136054422, "eval_recall": 0.2392407134675176, "eval_runtime": 5.6434, "eval_samples_per_second": 875.708, "eval_steps_per_second": 54.754, "step": 13664 }, { "epoch": 4.1, "learning_rate": 3.6065573770491806e-06, "loss": 0.1036, "step": 14000 }, { "epoch": 4.24, "learning_rate": 3.0210772833723655e-06, "loss": 0.0961, "step": 14500 }, { "epoch": 4.39, "learning_rate": 2.4355971896955503e-06, "loss": 0.0959, "step": 15000 }, { "epoch": 4.54, "learning_rate": 1.8501170960187356e-06, "loss": 0.0958, "step": 15500 }, { "epoch": 4.68, "learning_rate": 1.2646370023419204e-06, "loss": 0.0949, "step": 16000 }, { "epoch": 4.83, "learning_rate": 6.791569086651055e-07, "loss": 0.0946, "step": 16500 }, { "epoch": 4.98, "learning_rate": 9.367681498829041e-08, "loss": 0.0919, "step": 17000 }, { "epoch": 5.0, "eval_accuracy": 0.9652086722918006, "eval_f1": 0.3449392712550607, "eval_loss": 0.1819370836019516, "eval_precision": 0.5883977900552486, "eval_recall": 0.24398625429553264, "eval_runtime": 5.7966, "eval_samples_per_second": 852.569, "eval_steps_per_second": 53.307, "step": 17080 } ], "max_steps": 17080, "num_train_epochs": 5, "total_flos": 9598023469073754.0, "trial_name": null, "trial_params": null }