{
    "best_metric": 0.3449392712550607,
    "best_model_checkpoint": "xlmr-base-cont-is-bsz160k-steps225k-hf-finetuned-iec-token-errors-uniq-42/checkpoint-17080",
    "epoch": 5.0,
    "global_step": 17080,
    "is_hyper_param_search": false,
    "is_local_process_zero": true,
    "is_world_process_zero": true,
    "log_history": [
        {
            "epoch": 0.15,
            "learning_rate": 1.9414519906323187e-05,
            "loss": 0.2543,
            "step": 500
        },
        {
            "epoch": 0.29,
            "learning_rate": 1.8829039812646373e-05,
            "loss": 0.1982,
            "step": 1000
        },
        {
            "epoch": 0.44,
            "learning_rate": 1.8243559718969555e-05,
            "loss": 0.1951,
            "step": 1500
        },
        {
            "epoch": 0.59,
            "learning_rate": 1.765807962529274e-05,
            "loss": 0.1787,
            "step": 2000
        },
        {
            "epoch": 0.73,
            "learning_rate": 1.7072599531615927e-05,
            "loss": 0.1804,
            "step": 2500
        },
        {
            "epoch": 0.88,
            "learning_rate": 1.6487119437939112e-05,
            "loss": 0.1833,
            "step": 3000
        },
        {
            "epoch": 1.0,
            "eval_accuracy": 0.9657446996355014,
            "eval_f1": 0.2873458792991564,
            "eval_loss": 0.15018649399280548,
            "eval_precision": 0.6944792973651192,
            "eval_recall": 0.18114874815905743,
            "eval_runtime": 5.7645,
            "eval_samples_per_second": 857.316,
            "eval_steps_per_second": 53.604,
            "step": 3416
        },
        {
            "epoch": 1.02,
            "learning_rate": 1.5901639344262295e-05,
            "loss": 0.1703,
            "step": 3500
        },
        {
            "epoch": 1.17,
            "learning_rate": 1.531615925058548e-05,
            "loss": 0.1605,
            "step": 4000
        },
        {
            "epoch": 1.32,
            "learning_rate": 1.4730679156908668e-05,
            "loss": 0.1577,
            "step": 4500
        },
        {
            "epoch": 1.46,
            "learning_rate": 1.4145199063231852e-05,
            "loss": 0.1578,
            "step": 5000
        },
        {
            "epoch": 1.61,
            "learning_rate": 1.3559718969555036e-05,
            "loss": 0.1526,
            "step": 5500
        },
        {
            "epoch": 1.76,
            "learning_rate": 1.2974238875878221e-05,
            "loss": 0.1513,
            "step": 6000
        },
        {
            "epoch": 1.9,
            "learning_rate": 1.2388758782201407e-05,
            "loss": 0.1557,
            "step": 6500
        },
        {
            "epoch": 2.0,
            "eval_accuracy": 0.9658519051042416,
            "eval_f1": 0.2957278076234331,
            "eval_loss": 0.15432876348495483,
            "eval_precision": 0.677211482132396,
            "eval_recall": 0.1891670757650139,
            "eval_runtime": 5.6853,
            "eval_samples_per_second": 869.262,
            "eval_steps_per_second": 54.351,
            "step": 6832
        },
        {
            "epoch": 2.05,
            "learning_rate": 1.1803278688524591e-05,
            "loss": 0.1516,
            "step": 7000
        },
        {
            "epoch": 2.2,
            "learning_rate": 1.1217798594847775e-05,
            "loss": 0.1301,
            "step": 7500
        },
        {
            "epoch": 2.34,
            "learning_rate": 1.0632318501170963e-05,
            "loss": 0.1321,
            "step": 8000
        },
        {
            "epoch": 2.49,
            "learning_rate": 1.0046838407494147e-05,
            "loss": 0.1262,
            "step": 8500
        },
        {
            "epoch": 2.63,
            "learning_rate": 9.461358313817332e-06,
            "loss": 0.1365,
            "step": 9000
        },
        {
            "epoch": 2.78,
            "learning_rate": 8.875878220140516e-06,
            "loss": 0.1339,
            "step": 9500
        },
        {
            "epoch": 2.93,
            "learning_rate": 8.290398126463702e-06,
            "loss": 0.1267,
            "step": 10000
        },
        {
            "epoch": 3.0,
            "eval_accuracy": 0.9658077616759367,
            "eval_f1": 0.32741425443930916,
            "eval_loss": 0.15831299126148224,
            "eval_precision": 0.6376125059213643,
            "eval_recall": 0.22025855015545737,
            "eval_runtime": 5.7385,
            "eval_samples_per_second": 861.2,
            "eval_steps_per_second": 53.847,
            "step": 10248
        },
        {
            "epoch": 3.07,
            "learning_rate": 7.704918032786886e-06,
            "loss": 0.1247,
            "step": 10500
        },
        {
            "epoch": 3.22,
            "learning_rate": 7.119437939110071e-06,
            "loss": 0.1106,
            "step": 11000
        },
        {
            "epoch": 3.37,
            "learning_rate": 6.5339578454332556e-06,
            "loss": 0.1079,
            "step": 11500
        },
        {
            "epoch": 3.51,
            "learning_rate": 5.94847775175644e-06,
            "loss": 0.1124,
            "step": 12000
        },
        {
            "epoch": 3.66,
            "learning_rate": 5.362997658079626e-06,
            "loss": 0.1091,
            "step": 12500
        },
        {
            "epoch": 3.81,
            "learning_rate": 4.77751756440281e-06,
            "loss": 0.1141,
            "step": 13000
        },
        {
            "epoch": 3.95,
            "learning_rate": 4.192037470725996e-06,
            "loss": 0.1085,
            "step": 13500
        },
        {
            "epoch": 4.0,
            "eval_accuracy": 0.9649879551502768,
            "eval_f1": 0.3396051103368177,
            "eval_loss": 0.16941741108894348,
            "eval_precision": 0.5850340136054422,
            "eval_recall": 0.2392407134675176,
            "eval_runtime": 5.6434,
            "eval_samples_per_second": 875.708,
            "eval_steps_per_second": 54.754,
            "step": 13664
        },
        {
            "epoch": 4.1,
            "learning_rate": 3.6065573770491806e-06,
            "loss": 0.1036,
            "step": 14000
        },
        {
            "epoch": 4.24,
            "learning_rate": 3.0210772833723655e-06,
            "loss": 0.0961,
            "step": 14500
        },
        {
            "epoch": 4.39,
            "learning_rate": 2.4355971896955503e-06,
            "loss": 0.0959,
            "step": 15000
        },
        {
            "epoch": 4.54,
            "learning_rate": 1.8501170960187356e-06,
            "loss": 0.0958,
            "step": 15500
        },
        {
            "epoch": 4.68,
            "learning_rate": 1.2646370023419204e-06,
            "loss": 0.0949,
            "step": 16000
        },
        {
            "epoch": 4.83,
            "learning_rate": 6.791569086651055e-07,
            "loss": 0.0946,
            "step": 16500
        },
        {
            "epoch": 4.98,
            "learning_rate": 9.367681498829041e-08,
            "loss": 0.0919,
            "step": 17000
        },
        {
            "epoch": 5.0,
            "eval_accuracy": 0.9652086722918006,
            "eval_f1": 0.3449392712550607,
            "eval_loss": 0.1819370836019516,
            "eval_precision": 0.5883977900552486,
            "eval_recall": 0.24398625429553264,
            "eval_runtime": 5.7966,
            "eval_samples_per_second": 852.569,
            "eval_steps_per_second": 53.307,
            "step": 17080
        }
    ],
    "max_steps": 17080,
    "num_train_epochs": 5,
    "total_flos": 9598023469073754.0,
    "trial_name": null,
    "trial_params": null
}