icebert-xlmr-ic3-iec / trainer_state.json
vesteinn's picture
Added files
bb828a1
raw history blame
No virus
6.44 kB
{
"best_metric": 0.3449392712550607,
"best_model_checkpoint": "xlmr-base-cont-is-bsz160k-steps225k-hf-finetuned-iec-token-errors-uniq-42/checkpoint-17080",
"epoch": 5.0,
"global_step": 17080,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.15,
"learning_rate": 1.9414519906323187e-05,
"loss": 0.2543,
"step": 500
},
{
"epoch": 0.29,
"learning_rate": 1.8829039812646373e-05,
"loss": 0.1982,
"step": 1000
},
{
"epoch": 0.44,
"learning_rate": 1.8243559718969555e-05,
"loss": 0.1951,
"step": 1500
},
{
"epoch": 0.59,
"learning_rate": 1.765807962529274e-05,
"loss": 0.1787,
"step": 2000
},
{
"epoch": 0.73,
"learning_rate": 1.7072599531615927e-05,
"loss": 0.1804,
"step": 2500
},
{
"epoch": 0.88,
"learning_rate": 1.6487119437939112e-05,
"loss": 0.1833,
"step": 3000
},
{
"epoch": 1.0,
"eval_accuracy": 0.9657446996355014,
"eval_f1": 0.2873458792991564,
"eval_loss": 0.15018649399280548,
"eval_precision": 0.6944792973651192,
"eval_recall": 0.18114874815905743,
"eval_runtime": 5.7645,
"eval_samples_per_second": 857.316,
"eval_steps_per_second": 53.604,
"step": 3416
},
{
"epoch": 1.02,
"learning_rate": 1.5901639344262295e-05,
"loss": 0.1703,
"step": 3500
},
{
"epoch": 1.17,
"learning_rate": 1.531615925058548e-05,
"loss": 0.1605,
"step": 4000
},
{
"epoch": 1.32,
"learning_rate": 1.4730679156908668e-05,
"loss": 0.1577,
"step": 4500
},
{
"epoch": 1.46,
"learning_rate": 1.4145199063231852e-05,
"loss": 0.1578,
"step": 5000
},
{
"epoch": 1.61,
"learning_rate": 1.3559718969555036e-05,
"loss": 0.1526,
"step": 5500
},
{
"epoch": 1.76,
"learning_rate": 1.2974238875878221e-05,
"loss": 0.1513,
"step": 6000
},
{
"epoch": 1.9,
"learning_rate": 1.2388758782201407e-05,
"loss": 0.1557,
"step": 6500
},
{
"epoch": 2.0,
"eval_accuracy": 0.9658519051042416,
"eval_f1": 0.2957278076234331,
"eval_loss": 0.15432876348495483,
"eval_precision": 0.677211482132396,
"eval_recall": 0.1891670757650139,
"eval_runtime": 5.6853,
"eval_samples_per_second": 869.262,
"eval_steps_per_second": 54.351,
"step": 6832
},
{
"epoch": 2.05,
"learning_rate": 1.1803278688524591e-05,
"loss": 0.1516,
"step": 7000
},
{
"epoch": 2.2,
"learning_rate": 1.1217798594847775e-05,
"loss": 0.1301,
"step": 7500
},
{
"epoch": 2.34,
"learning_rate": 1.0632318501170963e-05,
"loss": 0.1321,
"step": 8000
},
{
"epoch": 2.49,
"learning_rate": 1.0046838407494147e-05,
"loss": 0.1262,
"step": 8500
},
{
"epoch": 2.63,
"learning_rate": 9.461358313817332e-06,
"loss": 0.1365,
"step": 9000
},
{
"epoch": 2.78,
"learning_rate": 8.875878220140516e-06,
"loss": 0.1339,
"step": 9500
},
{
"epoch": 2.93,
"learning_rate": 8.290398126463702e-06,
"loss": 0.1267,
"step": 10000
},
{
"epoch": 3.0,
"eval_accuracy": 0.9658077616759367,
"eval_f1": 0.32741425443930916,
"eval_loss": 0.15831299126148224,
"eval_precision": 0.6376125059213643,
"eval_recall": 0.22025855015545737,
"eval_runtime": 5.7385,
"eval_samples_per_second": 861.2,
"eval_steps_per_second": 53.847,
"step": 10248
},
{
"epoch": 3.07,
"learning_rate": 7.704918032786886e-06,
"loss": 0.1247,
"step": 10500
},
{
"epoch": 3.22,
"learning_rate": 7.119437939110071e-06,
"loss": 0.1106,
"step": 11000
},
{
"epoch": 3.37,
"learning_rate": 6.5339578454332556e-06,
"loss": 0.1079,
"step": 11500
},
{
"epoch": 3.51,
"learning_rate": 5.94847775175644e-06,
"loss": 0.1124,
"step": 12000
},
{
"epoch": 3.66,
"learning_rate": 5.362997658079626e-06,
"loss": 0.1091,
"step": 12500
},
{
"epoch": 3.81,
"learning_rate": 4.77751756440281e-06,
"loss": 0.1141,
"step": 13000
},
{
"epoch": 3.95,
"learning_rate": 4.192037470725996e-06,
"loss": 0.1085,
"step": 13500
},
{
"epoch": 4.0,
"eval_accuracy": 0.9649879551502768,
"eval_f1": 0.3396051103368177,
"eval_loss": 0.16941741108894348,
"eval_precision": 0.5850340136054422,
"eval_recall": 0.2392407134675176,
"eval_runtime": 5.6434,
"eval_samples_per_second": 875.708,
"eval_steps_per_second": 54.754,
"step": 13664
},
{
"epoch": 4.1,
"learning_rate": 3.6065573770491806e-06,
"loss": 0.1036,
"step": 14000
},
{
"epoch": 4.24,
"learning_rate": 3.0210772833723655e-06,
"loss": 0.0961,
"step": 14500
},
{
"epoch": 4.39,
"learning_rate": 2.4355971896955503e-06,
"loss": 0.0959,
"step": 15000
},
{
"epoch": 4.54,
"learning_rate": 1.8501170960187356e-06,
"loss": 0.0958,
"step": 15500
},
{
"epoch": 4.68,
"learning_rate": 1.2646370023419204e-06,
"loss": 0.0949,
"step": 16000
},
{
"epoch": 4.83,
"learning_rate": 6.791569086651055e-07,
"loss": 0.0946,
"step": 16500
},
{
"epoch": 4.98,
"learning_rate": 9.367681498829041e-08,
"loss": 0.0919,
"step": 17000
},
{
"epoch": 5.0,
"eval_accuracy": 0.9652086722918006,
"eval_f1": 0.3449392712550607,
"eval_loss": 0.1819370836019516,
"eval_precision": 0.5883977900552486,
"eval_recall": 0.24398625429553264,
"eval_runtime": 5.7966,
"eval_samples_per_second": 852.569,
"eval_steps_per_second": 53.307,
"step": 17080
}
],
"max_steps": 17080,
"num_train_epochs": 5,
"total_flos": 9598023469073754.0,
"trial_name": null,
"trial_params": null
}