abdiharyadi's picture
Upload folder using huggingface_hub
79c0a48 verified
{
"best_metric": 1.4071465730667114,
"best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-1520",
"epoch": 15.9958071278826,
"global_step": 1520,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 5e-09,
"loss": 6.0549,
"step": 1
},
{
"epoch": 0.21,
"learning_rate": 1e-07,
"loss": 5.9202,
"step": 20
},
{
"epoch": 0.42,
"learning_rate": 2e-07,
"loss": 5.3467,
"step": 40
},
{
"epoch": 0.63,
"learning_rate": 3e-07,
"loss": 4.6465,
"step": 60
},
{
"epoch": 0.84,
"learning_rate": 4e-07,
"loss": 4.3418,
"step": 80
},
{
"epoch": 1.0,
"eval_loss": 3.378068447113037,
"eval_runtime": 2.5518,
"eval_samples_per_second": 103.849,
"eval_steps_per_second": 20.77,
"step": 95
},
{
"epoch": 1.05,
"learning_rate": 5e-07,
"loss": 4.0362,
"step": 100
},
{
"epoch": 1.26,
"learning_rate": 6e-07,
"loss": 3.469,
"step": 120
},
{
"epoch": 1.47,
"learning_rate": 7e-07,
"loss": 2.8971,
"step": 140
},
{
"epoch": 1.68,
"learning_rate": 8e-07,
"loss": 2.4616,
"step": 160
},
{
"epoch": 1.89,
"learning_rate": 9e-07,
"loss": 2.2282,
"step": 180
},
{
"epoch": 2.0,
"eval_loss": 2.078709840774536,
"eval_runtime": 2.5357,
"eval_samples_per_second": 104.508,
"eval_steps_per_second": 20.902,
"step": 190
},
{
"epoch": 2.1,
"learning_rate": 1e-06,
"loss": 2.2418,
"step": 200
},
{
"epoch": 2.31,
"learning_rate": 9.863636363636363e-07,
"loss": 2.0575,
"step": 220
},
{
"epoch": 2.52,
"learning_rate": 9.727272727272727e-07,
"loss": 2.0621,
"step": 240
},
{
"epoch": 2.73,
"learning_rate": 9.59090909090909e-07,
"loss": 1.9103,
"step": 260
},
{
"epoch": 2.94,
"learning_rate": 9.454545454545455e-07,
"loss": 1.947,
"step": 280
},
{
"epoch": 3.0,
"eval_loss": 1.817661166191101,
"eval_runtime": 2.5208,
"eval_samples_per_second": 105.125,
"eval_steps_per_second": 21.025,
"step": 285
},
{
"epoch": 3.16,
"learning_rate": 9.318181818181817e-07,
"loss": 1.9331,
"step": 300
},
{
"epoch": 3.37,
"learning_rate": 9.181818181818181e-07,
"loss": 1.7866,
"step": 320
},
{
"epoch": 3.58,
"learning_rate": 9.045454545454545e-07,
"loss": 1.8531,
"step": 340
},
{
"epoch": 3.79,
"learning_rate": 8.909090909090909e-07,
"loss": 1.7661,
"step": 360
},
{
"epoch": 4.0,
"learning_rate": 8.772727272727273e-07,
"loss": 1.7924,
"step": 380
},
{
"epoch": 4.0,
"eval_loss": 1.6833910942077637,
"eval_runtime": 2.5259,
"eval_samples_per_second": 104.913,
"eval_steps_per_second": 20.983,
"step": 380
},
{
"epoch": 4.21,
"learning_rate": 8.636363636363636e-07,
"loss": 1.7426,
"step": 400
},
{
"epoch": 4.42,
"learning_rate": 8.5e-07,
"loss": 1.6863,
"step": 420
},
{
"epoch": 4.63,
"learning_rate": 8.363636363636363e-07,
"loss": 1.6788,
"step": 440
},
{
"epoch": 4.84,
"learning_rate": 8.227272727272727e-07,
"loss": 1.6764,
"step": 460
},
{
"epoch": 5.0,
"eval_loss": 1.613844633102417,
"eval_runtime": 2.526,
"eval_samples_per_second": 104.911,
"eval_steps_per_second": 20.982,
"step": 475
},
{
"epoch": 5.05,
"learning_rate": 8.09090909090909e-07,
"loss": 1.7464,
"step": 480
},
{
"epoch": 5.26,
"learning_rate": 7.954545454545454e-07,
"loss": 1.6661,
"step": 500
},
{
"epoch": 5.47,
"learning_rate": 7.818181818181818e-07,
"loss": 1.6213,
"step": 520
},
{
"epoch": 5.68,
"learning_rate": 7.681818181818182e-07,
"loss": 1.5895,
"step": 540
},
{
"epoch": 5.89,
"learning_rate": 7.545454545454546e-07,
"loss": 1.5868,
"step": 560
},
{
"epoch": 6.0,
"eval_loss": 1.5621322393417358,
"eval_runtime": 2.5161,
"eval_samples_per_second": 105.322,
"eval_steps_per_second": 21.064,
"step": 570
},
{
"epoch": 6.1,
"learning_rate": 7.409090909090909e-07,
"loss": 1.6402,
"step": 580
},
{
"epoch": 6.31,
"learning_rate": 7.272727272727274e-07,
"loss": 1.5571,
"step": 600
},
{
"epoch": 6.52,
"learning_rate": 7.136363636363637e-07,
"loss": 1.6449,
"step": 620
},
{
"epoch": 6.73,
"learning_rate": 7.000000000000001e-07,
"loss": 1.5674,
"step": 640
},
{
"epoch": 6.94,
"learning_rate": 6.863636363636363e-07,
"loss": 1.605,
"step": 660
},
{
"epoch": 7.0,
"eval_loss": 1.530236005783081,
"eval_runtime": 2.5271,
"eval_samples_per_second": 104.864,
"eval_steps_per_second": 20.973,
"step": 665
},
{
"epoch": 7.16,
"learning_rate": 6.727272727272727e-07,
"loss": 1.5795,
"step": 680
},
{
"epoch": 7.37,
"learning_rate": 6.590909090909091e-07,
"loss": 1.5291,
"step": 700
},
{
"epoch": 7.58,
"learning_rate": 6.454545454545455e-07,
"loss": 1.5663,
"step": 720
},
{
"epoch": 7.79,
"learning_rate": 6.318181818181818e-07,
"loss": 1.5326,
"step": 740
},
{
"epoch": 8.0,
"learning_rate": 6.181818181818181e-07,
"loss": 1.5897,
"step": 760
},
{
"epoch": 8.0,
"eval_loss": 1.5036414861679077,
"eval_runtime": 2.5257,
"eval_samples_per_second": 104.921,
"eval_steps_per_second": 20.984,
"step": 760
},
{
"epoch": 8.21,
"learning_rate": 6.045454545454545e-07,
"loss": 1.5629,
"step": 780
},
{
"epoch": 8.42,
"learning_rate": 5.909090909090909e-07,
"loss": 1.5069,
"step": 800
},
{
"epoch": 8.63,
"learning_rate": 5.772727272727273e-07,
"loss": 1.5059,
"step": 820
},
{
"epoch": 8.84,
"learning_rate": 5.636363636363636e-07,
"loss": 1.5103,
"step": 840
},
{
"epoch": 9.0,
"eval_loss": 1.4787436723709106,
"eval_runtime": 2.5377,
"eval_samples_per_second": 104.427,
"eval_steps_per_second": 20.885,
"step": 855
},
{
"epoch": 9.05,
"learning_rate": 5.5e-07,
"loss": 1.5148,
"step": 860
},
{
"epoch": 9.26,
"learning_rate": 5.363636363636363e-07,
"loss": 1.4927,
"step": 880
},
{
"epoch": 9.47,
"learning_rate": 5.227272727272728e-07,
"loss": 1.4826,
"step": 900
},
{
"epoch": 9.68,
"learning_rate": 5.090909090909092e-07,
"loss": 1.4854,
"step": 920
},
{
"epoch": 9.89,
"learning_rate": 4.954545454545455e-07,
"loss": 1.4713,
"step": 940
},
{
"epoch": 10.0,
"eval_loss": 1.4612189531326294,
"eval_runtime": 2.5505,
"eval_samples_per_second": 103.9,
"eval_steps_per_second": 20.78,
"step": 950
},
{
"epoch": 10.1,
"learning_rate": 4.818181818181817e-07,
"loss": 1.4923,
"step": 960
},
{
"epoch": 10.31,
"learning_rate": 4.6818181818181814e-07,
"loss": 1.4864,
"step": 980
},
{
"epoch": 10.52,
"learning_rate": 4.545454545454545e-07,
"loss": 1.4972,
"step": 1000
},
{
"epoch": 10.73,
"learning_rate": 4.409090909090909e-07,
"loss": 1.4156,
"step": 1020
},
{
"epoch": 10.94,
"learning_rate": 4.272727272727273e-07,
"loss": 1.4455,
"step": 1040
},
{
"epoch": 11.0,
"eval_loss": 1.44575035572052,
"eval_runtime": 2.5276,
"eval_samples_per_second": 104.843,
"eval_steps_per_second": 20.969,
"step": 1045
},
{
"epoch": 11.16,
"learning_rate": 4.1363636363636366e-07,
"loss": 1.5115,
"step": 1060
},
{
"epoch": 11.37,
"learning_rate": 4.0000000000000003e-07,
"loss": 1.4458,
"step": 1080
},
{
"epoch": 11.58,
"learning_rate": 3.863636363636364e-07,
"loss": 1.461,
"step": 1100
},
{
"epoch": 11.79,
"learning_rate": 3.727272727272727e-07,
"loss": 1.4437,
"step": 1120
},
{
"epoch": 12.0,
"learning_rate": 3.5909090909090907e-07,
"loss": 1.4553,
"step": 1140
},
{
"epoch": 12.0,
"eval_loss": 1.432648777961731,
"eval_runtime": 2.5587,
"eval_samples_per_second": 103.568,
"eval_steps_per_second": 20.714,
"step": 1140
},
{
"epoch": 12.21,
"learning_rate": 3.4545454545454544e-07,
"loss": 1.4858,
"step": 1160
},
{
"epoch": 12.42,
"learning_rate": 3.3181818181818177e-07,
"loss": 1.4193,
"step": 1180
},
{
"epoch": 12.63,
"learning_rate": 3.1818181818181815e-07,
"loss": 1.4363,
"step": 1200
},
{
"epoch": 12.84,
"learning_rate": 3.0454545454545453e-07,
"loss": 1.4501,
"step": 1220
},
{
"epoch": 13.0,
"eval_loss": 1.4239989519119263,
"eval_runtime": 2.5751,
"eval_samples_per_second": 102.907,
"eval_steps_per_second": 20.581,
"step": 1235
},
{
"epoch": 13.05,
"learning_rate": 2.909090909090909e-07,
"loss": 1.4586,
"step": 1240
},
{
"epoch": 13.26,
"learning_rate": 2.772727272727273e-07,
"loss": 1.4263,
"step": 1260
},
{
"epoch": 13.47,
"learning_rate": 2.6363636363636356e-07,
"loss": 1.4484,
"step": 1280
},
{
"epoch": 13.68,
"learning_rate": 2.4999999999999994e-07,
"loss": 1.3985,
"step": 1300
},
{
"epoch": 13.89,
"learning_rate": 2.3636363636363634e-07,
"loss": 1.4134,
"step": 1320
},
{
"epoch": 14.0,
"eval_loss": 1.4161295890808105,
"eval_runtime": 2.5462,
"eval_samples_per_second": 104.076,
"eval_steps_per_second": 20.815,
"step": 1330
},
{
"epoch": 14.1,
"learning_rate": 2.2272727272727272e-07,
"loss": 1.4115,
"step": 1340
},
{
"epoch": 14.31,
"learning_rate": 2.0909090909090907e-07,
"loss": 1.4126,
"step": 1360
},
{
"epoch": 14.52,
"learning_rate": 1.9545454545454545e-07,
"loss": 1.4652,
"step": 1380
},
{
"epoch": 14.73,
"learning_rate": 1.8181818181818186e-07,
"loss": 1.4327,
"step": 1400
},
{
"epoch": 14.94,
"learning_rate": 1.681818181818182e-07,
"loss": 1.422,
"step": 1420
},
{
"epoch": 15.0,
"eval_loss": 1.4098496437072754,
"eval_runtime": 2.5612,
"eval_samples_per_second": 103.468,
"eval_steps_per_second": 20.694,
"step": 1425
},
{
"epoch": 15.16,
"learning_rate": 1.5454545454545448e-07,
"loss": 1.4582,
"step": 1440
},
{
"epoch": 15.37,
"learning_rate": 1.4090909090909086e-07,
"loss": 1.4484,
"step": 1460
},
{
"epoch": 15.58,
"learning_rate": 1.2727272727272724e-07,
"loss": 1.3937,
"step": 1480
},
{
"epoch": 15.79,
"learning_rate": 1.1363636363636362e-07,
"loss": 1.416,
"step": 1500
},
{
"epoch": 16.0,
"learning_rate": 1e-07,
"loss": 1.4139,
"step": 1520
},
{
"epoch": 16.0,
"eval_loss": 1.4071465730667114,
"eval_runtime": 2.5382,
"eval_samples_per_second": 104.404,
"eval_steps_per_second": 20.881,
"step": 1520
},
{
"epoch": 16.0,
"step": 1520,
"total_flos": 1443270331367424.0,
"train_loss": 1.854057946016914,
"train_runtime": 1311.705,
"train_samples_per_second": 29.055,
"train_steps_per_second": 1.159
}
],
"max_steps": 1520,
"num_train_epochs": 16,
"total_flos": 1443270331367424.0,
"trial_name": null,
"trial_params": null
}