|
{ |
|
"best_metric": 1.4071465730667114, |
|
"best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-1520", |
|
"epoch": 15.9958071278826, |
|
"global_step": 1520, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5e-09, |
|
"loss": 6.0549, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 1e-07, |
|
"loss": 5.9202, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2e-07, |
|
"loss": 5.3467, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3e-07, |
|
"loss": 4.6465, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 4e-07, |
|
"loss": 4.3418, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 3.378068447113037, |
|
"eval_runtime": 2.5518, |
|
"eval_samples_per_second": 103.849, |
|
"eval_steps_per_second": 20.77, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 5e-07, |
|
"loss": 4.0362, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 6e-07, |
|
"loss": 3.469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 7e-07, |
|
"loss": 2.8971, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8e-07, |
|
"loss": 2.4616, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 9e-07, |
|
"loss": 2.2282, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 2.078709840774536, |
|
"eval_runtime": 2.5357, |
|
"eval_samples_per_second": 104.508, |
|
"eval_steps_per_second": 20.902, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 1e-06, |
|
"loss": 2.2418, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 9.863636363636363e-07, |
|
"loss": 2.0575, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 9.727272727272727e-07, |
|
"loss": 2.0621, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 9.59090909090909e-07, |
|
"loss": 1.9103, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 9.454545454545455e-07, |
|
"loss": 1.947, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.817661166191101, |
|
"eval_runtime": 2.5208, |
|
"eval_samples_per_second": 105.125, |
|
"eval_steps_per_second": 21.025, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 9.318181818181817e-07, |
|
"loss": 1.9331, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 9.181818181818181e-07, |
|
"loss": 1.7866, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 9.045454545454545e-07, |
|
"loss": 1.8531, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 8.909090909090909e-07, |
|
"loss": 1.7661, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 8.772727272727273e-07, |
|
"loss": 1.7924, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.6833910942077637, |
|
"eval_runtime": 2.5259, |
|
"eval_samples_per_second": 104.913, |
|
"eval_steps_per_second": 20.983, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 8.636363636363636e-07, |
|
"loss": 1.7426, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.42, |
|
"learning_rate": 8.5e-07, |
|
"loss": 1.6863, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 4.63, |
|
"learning_rate": 8.363636363636363e-07, |
|
"loss": 1.6788, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 4.84, |
|
"learning_rate": 8.227272727272727e-07, |
|
"loss": 1.6764, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.613844633102417, |
|
"eval_runtime": 2.526, |
|
"eval_samples_per_second": 104.911, |
|
"eval_steps_per_second": 20.982, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 8.09090909090909e-07, |
|
"loss": 1.7464, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.954545454545454e-07, |
|
"loss": 1.6661, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 7.818181818181818e-07, |
|
"loss": 1.6213, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"learning_rate": 7.681818181818182e-07, |
|
"loss": 1.5895, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"learning_rate": 7.545454545454546e-07, |
|
"loss": 1.5868, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.5621322393417358, |
|
"eval_runtime": 2.5161, |
|
"eval_samples_per_second": 105.322, |
|
"eval_steps_per_second": 21.064, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"learning_rate": 7.409090909090909e-07, |
|
"loss": 1.6402, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 7.272727272727274e-07, |
|
"loss": 1.5571, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 7.136363636363637e-07, |
|
"loss": 1.6449, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 7.000000000000001e-07, |
|
"loss": 1.5674, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 6.863636363636363e-07, |
|
"loss": 1.605, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.530236005783081, |
|
"eval_runtime": 2.5271, |
|
"eval_samples_per_second": 104.864, |
|
"eval_steps_per_second": 20.973, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 6.727272727272727e-07, |
|
"loss": 1.5795, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 6.590909090909091e-07, |
|
"loss": 1.5291, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"learning_rate": 6.454545454545455e-07, |
|
"loss": 1.5663, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 6.318181818181818e-07, |
|
"loss": 1.5326, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"learning_rate": 6.181818181818181e-07, |
|
"loss": 1.5897, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.5036414861679077, |
|
"eval_runtime": 2.5257, |
|
"eval_samples_per_second": 104.921, |
|
"eval_steps_per_second": 20.984, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 8.21, |
|
"learning_rate": 6.045454545454545e-07, |
|
"loss": 1.5629, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 5.909090909090909e-07, |
|
"loss": 1.5069, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 8.63, |
|
"learning_rate": 5.772727272727273e-07, |
|
"loss": 1.5059, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 5.636363636363636e-07, |
|
"loss": 1.5103, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.4787436723709106, |
|
"eval_runtime": 2.5377, |
|
"eval_samples_per_second": 104.427, |
|
"eval_steps_per_second": 20.885, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"learning_rate": 5.5e-07, |
|
"loss": 1.5148, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"learning_rate": 5.363636363636363e-07, |
|
"loss": 1.4927, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 5.227272727272728e-07, |
|
"loss": 1.4826, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 9.68, |
|
"learning_rate": 5.090909090909092e-07, |
|
"loss": 1.4854, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 9.89, |
|
"learning_rate": 4.954545454545455e-07, |
|
"loss": 1.4713, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.4612189531326294, |
|
"eval_runtime": 2.5505, |
|
"eval_samples_per_second": 103.9, |
|
"eval_steps_per_second": 20.78, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 10.1, |
|
"learning_rate": 4.818181818181817e-07, |
|
"loss": 1.4923, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 10.31, |
|
"learning_rate": 4.6818181818181814e-07, |
|
"loss": 1.4864, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 10.52, |
|
"learning_rate": 4.545454545454545e-07, |
|
"loss": 1.4972, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 10.73, |
|
"learning_rate": 4.409090909090909e-07, |
|
"loss": 1.4156, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"learning_rate": 4.272727272727273e-07, |
|
"loss": 1.4455, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.44575035572052, |
|
"eval_runtime": 2.5276, |
|
"eval_samples_per_second": 104.843, |
|
"eval_steps_per_second": 20.969, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 11.16, |
|
"learning_rate": 4.1363636363636366e-07, |
|
"loss": 1.5115, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 11.37, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 1.4458, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 3.863636363636364e-07, |
|
"loss": 1.461, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 11.79, |
|
"learning_rate": 3.727272727272727e-07, |
|
"loss": 1.4437, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"learning_rate": 3.5909090909090907e-07, |
|
"loss": 1.4553, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.432648777961731, |
|
"eval_runtime": 2.5587, |
|
"eval_samples_per_second": 103.568, |
|
"eval_steps_per_second": 20.714, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 12.21, |
|
"learning_rate": 3.4545454545454544e-07, |
|
"loss": 1.4858, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 12.42, |
|
"learning_rate": 3.3181818181818177e-07, |
|
"loss": 1.4193, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.1818181818181815e-07, |
|
"loss": 1.4363, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 12.84, |
|
"learning_rate": 3.0454545454545453e-07, |
|
"loss": 1.4501, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.4239989519119263, |
|
"eval_runtime": 2.5751, |
|
"eval_samples_per_second": 102.907, |
|
"eval_steps_per_second": 20.581, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 13.05, |
|
"learning_rate": 2.909090909090909e-07, |
|
"loss": 1.4586, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 13.26, |
|
"learning_rate": 2.772727272727273e-07, |
|
"loss": 1.4263, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 13.47, |
|
"learning_rate": 2.6363636363636356e-07, |
|
"loss": 1.4484, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 2.4999999999999994e-07, |
|
"loss": 1.3985, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 13.89, |
|
"learning_rate": 2.3636363636363634e-07, |
|
"loss": 1.4134, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.4161295890808105, |
|
"eval_runtime": 2.5462, |
|
"eval_samples_per_second": 104.076, |
|
"eval_steps_per_second": 20.815, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 14.1, |
|
"learning_rate": 2.2272727272727272e-07, |
|
"loss": 1.4115, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 14.31, |
|
"learning_rate": 2.0909090909090907e-07, |
|
"loss": 1.4126, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 14.52, |
|
"learning_rate": 1.9545454545454545e-07, |
|
"loss": 1.4652, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 14.73, |
|
"learning_rate": 1.8181818181818186e-07, |
|
"loss": 1.4327, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 14.94, |
|
"learning_rate": 1.681818181818182e-07, |
|
"loss": 1.422, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 1.4098496437072754, |
|
"eval_runtime": 2.5612, |
|
"eval_samples_per_second": 103.468, |
|
"eval_steps_per_second": 20.694, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 15.16, |
|
"learning_rate": 1.5454545454545448e-07, |
|
"loss": 1.4582, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 15.37, |
|
"learning_rate": 1.4090909090909086e-07, |
|
"loss": 1.4484, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 15.58, |
|
"learning_rate": 1.2727272727272724e-07, |
|
"loss": 1.3937, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 1.1363636363636362e-07, |
|
"loss": 1.416, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"learning_rate": 1e-07, |
|
"loss": 1.4139, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 1.4071465730667114, |
|
"eval_runtime": 2.5382, |
|
"eval_samples_per_second": 104.404, |
|
"eval_steps_per_second": 20.881, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"step": 1520, |
|
"total_flos": 1443270331367424.0, |
|
"train_loss": 1.854057946016914, |
|
"train_runtime": 1311.705, |
|
"train_samples_per_second": 29.055, |
|
"train_steps_per_second": 1.159 |
|
} |
|
], |
|
"max_steps": 1520, |
|
"num_train_epochs": 16, |
|
"total_flos": 1443270331367424.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|