{ "best_metric": 1.4071465730667114, "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted/checkpoint-1520", "epoch": 15.9958071278826, "global_step": 1520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5e-09, "loss": 6.0549, "step": 1 }, { "epoch": 0.21, "learning_rate": 1e-07, "loss": 5.9202, "step": 20 }, { "epoch": 0.42, "learning_rate": 2e-07, "loss": 5.3467, "step": 40 }, { "epoch": 0.63, "learning_rate": 3e-07, "loss": 4.6465, "step": 60 }, { "epoch": 0.84, "learning_rate": 4e-07, "loss": 4.3418, "step": 80 }, { "epoch": 1.0, "eval_loss": 3.378068447113037, "eval_runtime": 2.5518, "eval_samples_per_second": 103.849, "eval_steps_per_second": 20.77, "step": 95 }, { "epoch": 1.05, "learning_rate": 5e-07, "loss": 4.0362, "step": 100 }, { "epoch": 1.26, "learning_rate": 6e-07, "loss": 3.469, "step": 120 }, { "epoch": 1.47, "learning_rate": 7e-07, "loss": 2.8971, "step": 140 }, { "epoch": 1.68, "learning_rate": 8e-07, "loss": 2.4616, "step": 160 }, { "epoch": 1.89, "learning_rate": 9e-07, "loss": 2.2282, "step": 180 }, { "epoch": 2.0, "eval_loss": 2.078709840774536, "eval_runtime": 2.5357, "eval_samples_per_second": 104.508, "eval_steps_per_second": 20.902, "step": 190 }, { "epoch": 2.1, "learning_rate": 1e-06, "loss": 2.2418, "step": 200 }, { "epoch": 2.31, "learning_rate": 9.863636363636363e-07, "loss": 2.0575, "step": 220 }, { "epoch": 2.52, "learning_rate": 9.727272727272727e-07, "loss": 2.0621, "step": 240 }, { "epoch": 2.73, "learning_rate": 9.59090909090909e-07, "loss": 1.9103, "step": 260 }, { "epoch": 2.94, "learning_rate": 9.454545454545455e-07, "loss": 1.947, "step": 280 }, { "epoch": 3.0, "eval_loss": 1.817661166191101, "eval_runtime": 2.5208, "eval_samples_per_second": 105.125, "eval_steps_per_second": 21.025, "step": 285 }, { "epoch": 3.16, "learning_rate": 9.318181818181817e-07, "loss": 1.9331, "step": 300 }, { "epoch": 3.37, "learning_rate": 9.181818181818181e-07, "loss": 1.7866, "step": 320 }, { "epoch": 3.58, "learning_rate": 9.045454545454545e-07, "loss": 1.8531, "step": 340 }, { "epoch": 3.79, "learning_rate": 8.909090909090909e-07, "loss": 1.7661, "step": 360 }, { "epoch": 4.0, "learning_rate": 8.772727272727273e-07, "loss": 1.7924, "step": 380 }, { "epoch": 4.0, "eval_loss": 1.6833910942077637, "eval_runtime": 2.5259, "eval_samples_per_second": 104.913, "eval_steps_per_second": 20.983, "step": 380 }, { "epoch": 4.21, "learning_rate": 8.636363636363636e-07, "loss": 1.7426, "step": 400 }, { "epoch": 4.42, "learning_rate": 8.5e-07, "loss": 1.6863, "step": 420 }, { "epoch": 4.63, "learning_rate": 8.363636363636363e-07, "loss": 1.6788, "step": 440 }, { "epoch": 4.84, "learning_rate": 8.227272727272727e-07, "loss": 1.6764, "step": 460 }, { "epoch": 5.0, "eval_loss": 1.613844633102417, "eval_runtime": 2.526, "eval_samples_per_second": 104.911, "eval_steps_per_second": 20.982, "step": 475 }, { "epoch": 5.05, "learning_rate": 8.09090909090909e-07, "loss": 1.7464, "step": 480 }, { "epoch": 5.26, "learning_rate": 7.954545454545454e-07, "loss": 1.6661, "step": 500 }, { "epoch": 5.47, "learning_rate": 7.818181818181818e-07, "loss": 1.6213, "step": 520 }, { "epoch": 5.68, "learning_rate": 7.681818181818182e-07, "loss": 1.5895, "step": 540 }, { "epoch": 5.89, "learning_rate": 7.545454545454546e-07, "loss": 1.5868, "step": 560 }, { "epoch": 6.0, "eval_loss": 1.5621322393417358, "eval_runtime": 2.5161, "eval_samples_per_second": 105.322, "eval_steps_per_second": 21.064, "step": 570 }, { "epoch": 6.1, "learning_rate": 7.409090909090909e-07, "loss": 1.6402, "step": 580 }, { "epoch": 6.31, "learning_rate": 7.272727272727274e-07, "loss": 1.5571, "step": 600 }, { "epoch": 6.52, "learning_rate": 7.136363636363637e-07, "loss": 1.6449, "step": 620 }, { "epoch": 6.73, "learning_rate": 7.000000000000001e-07, "loss": 1.5674, "step": 640 }, { "epoch": 6.94, "learning_rate": 6.863636363636363e-07, "loss": 1.605, "step": 660 }, { "epoch": 7.0, "eval_loss": 1.530236005783081, "eval_runtime": 2.5271, "eval_samples_per_second": 104.864, "eval_steps_per_second": 20.973, "step": 665 }, { "epoch": 7.16, "learning_rate": 6.727272727272727e-07, "loss": 1.5795, "step": 680 }, { "epoch": 7.37, "learning_rate": 6.590909090909091e-07, "loss": 1.5291, "step": 700 }, { "epoch": 7.58, "learning_rate": 6.454545454545455e-07, "loss": 1.5663, "step": 720 }, { "epoch": 7.79, "learning_rate": 6.318181818181818e-07, "loss": 1.5326, "step": 740 }, { "epoch": 8.0, "learning_rate": 6.181818181818181e-07, "loss": 1.5897, "step": 760 }, { "epoch": 8.0, "eval_loss": 1.5036414861679077, "eval_runtime": 2.5257, "eval_samples_per_second": 104.921, "eval_steps_per_second": 20.984, "step": 760 }, { "epoch": 8.21, "learning_rate": 6.045454545454545e-07, "loss": 1.5629, "step": 780 }, { "epoch": 8.42, "learning_rate": 5.909090909090909e-07, "loss": 1.5069, "step": 800 }, { "epoch": 8.63, "learning_rate": 5.772727272727273e-07, "loss": 1.5059, "step": 820 }, { "epoch": 8.84, "learning_rate": 5.636363636363636e-07, "loss": 1.5103, "step": 840 }, { "epoch": 9.0, "eval_loss": 1.4787436723709106, "eval_runtime": 2.5377, "eval_samples_per_second": 104.427, "eval_steps_per_second": 20.885, "step": 855 }, { "epoch": 9.05, "learning_rate": 5.5e-07, "loss": 1.5148, "step": 860 }, { "epoch": 9.26, "learning_rate": 5.363636363636363e-07, "loss": 1.4927, "step": 880 }, { "epoch": 9.47, "learning_rate": 5.227272727272728e-07, "loss": 1.4826, "step": 900 }, { "epoch": 9.68, "learning_rate": 5.090909090909092e-07, "loss": 1.4854, "step": 920 }, { "epoch": 9.89, "learning_rate": 4.954545454545455e-07, "loss": 1.4713, "step": 940 }, { "epoch": 10.0, "eval_loss": 1.4612189531326294, "eval_runtime": 2.5505, "eval_samples_per_second": 103.9, "eval_steps_per_second": 20.78, "step": 950 }, { "epoch": 10.1, "learning_rate": 4.818181818181817e-07, "loss": 1.4923, "step": 960 }, { "epoch": 10.31, "learning_rate": 4.6818181818181814e-07, "loss": 1.4864, "step": 980 }, { "epoch": 10.52, "learning_rate": 4.545454545454545e-07, "loss": 1.4972, "step": 1000 }, { "epoch": 10.73, "learning_rate": 4.409090909090909e-07, "loss": 1.4156, "step": 1020 }, { "epoch": 10.94, "learning_rate": 4.272727272727273e-07, "loss": 1.4455, "step": 1040 }, { "epoch": 11.0, "eval_loss": 1.44575035572052, "eval_runtime": 2.5276, "eval_samples_per_second": 104.843, "eval_steps_per_second": 20.969, "step": 1045 }, { "epoch": 11.16, "learning_rate": 4.1363636363636366e-07, "loss": 1.5115, "step": 1060 }, { "epoch": 11.37, "learning_rate": 4.0000000000000003e-07, "loss": 1.4458, "step": 1080 }, { "epoch": 11.58, "learning_rate": 3.863636363636364e-07, "loss": 1.461, "step": 1100 }, { "epoch": 11.79, "learning_rate": 3.727272727272727e-07, "loss": 1.4437, "step": 1120 }, { "epoch": 12.0, "learning_rate": 3.5909090909090907e-07, "loss": 1.4553, "step": 1140 }, { "epoch": 12.0, "eval_loss": 1.432648777961731, "eval_runtime": 2.5587, "eval_samples_per_second": 103.568, "eval_steps_per_second": 20.714, "step": 1140 }, { "epoch": 12.21, "learning_rate": 3.4545454545454544e-07, "loss": 1.4858, "step": 1160 }, { "epoch": 12.42, "learning_rate": 3.3181818181818177e-07, "loss": 1.4193, "step": 1180 }, { "epoch": 12.63, "learning_rate": 3.1818181818181815e-07, "loss": 1.4363, "step": 1200 }, { "epoch": 12.84, "learning_rate": 3.0454545454545453e-07, "loss": 1.4501, "step": 1220 }, { "epoch": 13.0, "eval_loss": 1.4239989519119263, "eval_runtime": 2.5751, "eval_samples_per_second": 102.907, "eval_steps_per_second": 20.581, "step": 1235 }, { "epoch": 13.05, "learning_rate": 2.909090909090909e-07, "loss": 1.4586, "step": 1240 }, { "epoch": 13.26, "learning_rate": 2.772727272727273e-07, "loss": 1.4263, "step": 1260 }, { "epoch": 13.47, "learning_rate": 2.6363636363636356e-07, "loss": 1.4484, "step": 1280 }, { "epoch": 13.68, "learning_rate": 2.4999999999999994e-07, "loss": 1.3985, "step": 1300 }, { "epoch": 13.89, "learning_rate": 2.3636363636363634e-07, "loss": 1.4134, "step": 1320 }, { "epoch": 14.0, "eval_loss": 1.4161295890808105, "eval_runtime": 2.5462, "eval_samples_per_second": 104.076, "eval_steps_per_second": 20.815, "step": 1330 }, { "epoch": 14.1, "learning_rate": 2.2272727272727272e-07, "loss": 1.4115, "step": 1340 }, { "epoch": 14.31, "learning_rate": 2.0909090909090907e-07, "loss": 1.4126, "step": 1360 }, { "epoch": 14.52, "learning_rate": 1.9545454545454545e-07, "loss": 1.4652, "step": 1380 }, { "epoch": 14.73, "learning_rate": 1.8181818181818186e-07, "loss": 1.4327, "step": 1400 }, { "epoch": 14.94, "learning_rate": 1.681818181818182e-07, "loss": 1.422, "step": 1420 }, { "epoch": 15.0, "eval_loss": 1.4098496437072754, "eval_runtime": 2.5612, "eval_samples_per_second": 103.468, "eval_steps_per_second": 20.694, "step": 1425 }, { "epoch": 15.16, "learning_rate": 1.5454545454545448e-07, "loss": 1.4582, "step": 1440 }, { "epoch": 15.37, "learning_rate": 1.4090909090909086e-07, "loss": 1.4484, "step": 1460 }, { "epoch": 15.58, "learning_rate": 1.2727272727272724e-07, "loss": 1.3937, "step": 1480 }, { "epoch": 15.79, "learning_rate": 1.1363636363636362e-07, "loss": 1.416, "step": 1500 }, { "epoch": 16.0, "learning_rate": 1e-07, "loss": 1.4139, "step": 1520 }, { "epoch": 16.0, "eval_loss": 1.4071465730667114, "eval_runtime": 2.5382, "eval_samples_per_second": 104.404, "eval_steps_per_second": 20.881, "step": 1520 }, { "epoch": 16.0, "step": 1520, "total_flos": 1443270331367424.0, "train_loss": 1.854057946016914, "train_runtime": 1311.705, "train_samples_per_second": 29.055, "train_steps_per_second": 1.159 } ], "max_steps": 1520, "num_train_epochs": 16, "total_flos": 1443270331367424.0, "trial_name": null, "trial_params": null }