{ "best_metric": 76.59314722877838, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Japanese-GSD/checkpoint-4000", "epoch": 29.41176470588235, "global_step": 6500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.45, "learning_rate": 7.52e-05, "loss": 3.6592, "step": 100 }, { "epoch": 0.9, "learning_rate": 7.949530201342283e-05, "loss": 1.457, "step": 200 }, { "epoch": 1.36, "learning_rate": 7.895838926174497e-05, "loss": 1.1567, "step": 300 }, { "epoch": 1.81, "learning_rate": 7.842147651006712e-05, "loss": 1.0594, "step": 400 }, { "epoch": 2.26, "learning_rate": 7.788456375838927e-05, "loss": 0.9461, "step": 500 }, { "epoch": 2.26, "eval_las": 72.71913404411167, "eval_loss": 0.9768415689468384, "eval_runtime": 3.6781, "eval_samples_per_second": 137.841, "eval_steps_per_second": 17.4, "eval_uas": 77.79767233661593, "step": 500 }, { "epoch": 2.71, "learning_rate": 7.734765100671142e-05, "loss": 0.8798, "step": 600 }, { "epoch": 3.17, "learning_rate": 7.681073825503357e-05, "loss": 0.8167, "step": 700 }, { "epoch": 3.62, "learning_rate": 7.627382550335572e-05, "loss": 0.7486, "step": 800 }, { "epoch": 4.07, "learning_rate": 7.573691275167786e-05, "loss": 0.7195, "step": 900 }, { "epoch": 4.52, "learning_rate": 7.52e-05, "loss": 0.6067, "step": 1000 }, { "epoch": 4.52, "eval_las": 75.72230813054448, "eval_loss": 0.9424547553062439, "eval_runtime": 3.6759, "eval_samples_per_second": 137.927, "eval_steps_per_second": 17.411, "eval_uas": 80.39391226499552, "step": 1000 }, { "epoch": 4.98, "learning_rate": 7.466308724832215e-05, "loss": 0.6358, "step": 1100 }, { "epoch": 5.43, "learning_rate": 7.41261744966443e-05, "loss": 0.4864, "step": 1200 }, { "epoch": 5.88, "learning_rate": 7.358926174496644e-05, "loss": 0.5311, "step": 1300 }, { "epoch": 6.33, "learning_rate": 7.305234899328859e-05, "loss": 0.4331, "step": 1400 }, { "epoch": 6.79, "learning_rate": 7.251543624161074e-05, "loss": 0.4255, "step": 1500 }, { "epoch": 6.79, "eval_las": 75.43745421990722, "eval_loss": 1.1136951446533203, "eval_runtime": 3.6925, "eval_samples_per_second": 137.307, "eval_steps_per_second": 17.333, "eval_uas": 80.19044518596891, "step": 1500 }, { "epoch": 7.24, "learning_rate": 7.197852348993289e-05, "loss": 0.3744, "step": 1600 }, { "epoch": 7.69, "learning_rate": 7.144161073825504e-05, "loss": 0.3474, "step": 1700 }, { "epoch": 8.14, "learning_rate": 7.090469798657718e-05, "loss": 0.3244, "step": 1800 }, { "epoch": 8.6, "learning_rate": 7.036778523489933e-05, "loss": 0.2716, "step": 1900 }, { "epoch": 9.05, "learning_rate": 6.983087248322148e-05, "loss": 0.2877, "step": 2000 }, { "epoch": 9.05, "eval_las": 75.58395051680637, "eval_loss": 1.3644814491271973, "eval_runtime": 3.6735, "eval_samples_per_second": 138.014, "eval_steps_per_second": 17.422, "eval_uas": 80.19858386912998, "step": 2000 }, { "epoch": 9.5, "learning_rate": 6.929395973154363e-05, "loss": 0.2162, "step": 2100 }, { "epoch": 9.95, "learning_rate": 6.875704697986578e-05, "loss": 0.2386, "step": 2200 }, { "epoch": 10.41, "learning_rate": 6.822013422818793e-05, "loss": 0.1861, "step": 2300 }, { "epoch": 10.86, "learning_rate": 6.768322147651007e-05, "loss": 0.1989, "step": 2400 }, { "epoch": 11.31, "learning_rate": 6.714630872483222e-05, "loss": 0.1631, "step": 2500 }, { "epoch": 11.31, "eval_las": 75.29095792300807, "eval_loss": 1.5608779191970825, "eval_runtime": 3.672, "eval_samples_per_second": 138.07, "eval_steps_per_second": 17.429, "eval_uas": 80.17416781964678, "step": 2500 }, { "epoch": 11.76, "learning_rate": 6.660939597315437e-05, "loss": 0.1637, "step": 2600 }, { "epoch": 12.22, "learning_rate": 6.607248322147652e-05, "loss": 0.1513, "step": 2700 }, { "epoch": 12.67, "learning_rate": 6.553557046979867e-05, "loss": 0.1318, "step": 2800 }, { "epoch": 13.12, "learning_rate": 6.499865771812081e-05, "loss": 0.1326, "step": 2900 }, { "epoch": 13.57, "learning_rate": 6.446174496644296e-05, "loss": 0.1156, "step": 3000 }, { "epoch": 13.57, "eval_las": 75.71416944738341, "eval_loss": 1.6040022373199463, "eval_runtime": 3.6712, "eval_samples_per_second": 138.103, "eval_steps_per_second": 17.433, "eval_uas": 80.20672255229104, "step": 3000 }, { "epoch": 14.03, "learning_rate": 6.392483221476511e-05, "loss": 0.117, "step": 3100 }, { "epoch": 14.48, "learning_rate": 6.338791946308726e-05, "loss": 0.0966, "step": 3200 }, { "epoch": 14.93, "learning_rate": 6.28510067114094e-05, "loss": 0.1057, "step": 3300 }, { "epoch": 15.38, "learning_rate": 6.231409395973154e-05, "loss": 0.091, "step": 3400 }, { "epoch": 15.84, "learning_rate": 6.177718120805369e-05, "loss": 0.09, "step": 3500 }, { "epoch": 15.84, "eval_las": 75.79555627899406, "eval_loss": 1.8544515371322632, "eval_runtime": 3.6721, "eval_samples_per_second": 138.068, "eval_steps_per_second": 17.429, "eval_uas": 80.03581020590869, "step": 3500 }, { "epoch": 16.29, "learning_rate": 6.124026845637584e-05, "loss": 0.0816, "step": 3600 }, { "epoch": 16.74, "learning_rate": 6.070335570469799e-05, "loss": 0.0809, "step": 3700 }, { "epoch": 17.19, "learning_rate": 6.0166442953020136e-05, "loss": 0.0753, "step": 3800 }, { "epoch": 17.65, "learning_rate": 5.962953020134229e-05, "loss": 0.073, "step": 3900 }, { "epoch": 18.1, "learning_rate": 5.909261744966444e-05, "loss": 0.07, "step": 4000 }, { "epoch": 18.1, "eval_las": 76.59314722877838, "eval_loss": 2.0029706954956055, "eval_runtime": 3.6667, "eval_samples_per_second": 138.273, "eval_steps_per_second": 17.455, "eval_uas": 80.7682916904045, "step": 4000 }, { "epoch": 18.55, "learning_rate": 5.855570469798659e-05, "loss": 0.0641, "step": 4100 }, { "epoch": 19.0, "learning_rate": 5.8018791946308735e-05, "loss": 0.0687, "step": 4200 }, { "epoch": 19.46, "learning_rate": 5.7481879194630884e-05, "loss": 0.0548, "step": 4300 }, { "epoch": 19.91, "learning_rate": 5.694496644295303e-05, "loss": 0.0608, "step": 4400 }, { "epoch": 20.36, "learning_rate": 5.6408053691275166e-05, "loss": 0.0526, "step": 4500 }, { "epoch": 20.36, "eval_las": 75.82811101163833, "eval_loss": 2.0474750995635986, "eval_runtime": 3.6759, "eval_samples_per_second": 137.925, "eval_steps_per_second": 17.411, "eval_uas": 80.42646699763978, "step": 4500 }, { "epoch": 20.81, "learning_rate": 5.5871140939597315e-05, "loss": 0.0574, "step": 4600 }, { "epoch": 21.27, "learning_rate": 5.533422818791946e-05, "loss": 0.0506, "step": 4700 }, { "epoch": 21.72, "learning_rate": 5.479731543624161e-05, "loss": 0.0506, "step": 4800 }, { "epoch": 22.17, "learning_rate": 5.426040268456376e-05, "loss": 0.0478, "step": 4900 }, { "epoch": 22.62, "learning_rate": 5.372348993288591e-05, "loss": 0.0455, "step": 5000 }, { "epoch": 22.62, "eval_las": 75.73044681370554, "eval_loss": 2.1534557342529297, "eval_runtime": 3.6739, "eval_samples_per_second": 138.001, "eval_steps_per_second": 17.42, "eval_uas": 80.47529909660616, "step": 5000 }, { "epoch": 23.08, "learning_rate": 5.3186577181208056e-05, "loss": 0.0446, "step": 5100 }, { "epoch": 23.53, "learning_rate": 5.2649664429530204e-05, "loss": 0.0409, "step": 5200 }, { "epoch": 23.98, "learning_rate": 5.211275167785235e-05, "loss": 0.0448, "step": 5300 }, { "epoch": 24.43, "learning_rate": 5.15758389261745e-05, "loss": 0.0379, "step": 5400 }, { "epoch": 24.89, "learning_rate": 5.1038926174496656e-05, "loss": 0.0416, "step": 5500 }, { "epoch": 24.89, "eval_las": 76.22690648653048, "eval_loss": 2.1511826515197754, "eval_runtime": 3.6773, "eval_samples_per_second": 137.872, "eval_steps_per_second": 17.404, "eval_uas": 80.80898510620982, "step": 5500 }, { "epoch": 25.34, "learning_rate": 5.050201342281879e-05, "loss": 0.0352, "step": 5600 }, { "epoch": 25.79, "learning_rate": 4.996510067114094e-05, "loss": 0.0369, "step": 5700 }, { "epoch": 26.24, "learning_rate": 4.942818791946309e-05, "loss": 0.0354, "step": 5800 }, { "epoch": 26.7, "learning_rate": 4.8891275167785235e-05, "loss": 0.0342, "step": 5900 }, { "epoch": 27.15, "learning_rate": 4.835436241610738e-05, "loss": 0.0342, "step": 6000 }, { "epoch": 27.15, "eval_las": 75.9908846748596, "eval_loss": 2.3812143802642822, "eval_runtime": 3.6674, "eval_samples_per_second": 138.245, "eval_steps_per_second": 17.451, "eval_uas": 80.6299340766664, "step": 6000 }, { "epoch": 27.6, "learning_rate": 4.781744966442953e-05, "loss": 0.0296, "step": 6100 }, { "epoch": 28.05, "learning_rate": 4.728053691275168e-05, "loss": 0.0304, "step": 6200 }, { "epoch": 28.51, "learning_rate": 4.674362416107383e-05, "loss": 0.0304, "step": 6300 }, { "epoch": 28.96, "learning_rate": 4.6206711409395976e-05, "loss": 0.0319, "step": 6400 }, { "epoch": 29.41, "learning_rate": 4.5669798657718125e-05, "loss": 0.0264, "step": 6500 }, { "epoch": 29.41, "eval_las": 75.8769431106047, "eval_loss": 2.4017791748046875, "eval_runtime": 3.6647, "eval_samples_per_second": 138.347, "eval_steps_per_second": 17.464, "eval_uas": 80.4671604134451, "step": 6500 }, { "epoch": 29.41, "step": 6500, "total_flos": 3.4624658436900864e+16, "train_loss": 0.30513293038881745, "train_runtime": 3575.6323, "train_samples_per_second": 134.242, "train_steps_per_second": 4.195 } ], "max_steps": 15000, "num_train_epochs": 68, "total_flos": 3.4624658436900864e+16, "trial_name": null, "trial_params": null }