{ "best_metric": 90.49664002544833, "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-English-EWT/checkpoint-7500", "epoch": 25.510204081632654, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 7.52e-05, "loss": 3.6373, "step": 100 }, { "epoch": 0.51, "learning_rate": 7.949530201342283e-05, "loss": 0.7868, "step": 200 }, { "epoch": 0.77, "learning_rate": 7.895838926174497e-05, "loss": 0.6003, "step": 300 }, { "epoch": 1.02, "learning_rate": 7.842147651006712e-05, "loss": 0.4937, "step": 400 }, { "epoch": 1.28, "learning_rate": 7.788456375838927e-05, "loss": 0.3314, "step": 500 }, { "epoch": 1.28, "eval_las": 88.67151775418506, "eval_loss": 0.4770449995994568, "eval_runtime": 12.8846, "eval_samples_per_second": 155.301, "eval_steps_per_second": 19.481, "eval_uas": 91.08513260964651, "step": 500 }, { "epoch": 1.53, "learning_rate": 7.734765100671142e-05, "loss": 0.3157, "step": 600 }, { "epoch": 1.79, "learning_rate": 7.681073825503357e-05, "loss": 0.3354, "step": 700 }, { "epoch": 2.04, "learning_rate": 7.627382550335572e-05, "loss": 0.2813, "step": 800 }, { "epoch": 2.3, "learning_rate": 7.573691275167786e-05, "loss": 0.1771, "step": 900 }, { "epoch": 2.55, "learning_rate": 7.52e-05, "loss": 0.1868, "step": 1000 }, { "epoch": 2.55, "eval_las": 89.37532307447611, "eval_loss": 0.5148924589157104, "eval_runtime": 12.9189, "eval_samples_per_second": 154.889, "eval_steps_per_second": 19.429, "eval_uas": 91.68953039882301, "step": 1000 }, { "epoch": 2.81, "learning_rate": 7.466308724832215e-05, "loss": 0.18, "step": 1100 }, { "epoch": 3.06, "learning_rate": 7.41261744966443e-05, "loss": 0.1798, "step": 1200 }, { "epoch": 3.32, "learning_rate": 7.358926174496644e-05, "loss": 0.1099, "step": 1300 }, { "epoch": 3.57, "learning_rate": 7.305234899328859e-05, "loss": 0.1302, "step": 1400 }, { "epoch": 3.83, "learning_rate": 7.251543624161074e-05, "loss": 0.1241, "step": 1500 }, { "epoch": 3.83, "eval_las": 89.86440812755974, "eval_loss": 0.5785237550735474, "eval_runtime": 12.921, "eval_samples_per_second": 154.864, "eval_steps_per_second": 19.426, "eval_uas": 92.11499463199331, "step": 1500 }, { "epoch": 4.08, "learning_rate": 7.197852348993289e-05, "loss": 0.1088, "step": 1600 }, { "epoch": 4.34, "learning_rate": 7.144161073825504e-05, "loss": 0.086, "step": 1700 }, { "epoch": 4.59, "learning_rate": 7.090469798657718e-05, "loss": 0.0918, "step": 1800 }, { "epoch": 4.85, "learning_rate": 7.036778523489933e-05, "loss": 0.092, "step": 1900 }, { "epoch": 5.1, "learning_rate": 6.983087248322148e-05, "loss": 0.0802, "step": 2000 }, { "epoch": 5.1, "eval_las": 90.06322318978886, "eval_loss": 0.7516521215438843, "eval_runtime": 12.9274, "eval_samples_per_second": 154.787, "eval_steps_per_second": 19.416, "eval_uas": 92.23825997057537, "step": 2000 }, { "epoch": 5.36, "learning_rate": 6.929395973154363e-05, "loss": 0.0653, "step": 2100 }, { "epoch": 5.61, "learning_rate": 6.875704697986578e-05, "loss": 0.0647, "step": 2200 }, { "epoch": 5.87, "learning_rate": 6.822013422818793e-05, "loss": 0.0753, "step": 2300 }, { "epoch": 6.12, "learning_rate": 6.768322147651007e-05, "loss": 0.0639, "step": 2400 }, { "epoch": 6.38, "learning_rate": 6.714630872483222e-05, "loss": 0.0542, "step": 2500 }, { "epoch": 6.38, "eval_las": 90.0552705872997, "eval_loss": 0.7744932174682617, "eval_runtime": 12.9222, "eval_samples_per_second": 154.85, "eval_steps_per_second": 19.424, "eval_uas": 92.34164380293451, "step": 2500 }, { "epoch": 6.63, "learning_rate": 6.660939597315437e-05, "loss": 0.0525, "step": 2600 }, { "epoch": 6.89, "learning_rate": 6.607248322147652e-05, "loss": 0.0607, "step": 2700 }, { "epoch": 7.14, "learning_rate": 6.553557046979867e-05, "loss": 0.0499, "step": 2800 }, { "epoch": 7.4, "learning_rate": 6.499865771812081e-05, "loss": 0.0475, "step": 2900 }, { "epoch": 7.65, "learning_rate": 6.446174496644296e-05, "loss": 0.0488, "step": 3000 }, { "epoch": 7.65, "eval_las": 90.06322318978886, "eval_loss": 0.6991102695465088, "eval_runtime": 12.8997, "eval_samples_per_second": 155.119, "eval_steps_per_second": 19.458, "eval_uas": 92.27007038053203, "step": 3000 }, { "epoch": 7.91, "learning_rate": 6.392483221476511e-05, "loss": 0.0498, "step": 3100 }, { "epoch": 8.16, "learning_rate": 6.338791946308726e-05, "loss": 0.0452, "step": 3200 }, { "epoch": 8.42, "learning_rate": 6.28510067114094e-05, "loss": 0.0423, "step": 3300 }, { "epoch": 8.67, "learning_rate": 6.231409395973154e-05, "loss": 0.0434, "step": 3400 }, { "epoch": 8.93, "learning_rate": 6.177718120805369e-05, "loss": 0.0417, "step": 3500 }, { "epoch": 8.93, "eval_las": 90.22227523957214, "eval_loss": 0.8318419456481934, "eval_runtime": 12.8987, "eval_samples_per_second": 155.132, "eval_steps_per_second": 19.459, "eval_uas": 92.35357270666825, "step": 3500 }, { "epoch": 9.18, "learning_rate": 6.124026845637584e-05, "loss": 0.0391, "step": 3600 }, { "epoch": 9.44, "learning_rate": 6.070335570469799e-05, "loss": 0.0369, "step": 3700 }, { "epoch": 9.69, "learning_rate": 6.0166442953020136e-05, "loss": 0.0329, "step": 3800 }, { "epoch": 9.95, "learning_rate": 5.962953020134229e-05, "loss": 0.0329, "step": 3900 }, { "epoch": 10.2, "learning_rate": 5.909261744966444e-05, "loss": 0.0289, "step": 4000 }, { "epoch": 10.2, "eval_las": 90.11491510596844, "eval_loss": 0.9339661002159119, "eval_runtime": 12.8923, "eval_samples_per_second": 155.209, "eval_steps_per_second": 19.469, "eval_uas": 92.26211777804286, "step": 4000 }, { "epoch": 10.46, "learning_rate": 5.855570469798659e-05, "loss": 0.0311, "step": 4100 }, { "epoch": 10.71, "learning_rate": 5.8018791946308735e-05, "loss": 0.0294, "step": 4200 }, { "epoch": 10.97, "learning_rate": 5.7481879194630884e-05, "loss": 0.0312, "step": 4300 }, { "epoch": 11.22, "learning_rate": 5.694496644295303e-05, "loss": 0.0226, "step": 4400 }, { "epoch": 11.48, "learning_rate": 5.6408053691275166e-05, "loss": 0.0249, "step": 4500 }, { "epoch": 11.48, "eval_las": 90.21829893832756, "eval_loss": 0.850771963596344, "eval_runtime": 12.8981, "eval_samples_per_second": 155.139, "eval_steps_per_second": 19.46, "eval_uas": 92.34562010417909, "step": 4500 }, { "epoch": 11.73, "learning_rate": 5.5871140939597315e-05, "loss": 0.025, "step": 4600 }, { "epoch": 11.99, "learning_rate": 5.533422818791946e-05, "loss": 0.0297, "step": 4700 }, { "epoch": 12.24, "learning_rate": 5.479731543624161e-05, "loss": 0.022, "step": 4800 }, { "epoch": 12.5, "learning_rate": 5.426040268456376e-05, "loss": 0.0237, "step": 4900 }, { "epoch": 12.76, "learning_rate": 5.372348993288591e-05, "loss": 0.0244, "step": 5000 }, { "epoch": 12.76, "eval_las": 90.2779434569963, "eval_loss": 0.84713214635849, "eval_runtime": 12.9629, "eval_samples_per_second": 154.363, "eval_steps_per_second": 19.363, "eval_uas": 92.48876694898406, "step": 5000 }, { "epoch": 13.01, "learning_rate": 5.3186577181208056e-05, "loss": 0.0255, "step": 5100 }, { "epoch": 13.27, "learning_rate": 5.2649664429530204e-05, "loss": 0.0175, "step": 5200 }, { "epoch": 13.52, "learning_rate": 5.211275167785235e-05, "loss": 0.0285, "step": 5300 }, { "epoch": 13.78, "learning_rate": 5.15758389261745e-05, "loss": 0.0238, "step": 5400 }, { "epoch": 14.03, "learning_rate": 5.1038926174496656e-05, "loss": 0.0214, "step": 5500 }, { "epoch": 14.03, "eval_las": 89.99960236987555, "eval_loss": 0.9246010780334473, "eval_runtime": 12.9247, "eval_samples_per_second": 154.82, "eval_steps_per_second": 19.42, "eval_uas": 92.1627102469283, "step": 5500 }, { "epoch": 14.29, "learning_rate": 5.050201342281879e-05, "loss": 0.0179, "step": 5600 }, { "epoch": 14.54, "learning_rate": 4.996510067114094e-05, "loss": 0.0192, "step": 5700 }, { "epoch": 14.8, "learning_rate": 4.942818791946309e-05, "loss": 0.0218, "step": 5800 }, { "epoch": 15.05, "learning_rate": 4.8891275167785235e-05, "loss": 0.0215, "step": 5900 }, { "epoch": 15.31, "learning_rate": 4.835436241610738e-05, "loss": 0.0165, "step": 6000 }, { "epoch": 15.31, "eval_las": 89.90814744125015, "eval_loss": 1.003578782081604, "eval_runtime": 12.9261, "eval_samples_per_second": 154.803, "eval_steps_per_second": 19.418, "eval_uas": 92.17066284941747, "step": 6000 }, { "epoch": 15.56, "learning_rate": 4.781744966442953e-05, "loss": 0.0166, "step": 6100 }, { "epoch": 15.82, "learning_rate": 4.728053691275168e-05, "loss": 0.0184, "step": 6200 }, { "epoch": 16.07, "learning_rate": 4.674362416107383e-05, "loss": 0.0174, "step": 6300 }, { "epoch": 16.33, "learning_rate": 4.6206711409395976e-05, "loss": 0.0139, "step": 6400 }, { "epoch": 16.58, "learning_rate": 4.5669798657718125e-05, "loss": 0.015, "step": 6500 }, { "epoch": 16.58, "eval_las": 90.1904648296155, "eval_loss": 1.006374478340149, "eval_runtime": 12.9169, "eval_samples_per_second": 154.913, "eval_steps_per_second": 19.432, "eval_uas": 92.30585709173327, "step": 6500 }, { "epoch": 16.84, "learning_rate": 4.513288590604027e-05, "loss": 0.0166, "step": 6600 }, { "epoch": 17.09, "learning_rate": 4.459597315436242e-05, "loss": 0.0142, "step": 6700 }, { "epoch": 17.35, "learning_rate": 4.405906040268456e-05, "loss": 0.0144, "step": 6800 }, { "epoch": 17.6, "learning_rate": 4.352214765100671e-05, "loss": 0.0115, "step": 6900 }, { "epoch": 17.86, "learning_rate": 4.298523489932886e-05, "loss": 0.0135, "step": 7000 }, { "epoch": 17.86, "eval_las": 90.13479661219134, "eval_loss": 1.039555549621582, "eval_runtime": 12.9436, "eval_samples_per_second": 154.594, "eval_steps_per_second": 19.392, "eval_uas": 92.26211777804286, "step": 7000 }, { "epoch": 18.11, "learning_rate": 4.244832214765101e-05, "loss": 0.0117, "step": 7100 }, { "epoch": 18.37, "learning_rate": 4.1911409395973156e-05, "loss": 0.0137, "step": 7200 }, { "epoch": 18.62, "learning_rate": 4.1374496644295304e-05, "loss": 0.0107, "step": 7300 }, { "epoch": 18.88, "learning_rate": 4.083758389261745e-05, "loss": 0.0113, "step": 7400 }, { "epoch": 19.13, "learning_rate": 4.03006711409396e-05, "loss": 0.0118, "step": 7500 }, { "epoch": 19.13, "eval_las": 90.49664002544833, "eval_loss": 1.0788443088531494, "eval_runtime": 12.946, "eval_samples_per_second": 154.565, "eval_steps_per_second": 19.388, "eval_uas": 92.62793749254443, "step": 7500 }, { "epoch": 19.39, "learning_rate": 3.976375838926175e-05, "loss": 0.0192, "step": 7600 }, { "epoch": 19.64, "learning_rate": 3.92268456375839e-05, "loss": 0.0116, "step": 7700 }, { "epoch": 19.9, "learning_rate": 3.8689932885906045e-05, "loss": 0.0117, "step": 7800 }, { "epoch": 20.15, "learning_rate": 3.815302013422819e-05, "loss": 0.0142, "step": 7900 }, { "epoch": 20.41, "learning_rate": 3.761610738255034e-05, "loss": 0.0089, "step": 8000 }, { "epoch": 20.41, "eval_las": 90.34156427690961, "eval_loss": 1.065735936164856, "eval_runtime": 12.9533, "eval_samples_per_second": 154.478, "eval_steps_per_second": 19.377, "eval_uas": 92.4092409240924, "step": 8000 }, { "epoch": 20.66, "learning_rate": 3.707919463087249e-05, "loss": 0.0121, "step": 8100 }, { "epoch": 20.92, "learning_rate": 3.654228187919463e-05, "loss": 0.0112, "step": 8200 }, { "epoch": 21.17, "learning_rate": 3.600536912751678e-05, "loss": 0.0079, "step": 8300 }, { "epoch": 21.43, "learning_rate": 3.546845637583893e-05, "loss": 0.009, "step": 8400 }, { "epoch": 21.68, "learning_rate": 3.4931543624161076e-05, "loss": 0.0092, "step": 8500 }, { "epoch": 21.68, "eval_las": 90.38927989184461, "eval_loss": 1.0594043731689453, "eval_runtime": 12.978, "eval_samples_per_second": 154.184, "eval_steps_per_second": 19.34, "eval_uas": 92.49671955147322, "step": 8500 }, { "epoch": 21.94, "learning_rate": 3.4394630872483224e-05, "loss": 0.0064, "step": 8600 }, { "epoch": 22.19, "learning_rate": 3.385771812080537e-05, "loss": 0.0081, "step": 8700 }, { "epoch": 22.45, "learning_rate": 3.332080536912752e-05, "loss": 0.0079, "step": 8800 }, { "epoch": 22.7, "learning_rate": 3.278389261744967e-05, "loss": 0.0085, "step": 8900 }, { "epoch": 22.96, "learning_rate": 3.224697986577182e-05, "loss": 0.0089, "step": 9000 }, { "epoch": 22.96, "eval_las": 90.47675851922541, "eval_loss": 1.0339850187301636, "eval_runtime": 12.9806, "eval_samples_per_second": 154.153, "eval_steps_per_second": 19.337, "eval_uas": 92.54045886516363, "step": 9000 }, { "epoch": 23.21, "learning_rate": 3.1710067114093965e-05, "loss": 0.0068, "step": 9100 }, { "epoch": 23.47, "learning_rate": 3.1173154362416114e-05, "loss": 0.0047, "step": 9200 }, { "epoch": 23.72, "learning_rate": 3.0636241610738255e-05, "loss": 0.0064, "step": 9300 }, { "epoch": 23.98, "learning_rate": 3.0099328859060403e-05, "loss": 0.0071, "step": 9400 }, { "epoch": 24.23, "learning_rate": 2.956241610738255e-05, "loss": 0.0056, "step": 9500 }, { "epoch": 24.23, "eval_las": 90.49266372420375, "eval_loss": 1.0786528587341309, "eval_runtime": 12.983, "eval_samples_per_second": 154.125, "eval_steps_per_second": 19.333, "eval_uas": 92.57624557636487, "step": 9500 }, { "epoch": 24.49, "learning_rate": 2.90255033557047e-05, "loss": 0.0072, "step": 9600 }, { "epoch": 24.74, "learning_rate": 2.8488590604026848e-05, "loss": 0.0055, "step": 9700 }, { "epoch": 25.0, "learning_rate": 2.7951677852348996e-05, "loss": 0.0074, "step": 9800 }, { "epoch": 25.26, "learning_rate": 2.741476510067114e-05, "loss": 0.004, "step": 9900 }, { "epoch": 25.51, "learning_rate": 2.687785234899329e-05, "loss": 0.0045, "step": 10000 }, { "epoch": 25.51, "eval_las": 90.43301920553502, "eval_loss": 1.2013182640075684, "eval_runtime": 12.9849, "eval_samples_per_second": 154.102, "eval_steps_per_second": 19.33, "eval_uas": 92.4768380452503, "step": 10000 }, { "epoch": 25.51, "step": 10000, "total_flos": 5.34284250892032e+16, "train_loss": 0.1020653991997242, "train_runtime": 5461.7633, "train_samples_per_second": 87.884, "train_steps_per_second": 2.746 } ], "max_steps": 15000, "num_train_epochs": 39, "total_flos": 5.34284250892032e+16, "trial_name": null, "trial_params": null }