|
{ |
|
"best_metric": 77.3865625420762, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Arabic-PADT/checkpoint-15000", |
|
"epoch": 78.94736842105263, |
|
"global_step": 15000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 7.52e-05, |
|
"loss": 4.0025, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 7.949530201342283e-05, |
|
"loss": 1.5372, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 7.895838926174497e-05, |
|
"loss": 1.2033, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 7.842147651006712e-05, |
|
"loss": 1.0746, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.788456375838927e-05, |
|
"loss": 0.8869, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_las": 73.72761545711593, |
|
"eval_loss": 1.1565440893173218, |
|
"eval_runtime": 7.1881, |
|
"eval_samples_per_second": 126.459, |
|
"eval_steps_per_second": 15.86, |
|
"eval_uas": 79.98518917463309, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 7.734765100671142e-05, |
|
"loss": 0.8237, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.68, |
|
"learning_rate": 7.681073825503357e-05, |
|
"loss": 0.6923, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"learning_rate": 7.627382550335572e-05, |
|
"loss": 0.6289, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 4.74, |
|
"learning_rate": 7.573691275167786e-05, |
|
"loss": 0.5416, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"learning_rate": 7.52e-05, |
|
"loss": 0.4842, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.26, |
|
"eval_las": 75.07405412683453, |
|
"eval_loss": 1.3640334606170654, |
|
"eval_runtime": 7.1724, |
|
"eval_samples_per_second": 126.735, |
|
"eval_steps_per_second": 15.894, |
|
"eval_uas": 81.23737713747138, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 7.466308724832215e-05, |
|
"loss": 0.4232, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 6.32, |
|
"learning_rate": 7.41261744966443e-05, |
|
"loss": 0.3678, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"learning_rate": 7.358926174496644e-05, |
|
"loss": 0.3381, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 7.37, |
|
"learning_rate": 7.305234899328859e-05, |
|
"loss": 0.2829, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"learning_rate": 7.251543624161074e-05, |
|
"loss": 0.2679, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.89, |
|
"eval_las": 75.22889457385216, |
|
"eval_loss": 1.5819050073623657, |
|
"eval_runtime": 7.1677, |
|
"eval_samples_per_second": 126.819, |
|
"eval_steps_per_second": 15.905, |
|
"eval_uas": 81.27777029756295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 7.197852348993289e-05, |
|
"loss": 0.217, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 8.95, |
|
"learning_rate": 7.144161073825504e-05, |
|
"loss": 0.2221, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 7.090469798657718e-05, |
|
"loss": 0.1718, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 7.036778523489933e-05, |
|
"loss": 0.1889, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 6.983087248322148e-05, |
|
"loss": 0.1429, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_las": 75.5722364346304, |
|
"eval_loss": 1.906549334526062, |
|
"eval_runtime": 7.1719, |
|
"eval_samples_per_second": 126.744, |
|
"eval_steps_per_second": 15.895, |
|
"eval_uas": 81.56052241820385, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 11.05, |
|
"learning_rate": 6.929395973154363e-05, |
|
"loss": 0.1566, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 11.58, |
|
"learning_rate": 6.875704697986578e-05, |
|
"loss": 0.1271, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 12.11, |
|
"learning_rate": 6.822013422818793e-05, |
|
"loss": 0.1319, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 6.768322147651007e-05, |
|
"loss": 0.118, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 6.714630872483222e-05, |
|
"loss": 0.1182, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"eval_las": 75.51164669449307, |
|
"eval_loss": 2.0345656871795654, |
|
"eval_runtime": 7.1813, |
|
"eval_samples_per_second": 126.579, |
|
"eval_steps_per_second": 15.875, |
|
"eval_uas": 81.62447825501549, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 13.68, |
|
"learning_rate": 6.660939597315437e-05, |
|
"loss": 0.1045, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 14.21, |
|
"learning_rate": 6.607248322147652e-05, |
|
"loss": 0.1042, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 14.74, |
|
"learning_rate": 6.553557046979867e-05, |
|
"loss": 0.0959, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 15.26, |
|
"learning_rate": 6.499865771812081e-05, |
|
"loss": 0.0923, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"learning_rate": 6.446174496644296e-05, |
|
"loss": 0.0874, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_las": 76.13774067591221, |
|
"eval_loss": 2.1854023933410645, |
|
"eval_runtime": 7.165, |
|
"eval_samples_per_second": 126.867, |
|
"eval_steps_per_second": 15.911, |
|
"eval_uas": 82.03177595260536, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 16.32, |
|
"learning_rate": 6.392483221476511e-05, |
|
"loss": 0.086, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 16.84, |
|
"learning_rate": 6.338791946308726e-05, |
|
"loss": 0.0831, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 6.28510067114094e-05, |
|
"loss": 0.0752, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 17.89, |
|
"learning_rate": 6.231409395973154e-05, |
|
"loss": 0.0796, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"learning_rate": 6.177718120805369e-05, |
|
"loss": 0.0708, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.42, |
|
"eval_las": 75.79776491180826, |
|
"eval_loss": 2.4079720973968506, |
|
"eval_runtime": 7.1633, |
|
"eval_samples_per_second": 126.897, |
|
"eval_steps_per_second": 15.915, |
|
"eval_uas": 81.9038642789821, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 18.95, |
|
"learning_rate": 6.124026845637584e-05, |
|
"loss": 0.0768, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 19.47, |
|
"learning_rate": 6.070335570469799e-05, |
|
"loss": 0.0663, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 6.0166442953020136e-05, |
|
"loss": 0.0687, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 20.53, |
|
"learning_rate": 5.962953020134229e-05, |
|
"loss": 0.0633, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"learning_rate": 5.909261744966444e-05, |
|
"loss": 0.0681, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"eval_las": 76.15793725595799, |
|
"eval_loss": 2.4060051441192627, |
|
"eval_runtime": 7.167, |
|
"eval_samples_per_second": 126.831, |
|
"eval_steps_per_second": 15.906, |
|
"eval_uas": 82.17315201292581, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 21.58, |
|
"learning_rate": 5.855570469798659e-05, |
|
"loss": 0.0586, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 22.11, |
|
"learning_rate": 5.8018791946308735e-05, |
|
"loss": 0.0635, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 22.63, |
|
"learning_rate": 5.7481879194630884e-05, |
|
"loss": 0.0563, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 23.16, |
|
"learning_rate": 5.694496644295303e-05, |
|
"loss": 0.0586, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"learning_rate": 5.6408053691275166e-05, |
|
"loss": 0.0524, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 23.68, |
|
"eval_las": 75.88528342533998, |
|
"eval_loss": 2.648954153060913, |
|
"eval_runtime": 7.1677, |
|
"eval_samples_per_second": 126.819, |
|
"eval_steps_per_second": 15.905, |
|
"eval_uas": 81.84664063551905, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 24.21, |
|
"learning_rate": 5.5871140939597315e-05, |
|
"loss": 0.0533, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 24.74, |
|
"learning_rate": 5.533422818791946e-05, |
|
"loss": 0.0513, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 25.26, |
|
"learning_rate": 5.479731543624161e-05, |
|
"loss": 0.0538, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 25.79, |
|
"learning_rate": 5.426040268456376e-05, |
|
"loss": 0.0531, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 5.372348993288591e-05, |
|
"loss": 0.0516, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"eval_las": 76.18823212602666, |
|
"eval_loss": 2.6080164909362793, |
|
"eval_runtime": 7.1678, |
|
"eval_samples_per_second": 126.817, |
|
"eval_steps_per_second": 15.904, |
|
"eval_uas": 82.22027736636596, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 26.84, |
|
"learning_rate": 5.3186577181208056e-05, |
|
"loss": 0.0459, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 27.37, |
|
"learning_rate": 5.2649664429530204e-05, |
|
"loss": 0.0466, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 27.89, |
|
"learning_rate": 5.211275167785235e-05, |
|
"loss": 0.0503, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 28.42, |
|
"learning_rate": 5.15758389261745e-05, |
|
"loss": 0.0461, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"learning_rate": 5.1038926174496656e-05, |
|
"loss": 0.0452, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 28.95, |
|
"eval_las": 76.16803554598089, |
|
"eval_loss": 2.6569535732269287, |
|
"eval_runtime": 7.1554, |
|
"eval_samples_per_second": 127.037, |
|
"eval_steps_per_second": 15.932, |
|
"eval_uas": 82.10246398276558, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 29.47, |
|
"learning_rate": 5.050201342281879e-05, |
|
"loss": 0.0445, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 4.996510067114094e-05, |
|
"loss": 0.0416, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 30.53, |
|
"learning_rate": 4.942818791946309e-05, |
|
"loss": 0.0408, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 31.05, |
|
"learning_rate": 4.8891275167785235e-05, |
|
"loss": 0.0405, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 31.58, |
|
"learning_rate": 4.835436241610738e-05, |
|
"loss": 0.0398, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 31.58, |
|
"eval_las": 76.00982900228894, |
|
"eval_loss": 2.7657480239868164, |
|
"eval_runtime": 7.157, |
|
"eval_samples_per_second": 127.008, |
|
"eval_steps_per_second": 15.928, |
|
"eval_uas": 82.13949104618284, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 32.11, |
|
"learning_rate": 4.781744966442953e-05, |
|
"loss": 0.0411, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 32.63, |
|
"learning_rate": 4.728053691275168e-05, |
|
"loss": 0.037, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 33.16, |
|
"learning_rate": 4.674362416107383e-05, |
|
"loss": 0.0371, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 33.68, |
|
"learning_rate": 4.6206711409395976e-05, |
|
"loss": 0.0383, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 34.21, |
|
"learning_rate": 4.5669798657718125e-05, |
|
"loss": 0.0374, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 34.21, |
|
"eval_las": 76.46761814999327, |
|
"eval_loss": 2.943580389022827, |
|
"eval_runtime": 7.1627, |
|
"eval_samples_per_second": 126.908, |
|
"eval_steps_per_second": 15.916, |
|
"eval_uas": 82.3751178133836, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 34.74, |
|
"learning_rate": 4.513288590604027e-05, |
|
"loss": 0.0355, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 35.26, |
|
"learning_rate": 4.459597315436242e-05, |
|
"loss": 0.0338, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 35.79, |
|
"learning_rate": 4.405906040268456e-05, |
|
"loss": 0.0354, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 36.32, |
|
"learning_rate": 4.352214765100671e-05, |
|
"loss": 0.0328, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 36.84, |
|
"learning_rate": 4.298523489932886e-05, |
|
"loss": 0.0363, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 36.84, |
|
"eval_las": 76.36663524976437, |
|
"eval_loss": 2.909515857696533, |
|
"eval_runtime": 7.1643, |
|
"eval_samples_per_second": 126.879, |
|
"eval_steps_per_second": 15.912, |
|
"eval_uas": 82.31452807324627, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 4.244832214765101e-05, |
|
"loss": 0.0329, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 37.89, |
|
"learning_rate": 4.1911409395973156e-05, |
|
"loss": 0.0319, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 38.42, |
|
"learning_rate": 4.1374496644295304e-05, |
|
"loss": 0.0321, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 38.95, |
|
"learning_rate": 4.083758389261745e-05, |
|
"loss": 0.033, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 4.03006711409396e-05, |
|
"loss": 0.0312, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"eval_las": 76.26901844620978, |
|
"eval_loss": 3.006049633026123, |
|
"eval_runtime": 7.1637, |
|
"eval_samples_per_second": 126.89, |
|
"eval_steps_per_second": 15.914, |
|
"eval_uas": 82.08226740271981, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 3.976375838926175e-05, |
|
"loss": 0.0305, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 40.53, |
|
"learning_rate": 3.92268456375839e-05, |
|
"loss": 0.0284, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 41.05, |
|
"learning_rate": 3.8689932885906045e-05, |
|
"loss": 0.0301, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 41.58, |
|
"learning_rate": 3.815302013422819e-05, |
|
"loss": 0.0295, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"learning_rate": 3.761610738255034e-05, |
|
"loss": 0.0291, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"eval_las": 76.41039450653024, |
|
"eval_loss": 3.065589189529419, |
|
"eval_runtime": 7.1599, |
|
"eval_samples_per_second": 126.957, |
|
"eval_steps_per_second": 15.922, |
|
"eval_uas": 82.25730442978322, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 42.63, |
|
"learning_rate": 3.707919463087249e-05, |
|
"loss": 0.0259, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 43.16, |
|
"learning_rate": 3.654228187919463e-05, |
|
"loss": 0.0287, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 43.68, |
|
"learning_rate": 3.600536912751678e-05, |
|
"loss": 0.0262, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 44.21, |
|
"learning_rate": 3.546845637583893e-05, |
|
"loss": 0.026, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 44.74, |
|
"learning_rate": 3.4931543624161076e-05, |
|
"loss": 0.027, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 44.74, |
|
"eval_las": 76.36326915309007, |
|
"eval_loss": 3.132659912109375, |
|
"eval_runtime": 7.1605, |
|
"eval_samples_per_second": 126.947, |
|
"eval_steps_per_second": 15.921, |
|
"eval_uas": 82.23037565638886, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 45.26, |
|
"learning_rate": 3.4394630872483224e-05, |
|
"loss": 0.0248, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 45.79, |
|
"learning_rate": 3.385771812080537e-05, |
|
"loss": 0.0245, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 46.32, |
|
"learning_rate": 3.332080536912752e-05, |
|
"loss": 0.025, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 46.84, |
|
"learning_rate": 3.278389261744967e-05, |
|
"loss": 0.0253, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"learning_rate": 3.224697986577182e-05, |
|
"loss": 0.0244, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"eval_las": 76.79749562407432, |
|
"eval_loss": 3.1936397552490234, |
|
"eval_runtime": 7.1684, |
|
"eval_samples_per_second": 126.807, |
|
"eval_steps_per_second": 15.903, |
|
"eval_uas": 82.52995826040123, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 47.89, |
|
"learning_rate": 3.1710067114093965e-05, |
|
"loss": 0.0252, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 48.42, |
|
"learning_rate": 3.1173154362416114e-05, |
|
"loss": 0.0219, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 48.95, |
|
"learning_rate": 3.0636241610738255e-05, |
|
"loss": 0.0253, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 49.47, |
|
"learning_rate": 3.0099328859060403e-05, |
|
"loss": 0.0233, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 2.956241610738255e-05, |
|
"loss": 0.0231, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_las": 76.79412952740002, |
|
"eval_loss": 3.195754051208496, |
|
"eval_runtime": 7.1619, |
|
"eval_samples_per_second": 126.921, |
|
"eval_steps_per_second": 15.917, |
|
"eval_uas": 82.52659216372695, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 50.53, |
|
"learning_rate": 2.90255033557047e-05, |
|
"loss": 0.0204, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 51.05, |
|
"learning_rate": 2.8488590604026848e-05, |
|
"loss": 0.0247, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 51.58, |
|
"learning_rate": 2.7951677852348996e-05, |
|
"loss": 0.0217, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 52.11, |
|
"learning_rate": 2.741476510067114e-05, |
|
"loss": 0.0217, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 2.687785234899329e-05, |
|
"loss": 0.021, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"eval_las": 76.66621785377676, |
|
"eval_loss": 3.3272011280059814, |
|
"eval_runtime": 7.1591, |
|
"eval_samples_per_second": 126.971, |
|
"eval_steps_per_second": 15.924, |
|
"eval_uas": 82.59728019388717, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 53.16, |
|
"learning_rate": 2.6340939597315438e-05, |
|
"loss": 0.0197, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 53.68, |
|
"learning_rate": 2.5804026845637586e-05, |
|
"loss": 0.0229, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 54.21, |
|
"learning_rate": 2.5267114093959734e-05, |
|
"loss": 0.0196, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 54.74, |
|
"learning_rate": 2.4730201342281882e-05, |
|
"loss": 0.0211, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 55.26, |
|
"learning_rate": 2.4193288590604027e-05, |
|
"loss": 0.0198, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 55.26, |
|
"eval_las": 76.77056685067996, |
|
"eval_loss": 3.3302974700927734, |
|
"eval_runtime": 7.1625, |
|
"eval_samples_per_second": 126.911, |
|
"eval_steps_per_second": 15.916, |
|
"eval_uas": 82.58718190386428, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 55.79, |
|
"learning_rate": 2.3656375838926175e-05, |
|
"loss": 0.0207, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 56.32, |
|
"learning_rate": 2.3119463087248324e-05, |
|
"loss": 0.0184, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 56.84, |
|
"learning_rate": 2.2582550335570472e-05, |
|
"loss": 0.0197, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 57.37, |
|
"learning_rate": 2.204563758389262e-05, |
|
"loss": 0.0197, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 57.89, |
|
"learning_rate": 2.150872483221477e-05, |
|
"loss": 0.02, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 57.89, |
|
"eval_las": 76.88838023428033, |
|
"eval_loss": 3.3782100677490234, |
|
"eval_runtime": 7.1625, |
|
"eval_samples_per_second": 126.91, |
|
"eval_steps_per_second": 15.916, |
|
"eval_uas": 82.61411067725865, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 58.42, |
|
"learning_rate": 2.0971812080536913e-05, |
|
"loss": 0.0192, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 58.95, |
|
"learning_rate": 2.043489932885906e-05, |
|
"loss": 0.0182, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 59.47, |
|
"learning_rate": 1.989798657718121e-05, |
|
"loss": 0.0186, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"learning_rate": 1.9361073825503358e-05, |
|
"loss": 0.0178, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 60.53, |
|
"learning_rate": 1.8824161073825503e-05, |
|
"loss": 0.0165, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 60.53, |
|
"eval_las": 77.07351555136664, |
|
"eval_loss": 3.4528448581695557, |
|
"eval_runtime": 7.1527, |
|
"eval_samples_per_second": 127.085, |
|
"eval_steps_per_second": 15.938, |
|
"eval_uas": 82.7622189309277, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 61.05, |
|
"learning_rate": 1.828724832214765e-05, |
|
"loss": 0.0195, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 61.58, |
|
"learning_rate": 1.77503355704698e-05, |
|
"loss": 0.019, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 62.11, |
|
"learning_rate": 1.7213422818791948e-05, |
|
"loss": 0.0157, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 62.63, |
|
"learning_rate": 1.6676510067114096e-05, |
|
"loss": 0.0174, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 63.16, |
|
"learning_rate": 1.6139597315436244e-05, |
|
"loss": 0.019, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 63.16, |
|
"eval_las": 76.99946142453211, |
|
"eval_loss": 3.499229669570923, |
|
"eval_runtime": 7.1524, |
|
"eval_samples_per_second": 127.09, |
|
"eval_steps_per_second": 15.939, |
|
"eval_uas": 82.73865625420763, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 63.68, |
|
"learning_rate": 1.560268456375839e-05, |
|
"loss": 0.0151, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 64.21, |
|
"learning_rate": 1.5065771812080539e-05, |
|
"loss": 0.0175, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 64.74, |
|
"learning_rate": 1.4528859060402685e-05, |
|
"loss": 0.0165, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 65.26, |
|
"learning_rate": 1.3991946308724834e-05, |
|
"loss": 0.0184, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 1.3455033557046982e-05, |
|
"loss": 0.0175, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"eval_las": 77.08361384138952, |
|
"eval_loss": 3.48171067237854, |
|
"eval_runtime": 7.1671, |
|
"eval_samples_per_second": 126.829, |
|
"eval_steps_per_second": 15.906, |
|
"eval_uas": 82.72182577083615, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 66.32, |
|
"learning_rate": 1.2918120805369127e-05, |
|
"loss": 0.0173, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 66.84, |
|
"learning_rate": 1.2381208053691277e-05, |
|
"loss": 0.0157, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 67.37, |
|
"learning_rate": 1.1844295302013425e-05, |
|
"loss": 0.017, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 67.89, |
|
"learning_rate": 1.130738255033557e-05, |
|
"loss": 0.0155, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 68.42, |
|
"learning_rate": 1.0770469798657718e-05, |
|
"loss": 0.0162, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 68.42, |
|
"eval_las": 77.04322068129797, |
|
"eval_loss": 3.5367863178253174, |
|
"eval_runtime": 7.1597, |
|
"eval_samples_per_second": 126.961, |
|
"eval_steps_per_second": 15.923, |
|
"eval_uas": 82.68143261074458, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 68.95, |
|
"learning_rate": 1.0233557046979868e-05, |
|
"loss": 0.0163, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 69.47, |
|
"learning_rate": 9.696644295302015e-06, |
|
"loss": 0.018, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 70.0, |
|
"learning_rate": 9.159731543624161e-06, |
|
"loss": 0.0148, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 70.53, |
|
"learning_rate": 8.62281879194631e-06, |
|
"loss": 0.0146, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 71.05, |
|
"learning_rate": 8.085906040268458e-06, |
|
"loss": 0.0173, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 71.05, |
|
"eval_las": 77.18123064494412, |
|
"eval_loss": 3.553845167160034, |
|
"eval_runtime": 7.1565, |
|
"eval_samples_per_second": 127.017, |
|
"eval_steps_per_second": 15.93, |
|
"eval_uas": 82.84637134778511, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 71.58, |
|
"learning_rate": 7.548993288590605e-06, |
|
"loss": 0.0152, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 72.11, |
|
"learning_rate": 7.012080536912752e-06, |
|
"loss": 0.0169, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 72.63, |
|
"learning_rate": 6.475167785234899e-06, |
|
"loss": 0.0154, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 73.16, |
|
"learning_rate": 5.938255033557048e-06, |
|
"loss": 0.0155, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 73.68, |
|
"learning_rate": 5.401342281879195e-06, |
|
"loss": 0.017, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 73.68, |
|
"eval_las": 77.27884744849872, |
|
"eval_loss": 3.5654006004333496, |
|
"eval_runtime": 7.1577, |
|
"eval_samples_per_second": 126.996, |
|
"eval_steps_per_second": 15.927, |
|
"eval_uas": 82.85983573448229, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 74.21, |
|
"learning_rate": 4.864429530201343e-06, |
|
"loss": 0.0154, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 74.74, |
|
"learning_rate": 4.32751677852349e-06, |
|
"loss": 0.0161, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 75.26, |
|
"learning_rate": 3.790604026845638e-06, |
|
"loss": 0.0153, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 75.79, |
|
"learning_rate": 3.253691275167786e-06, |
|
"loss": 0.016, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 76.32, |
|
"learning_rate": 2.716778523489933e-06, |
|
"loss": 0.0154, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 76.32, |
|
"eval_las": 77.34616938198465, |
|
"eval_loss": 3.572803497314453, |
|
"eval_runtime": 7.1716, |
|
"eval_samples_per_second": 126.75, |
|
"eval_steps_per_second": 15.896, |
|
"eval_uas": 82.89349670122526, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 76.84, |
|
"learning_rate": 2.1798657718120807e-06, |
|
"loss": 0.0158, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 77.37, |
|
"learning_rate": 1.6429530201342283e-06, |
|
"loss": 0.0156, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 77.89, |
|
"learning_rate": 1.1060402684563759e-06, |
|
"loss": 0.0163, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 78.42, |
|
"learning_rate": 5.691275167785235e-07, |
|
"loss": 0.0151, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 3.2214765100671145e-08, |
|
"loss": 0.0149, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"eval_las": 77.3865625420762, |
|
"eval_loss": 3.5840232372283936, |
|
"eval_runtime": 7.1573, |
|
"eval_samples_per_second": 127.003, |
|
"eval_steps_per_second": 15.928, |
|
"eval_uas": 82.93052376464252, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"step": 15000, |
|
"total_flos": 8.008377828590592e+16, |
|
"train_loss": 0.12927290275096892, |
|
"train_runtime": 8965.3483, |
|
"train_samples_per_second": 53.539, |
|
"train_steps_per_second": 1.673 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 79, |
|
"total_flos": 8.008377828590592e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |