|
{ |
|
"best_metric": 29.294196353905335, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Korean-GSD/checkpoint-1500", |
|
"epoch": 28.985507246376812, |
|
"global_step": 4000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 7.52e-05, |
|
"loss": 4.639, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 7.949530201342283e-05, |
|
"loss": 3.2377, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 7.895838926174497e-05, |
|
"loss": 3.0918, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 7.842147651006712e-05, |
|
"loss": 2.9885, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 7.788456375838927e-05, |
|
"loss": 2.9165, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"eval_las": 27.86419133634387, |
|
"eval_loss": 2.9203531742095947, |
|
"eval_runtime": 6.1866, |
|
"eval_samples_per_second": 153.558, |
|
"eval_steps_per_second": 19.235, |
|
"eval_uas": 44.94062552266265, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 7.734765100671142e-05, |
|
"loss": 2.8823, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 5.07, |
|
"learning_rate": 7.681073825503357e-05, |
|
"loss": 2.8323, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 5.8, |
|
"learning_rate": 7.627382550335572e-05, |
|
"loss": 2.7932, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 6.52, |
|
"learning_rate": 7.573691275167786e-05, |
|
"loss": 2.753, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"learning_rate": 7.52e-05, |
|
"loss": 2.7026, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_las": 29.118581702625857, |
|
"eval_loss": 2.91562819480896, |
|
"eval_runtime": 6.1737, |
|
"eval_samples_per_second": 153.878, |
|
"eval_steps_per_second": 19.275, |
|
"eval_uas": 46.52115738417796, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 7.97, |
|
"learning_rate": 7.466308724832215e-05, |
|
"loss": 2.6742, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 7.41261744966443e-05, |
|
"loss": 2.5757, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"learning_rate": 7.358926174496644e-05, |
|
"loss": 2.5635, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 10.14, |
|
"learning_rate": 7.305234899328859e-05, |
|
"loss": 2.5617, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"learning_rate": 7.251543624161074e-05, |
|
"loss": 2.4701, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 10.87, |
|
"eval_las": 29.294196353905335, |
|
"eval_loss": 3.0349910259246826, |
|
"eval_runtime": 6.1916, |
|
"eval_samples_per_second": 153.433, |
|
"eval_steps_per_second": 19.219, |
|
"eval_uas": 47.00618832580699, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 11.59, |
|
"learning_rate": 7.197852348993289e-05, |
|
"loss": 2.3668, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 7.144161073825504e-05, |
|
"loss": 2.3771, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 13.04, |
|
"learning_rate": 7.090469798657718e-05, |
|
"loss": 2.3208, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 7.036778523489933e-05, |
|
"loss": 2.2402, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"learning_rate": 6.983087248322148e-05, |
|
"loss": 2.1529, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 14.49, |
|
"eval_las": 29.0600434855327, |
|
"eval_loss": 3.292515277862549, |
|
"eval_runtime": 6.1874, |
|
"eval_samples_per_second": 153.538, |
|
"eval_steps_per_second": 19.233, |
|
"eval_uas": 47.09817695266767, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 15.22, |
|
"learning_rate": 6.929395973154363e-05, |
|
"loss": 2.1538, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 15.94, |
|
"learning_rate": 6.875704697986578e-05, |
|
"loss": 2.0934, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 16.67, |
|
"learning_rate": 6.822013422818793e-05, |
|
"loss": 1.9925, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 17.39, |
|
"learning_rate": 6.768322147651007e-05, |
|
"loss": 1.9471, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"learning_rate": 6.714630872483222e-05, |
|
"loss": 1.9399, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.12, |
|
"eval_las": 28.85934102692758, |
|
"eval_loss": 3.839879274368286, |
|
"eval_runtime": 6.1995, |
|
"eval_samples_per_second": 153.237, |
|
"eval_steps_per_second": 19.195, |
|
"eval_uas": 46.28700451580532, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 18.84, |
|
"learning_rate": 6.660939597315437e-05, |
|
"loss": 1.8651, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 19.57, |
|
"learning_rate": 6.607248322147652e-05, |
|
"loss": 1.7929, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 20.29, |
|
"learning_rate": 6.553557046979867e-05, |
|
"loss": 1.7378, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 21.01, |
|
"learning_rate": 6.499865771812081e-05, |
|
"loss": 1.7862, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"learning_rate": 6.446174496644296e-05, |
|
"loss": 1.6585, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 21.74, |
|
"eval_las": 28.349222277972906, |
|
"eval_loss": 4.253769397735596, |
|
"eval_runtime": 6.1742, |
|
"eval_samples_per_second": 153.865, |
|
"eval_steps_per_second": 19.274, |
|
"eval_uas": 45.63472152533868, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 22.46, |
|
"learning_rate": 6.392483221476511e-05, |
|
"loss": 1.6568, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 23.19, |
|
"learning_rate": 6.338791946308726e-05, |
|
"loss": 1.625, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 23.91, |
|
"learning_rate": 6.28510067114094e-05, |
|
"loss": 1.628, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 24.64, |
|
"learning_rate": 6.231409395973154e-05, |
|
"loss": 1.5299, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 25.36, |
|
"learning_rate": 6.177718120805369e-05, |
|
"loss": 1.5284, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 25.36, |
|
"eval_las": 28.6001003512293, |
|
"eval_loss": 4.6269941329956055, |
|
"eval_runtime": 6.1796, |
|
"eval_samples_per_second": 153.733, |
|
"eval_steps_per_second": 19.257, |
|
"eval_uas": 45.37548084964041, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 26.09, |
|
"learning_rate": 6.124026845637584e-05, |
|
"loss": 1.5192, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 26.81, |
|
"learning_rate": 6.070335570469799e-05, |
|
"loss": 1.4848, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 6.0166442953020136e-05, |
|
"loss": 1.4342, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 28.26, |
|
"learning_rate": 5.962953020134229e-05, |
|
"loss": 1.418, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"learning_rate": 5.909261744966444e-05, |
|
"loss": 1.4311, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"eval_las": 28.299046663321626, |
|
"eval_loss": 4.774014472961426, |
|
"eval_runtime": 6.1783, |
|
"eval_samples_per_second": 153.765, |
|
"eval_steps_per_second": 19.261, |
|
"eval_uas": 45.16641578859341, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 28.99, |
|
"step": 4000, |
|
"total_flos": 2.1298234165100544e+16, |
|
"train_loss": 2.234064235687256, |
|
"train_runtime": 2087.3588, |
|
"train_samples_per_second": 229.956, |
|
"train_steps_per_second": 7.186 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 109, |
|
"total_flos": 2.1298234165100544e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|