|
{ |
|
"best_metric": 18.92319873317498, |
|
"best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/bert/bert-base-finetuned-parsing-ud-Tamil-TTB/checkpoint-500", |
|
"epoch": 230.76923076923077, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 7.69, |
|
"learning_rate": 7.52e-05, |
|
"loss": 4.575, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 15.38, |
|
"learning_rate": 7.949530201342283e-05, |
|
"loss": 2.7427, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 23.08, |
|
"learning_rate": 7.895838926174497e-05, |
|
"loss": 1.7932, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 30.77, |
|
"learning_rate": 7.842147651006712e-05, |
|
"loss": 1.1589, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"learning_rate": 7.788456375838927e-05, |
|
"loss": 0.787, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 38.46, |
|
"eval_las": 18.92319873317498, |
|
"eval_loss": 8.03709888458252, |
|
"eval_runtime": 0.5456, |
|
"eval_samples_per_second": 146.634, |
|
"eval_steps_per_second": 18.329, |
|
"eval_uas": 36.18368962787015, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 46.15, |
|
"learning_rate": 7.734765100671142e-05, |
|
"loss": 0.5851, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 53.85, |
|
"learning_rate": 7.681073825503357e-05, |
|
"loss": 0.4576, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 61.54, |
|
"learning_rate": 7.627382550335572e-05, |
|
"loss": 0.3946, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 69.23, |
|
"learning_rate": 7.573691275167786e-05, |
|
"loss": 0.3746, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"learning_rate": 7.52e-05, |
|
"loss": 0.358, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 76.92, |
|
"eval_las": 16.943784639746635, |
|
"eval_loss": 11.071226119995117, |
|
"eval_runtime": 0.5411, |
|
"eval_samples_per_second": 147.855, |
|
"eval_steps_per_second": 18.482, |
|
"eval_uas": 33.966745843230406, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 84.62, |
|
"learning_rate": 7.466308724832215e-05, |
|
"loss": 0.3352, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 92.31, |
|
"learning_rate": 7.41261744966443e-05, |
|
"loss": 0.3213, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"learning_rate": 7.358926174496644e-05, |
|
"loss": 0.322, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 107.69, |
|
"learning_rate": 7.305234899328859e-05, |
|
"loss": 0.3044, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"learning_rate": 7.251543624161074e-05, |
|
"loss": 0.2942, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 115.38, |
|
"eval_las": 17.02296120348377, |
|
"eval_loss": 12.35420036315918, |
|
"eval_runtime": 0.542, |
|
"eval_samples_per_second": 147.589, |
|
"eval_steps_per_second": 18.449, |
|
"eval_uas": 34.837688044338876, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 123.08, |
|
"learning_rate": 7.197852348993289e-05, |
|
"loss": 0.3028, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 130.77, |
|
"learning_rate": 7.144161073825504e-05, |
|
"loss": 0.2954, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 138.46, |
|
"learning_rate": 7.090469798657718e-05, |
|
"loss": 0.2822, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 146.15, |
|
"learning_rate": 7.036778523489933e-05, |
|
"loss": 0.2814, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"learning_rate": 6.983087248322148e-05, |
|
"loss": 0.2812, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 153.85, |
|
"eval_las": 17.81472684085511, |
|
"eval_loss": 13.284212112426758, |
|
"eval_runtime": 0.5416, |
|
"eval_samples_per_second": 147.703, |
|
"eval_steps_per_second": 18.463, |
|
"eval_uas": 36.57957244655582, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 161.54, |
|
"learning_rate": 6.929395973154363e-05, |
|
"loss": 0.2837, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 169.23, |
|
"learning_rate": 6.875704697986578e-05, |
|
"loss": 0.2887, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 176.92, |
|
"learning_rate": 6.822013422818793e-05, |
|
"loss": 0.278, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 184.62, |
|
"learning_rate": 6.768322147651007e-05, |
|
"loss": 0.2721, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"learning_rate": 6.714630872483222e-05, |
|
"loss": 0.2727, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 192.31, |
|
"eval_las": 16.706254948535236, |
|
"eval_loss": 14.351984977722168, |
|
"eval_runtime": 0.5408, |
|
"eval_samples_per_second": 147.943, |
|
"eval_steps_per_second": 18.493, |
|
"eval_uas": 34.36262866191607, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"learning_rate": 6.660939597315437e-05, |
|
"loss": 0.2718, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 207.69, |
|
"learning_rate": 6.607248322147652e-05, |
|
"loss": 0.264, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 215.38, |
|
"learning_rate": 6.553557046979867e-05, |
|
"loss": 0.2648, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 223.08, |
|
"learning_rate": 6.499865771812081e-05, |
|
"loss": 0.2699, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"learning_rate": 6.446174496644296e-05, |
|
"loss": 0.2604, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"eval_las": 17.339667458432302, |
|
"eval_loss": 14.176854133605957, |
|
"eval_runtime": 0.5399, |
|
"eval_samples_per_second": 148.179, |
|
"eval_steps_per_second": 18.522, |
|
"eval_uas": 34.04592240696754, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 230.77, |
|
"step": 3000, |
|
"total_flos": 1.541530495893504e+16, |
|
"train_loss": 0.6324253260294597, |
|
"train_runtime": 1509.7376, |
|
"train_samples_per_second": 317.936, |
|
"train_steps_per_second": 9.936 |
|
} |
|
], |
|
"max_steps": 15000, |
|
"num_train_epochs": 1154, |
|
"total_flos": 1.541530495893504e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|