|
{
  "best_metric": 0.2892071891250798,
  "best_model_checkpoint": "/scratch/mrahma45/pixel/finetuned_models/mbert/mbert-base-finetuned-pos-ud-Coptic-Scriptorium/checkpoint-500",
  "epoch": 76.92307692307692,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.56,
      "learning_rate": 7.92e-05,
      "loss": 2.1425,
      "step": 100
    },
    {
      "epoch": 5.13,
      "learning_rate": 7.946845637583894e-05,
      "loss": 1.9316,
      "step": 200
    },
    {
      "epoch": 7.69,
      "learning_rate": 7.893154362416109e-05,
      "loss": 1.9066,
      "step": 300
    },
    {
      "epoch": 10.26,
      "learning_rate": 7.839463087248322e-05,
      "loss": 1.8917,
      "step": 400
    },
    {
      "epoch": 12.82,
      "learning_rate": 7.785771812080537e-05,
      "loss": 1.88,
      "step": 500
    },
    {
      "epoch": 12.82,
      "eval_accuracy": 0.2892071891250798,
      "eval_loss": 1.9474276304244995,
      "eval_runtime": 1.9459,
      "eval_samples_per_second": 195.796,
      "eval_steps_per_second": 24.667,
      "step": 500
    },
    {
      "epoch": 15.38,
      "learning_rate": 7.732080536912752e-05,
      "loss": 1.8777,
      "step": 600
    },
    {
      "epoch": 17.95,
      "learning_rate": 7.678389261744967e-05,
      "loss": 1.8674,
      "step": 700
    },
    {
      "epoch": 20.51,
      "learning_rate": 7.624697986577182e-05,
      "loss": 1.8495,
      "step": 800
    },
    {
      "epoch": 23.08,
      "learning_rate": 7.571006711409396e-05,
      "loss": 1.838,
      "step": 900
    },
    {
      "epoch": 25.64,
      "learning_rate": 7.517315436241611e-05,
      "loss": 1.8131,
      "step": 1000
    },
    {
      "epoch": 25.64,
      "eval_accuracy": 0.28847732871088405,
      "eval_loss": 1.996766209602356,
      "eval_runtime": 1.9595,
      "eval_samples_per_second": 194.438,
      "eval_steps_per_second": 24.496,
      "step": 1000
    },
    {
      "epoch": 28.21,
      "learning_rate": 7.463624161073826e-05,
      "loss": 1.7921,
      "step": 1100
    },
    {
      "epoch": 30.77,
      "learning_rate": 7.409932885906041e-05,
      "loss": 1.7496,
      "step": 1200
    },
    {
      "epoch": 33.33,
      "learning_rate": 7.356241610738256e-05,
      "loss": 1.7099,
      "step": 1300
    },
    {
      "epoch": 35.9,
      "learning_rate": 7.30255033557047e-05,
      "loss": 1.6553,
      "step": 1400
    },
    {
      "epoch": 38.46,
      "learning_rate": 7.248859060402685e-05,
      "loss": 1.5873,
      "step": 1500
    },
    {
      "epoch": 38.46,
      "eval_accuracy": 0.261290028282091,
      "eval_loss": 2.346529960632324,
      "eval_runtime": 1.9481,
      "eval_samples_per_second": 195.576,
      "eval_steps_per_second": 24.639,
      "step": 1500
    },
    {
      "epoch": 41.03,
      "learning_rate": 7.1951677852349e-05,
      "loss": 1.5155,
      "step": 1600
    },
    {
      "epoch": 43.59,
      "learning_rate": 7.141476510067115e-05,
      "loss": 1.4393,
      "step": 1700
    },
    {
      "epoch": 46.15,
      "learning_rate": 7.08778523489933e-05,
      "loss": 1.362,
      "step": 1800
    },
    {
      "epoch": 48.72,
      "learning_rate": 7.034093959731545e-05,
      "loss": 1.2868,
      "step": 1900
    },
    {
      "epoch": 51.28,
      "learning_rate": 6.98040268456376e-05,
      "loss": 1.2522,
      "step": 2000
    },
    {
      "epoch": 51.28,
      "eval_accuracy": 0.23583614633701305,
      "eval_loss": 3.054924249649048,
      "eval_runtime": 1.9514,
      "eval_samples_per_second": 195.244,
      "eval_steps_per_second": 24.598,
      "step": 2000
    },
    {
      "epoch": 53.85,
      "learning_rate": 6.927248322147651e-05,
      "loss": 1.1646,
      "step": 2100
    },
    {
      "epoch": 56.41,
      "learning_rate": 6.873557046979866e-05,
      "loss": 1.1278,
      "step": 2200
    },
    {
      "epoch": 58.97,
      "learning_rate": 6.820402684563758e-05,
      "loss": 1.0644,
      "step": 2300
    },
    {
      "epoch": 61.54,
      "learning_rate": 6.766711409395973e-05,
      "loss": 1.021,
      "step": 2400
    },
    {
      "epoch": 64.1,
      "learning_rate": 6.713020134228188e-05,
      "loss": 0.9782,
      "step": 2500
    },
    {
      "epoch": 64.1,
      "eval_accuracy": 0.2366572393029833,
      "eval_loss": 3.7386221885681152,
      "eval_runtime": 1.9495,
      "eval_samples_per_second": 195.433,
      "eval_steps_per_second": 24.622,
      "step": 2500
    },
    {
      "epoch": 66.67,
      "learning_rate": 6.659328859060403e-05,
      "loss": 0.9309,
      "step": 2600
    },
    {
      "epoch": 69.23,
      "learning_rate": 6.605637583892618e-05,
      "loss": 0.9079,
      "step": 2700
    },
    {
      "epoch": 71.79,
      "learning_rate": 6.551946308724832e-05,
      "loss": 0.8622,
      "step": 2800
    },
    {
      "epoch": 74.36,
      "learning_rate": 6.498255033557047e-05,
      "loss": 0.8537,
      "step": 2900
    },
    {
      "epoch": 76.92,
      "learning_rate": 6.444563758389262e-05,
      "loss": 0.8122,
      "step": 3000
    },
    {
      "epoch": 76.92,
      "eval_accuracy": 0.22726028647021257,
      "eval_loss": 3.944620132446289,
      "eval_runtime": 1.958,
      "eval_samples_per_second": 194.582,
      "eval_steps_per_second": 24.514,
      "step": 3000
    },
    {
      "epoch": 76.92,
      "step": 3000,
      "total_flos": 1.2335402138867712e+16,
      "train_loss": 1.4690344212849935,
      "train_runtime": 809.6738,
      "train_samples_per_second": 592.831,
      "train_steps_per_second": 18.526
    }
  ],
  "max_steps": 15000,
  "num_train_epochs": 385,
  "total_flos": 1.2335402138867712e+16,
  "trial_name": null,
  "trial_params": null
}
|
|