BramVanroy's picture
hpc model
c3446ba
{
"best_metric": 0.7412639349881154,
"best_model_checkpoint": "trained/hebban-reviews/robbert-v2-dutch-base/checkpoint-3500",
"epoch": 4.382997370727432,
"global_step": 5001,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.44,
"learning_rate": 4.501099780043991e-05,
"loss": 0.6723,
"step": 500
},
{
"epoch": 0.44,
"eval_accuracy": 0.8079388560157791,
"eval_f1": 0.8117501994363496,
"eval_loss": 0.5760409235954285,
"eval_precision": 0.8174343819162313,
"eval_qwk": 0.7302784941209646,
"eval_recall": 0.8079388560157791,
"eval_runtime": 23.4439,
"eval_samples_per_second": 692.036,
"eval_steps_per_second": 5.417,
"step": 500
},
{
"epoch": 0.88,
"learning_rate": 4.0011997600479906e-05,
"loss": 0.5754,
"step": 1000
},
{
"epoch": 0.88,
"eval_accuracy": 0.7649778106508875,
"eval_f1": 0.7798244436633573,
"eval_loss": 0.5568957328796387,
"eval_precision": 0.8129894746393622,
"eval_qwk": 0.6942695698574534,
"eval_recall": 0.7649778106508875,
"eval_runtime": 23.1665,
"eval_samples_per_second": 700.32,
"eval_steps_per_second": 5.482,
"step": 1000
},
{
"epoch": 1.31,
"learning_rate": 3.50129974005199e-05,
"loss": 0.5052,
"step": 1500
},
{
"epoch": 1.31,
"eval_accuracy": 0.7829758382642998,
"eval_f1": 0.7967934169486903,
"eval_loss": 0.5705748200416565,
"eval_precision": 0.8279826615797163,
"eval_qwk": 0.7174911347556729,
"eval_recall": 0.7829758382642998,
"eval_runtime": 23.1285,
"eval_samples_per_second": 701.473,
"eval_steps_per_second": 5.491,
"step": 1500
},
{
"epoch": 1.75,
"learning_rate": 3.001399720055989e-05,
"loss": 0.4723,
"step": 2000
},
{
"epoch": 1.75,
"eval_accuracy": 0.7924063116370809,
"eval_f1": 0.8033559165914831,
"eval_loss": 0.5601416230201721,
"eval_precision": 0.8252798007612112,
"eval_qwk": 0.7270656679689509,
"eval_recall": 0.7924063116370809,
"eval_runtime": 23.2157,
"eval_samples_per_second": 698.837,
"eval_steps_per_second": 5.47,
"step": 2000
},
{
"epoch": 2.19,
"learning_rate": 2.5024995000999802e-05,
"loss": 0.4285,
"step": 2500
},
{
"epoch": 2.19,
"eval_accuracy": 0.8101577909270217,
"eval_f1": 0.8186486636369545,
"eval_loss": 0.691852867603302,
"eval_precision": 0.8361687645268726,
"eval_qwk": 0.7349979059232339,
"eval_recall": 0.8101577909270217,
"eval_runtime": 23.1035,
"eval_samples_per_second": 702.232,
"eval_steps_per_second": 5.497,
"step": 2500
},
{
"epoch": 2.63,
"learning_rate": 2.0025994801039795e-05,
"loss": 0.3553,
"step": 3000
},
{
"epoch": 2.63,
"eval_accuracy": 0.7843318540433925,
"eval_f1": 0.7982445447583074,
"eval_loss": 0.674355685710907,
"eval_precision": 0.8311821725851122,
"eval_qwk": 0.7133869717906458,
"eval_recall": 0.7843318540433925,
"eval_runtime": 23.1222,
"eval_samples_per_second": 701.663,
"eval_steps_per_second": 5.493,
"step": 3000
},
{
"epoch": 3.07,
"learning_rate": 1.5026994601079786e-05,
"loss": 0.3433,
"step": 3500
},
{
"epoch": 3.07,
"eval_accuracy": 0.813732741617357,
"eval_f1": 0.821008951036937,
"eval_loss": 0.8146640658378601,
"eval_precision": 0.8342737114916078,
"eval_qwk": 0.7412639349881154,
"eval_recall": 0.813732741617357,
"eval_runtime": 23.1151,
"eval_samples_per_second": 701.878,
"eval_steps_per_second": 5.494,
"step": 3500
},
{
"epoch": 3.51,
"learning_rate": 1.0037992401519696e-05,
"loss": 0.2751,
"step": 4000
},
{
"epoch": 3.51,
"eval_accuracy": 0.8128698224852071,
"eval_f1": 0.8199992286734568,
"eval_loss": 0.8419223427772522,
"eval_precision": 0.8332550382998175,
"eval_qwk": 0.7383621154665407,
"eval_recall": 0.8128698224852071,
"eval_runtime": 23.1425,
"eval_samples_per_second": 701.047,
"eval_steps_per_second": 5.488,
"step": 4000
},
{
"epoch": 3.94,
"learning_rate": 5.038992201559688e-06,
"loss": 0.2659,
"step": 4500
},
{
"epoch": 3.94,
"eval_accuracy": 0.8022682445759369,
"eval_f1": 0.8125481330626305,
"eval_loss": 0.8309345841407776,
"eval_precision": 0.8344366995643693,
"eval_qwk": 0.7292405615679007,
"eval_recall": 0.8022682445759369,
"eval_runtime": 23.1428,
"eval_samples_per_second": 701.039,
"eval_steps_per_second": 5.488,
"step": 4500
},
{
"epoch": 4.38,
"learning_rate": 3.9992001599680065e-08,
"loss": 0.2346,
"step": 5000
},
{
"epoch": 4.38,
"eval_accuracy": 0.8102810650887574,
"eval_f1": 0.8179537922797215,
"eval_loss": 0.8737895488739014,
"eval_precision": 0.8320255779887702,
"eval_qwk": 0.7367921505621005,
"eval_recall": 0.8102810650887574,
"eval_runtime": 23.1382,
"eval_samples_per_second": 701.178,
"eval_steps_per_second": 5.489,
"step": 5000
},
{
"epoch": 4.38,
"step": 5001,
"total_flos": 1.6838837781764506e+17,
"train_loss": 0.4127759954567505,
"train_runtime": 2598.1165,
"train_samples_per_second": 246.382,
"train_steps_per_second": 1.925
}
],
"max_steps": 5001,
"num_train_epochs": 5,
"total_flos": 1.6838837781764506e+17,
"trial_name": null,
"trial_params": null
}