pierreguillou's picture
Training in progress, step 8500
86104c8
{
"best_metric": 0.8849383152916955,
"best_model_checkpoint": "DocLayNet/lilt-xlm-roberta-base-finetuned-DocLayNet-base_ml384-v2/checkpoint-8500",
"epoch": 3.5313668466971335,
"global_step": 8500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.21,
"learning_rate": 4.793934358122144e-05,
"loss": 0.7223,
"step": 500
},
{
"epoch": 0.21,
"eval_accuracy": 0.7741471103890881,
"eval_f1": 0.7741471103890881,
"eval_loss": 0.7764509916305542,
"eval_precision": 0.7741471103890881,
"eval_recall": 0.7741471103890881,
"eval_runtime": 33.2902,
"eval_samples_per_second": 61.64,
"eval_steps_per_second": 3.875,
"step": 500
},
{
"epoch": 0.42,
"learning_rate": 4.5870378063980065e-05,
"loss": 0.4469,
"step": 1000
},
{
"epoch": 0.42,
"eval_accuracy": 0.8311511798105226,
"eval_f1": 0.8311511798105226,
"eval_loss": 0.5913723707199097,
"eval_precision": 0.8311511798105226,
"eval_recall": 0.8311511798105226,
"eval_runtime": 37.4699,
"eval_samples_per_second": 54.764,
"eval_steps_per_second": 3.443,
"step": 1000
},
{
"epoch": 0.62,
"learning_rate": 4.3793103448275864e-05,
"loss": 0.3819,
"step": 1500
},
{
"epoch": 0.62,
"eval_accuracy": 0.8102330668639719,
"eval_f1": 0.8102330668639719,
"eval_loss": 0.8744572997093201,
"eval_precision": 0.8102330668639719,
"eval_recall": 0.8102330668639719,
"eval_runtime": 34.6123,
"eval_samples_per_second": 59.285,
"eval_steps_per_second": 3.727,
"step": 1500
},
{
"epoch": 0.83,
"learning_rate": 4.171582883257167e-05,
"loss": 0.3361,
"step": 2000
},
{
"epoch": 0.83,
"eval_accuracy": 0.8337166846278812,
"eval_f1": 0.8337166846278812,
"eval_loss": 0.6990912556648254,
"eval_precision": 0.8337166846278812,
"eval_recall": 0.8337166846278812,
"eval_runtime": 38.6559,
"eval_samples_per_second": 53.084,
"eval_steps_per_second": 3.337,
"step": 2000
},
{
"epoch": 1.04,
"learning_rate": 3.964270876609888e-05,
"loss": 0.2784,
"step": 2500
},
{
"epoch": 1.04,
"eval_accuracy": 0.8119380418523106,
"eval_f1": 0.8119380418523107,
"eval_loss": 0.7512525320053101,
"eval_precision": 0.8119380418523106,
"eval_recall": 0.8119380418523106,
"eval_runtime": 33.6314,
"eval_samples_per_second": 61.014,
"eval_steps_per_second": 3.836,
"step": 2500
},
{
"epoch": 1.25,
"learning_rate": 3.756543415039468e-05,
"loss": 0.2377,
"step": 3000
},
{
"epoch": 1.25,
"eval_accuracy": 0.8165945537308391,
"eval_f1": 0.8165945537308391,
"eval_loss": 0.9047627449035645,
"eval_precision": 0.8165945537308391,
"eval_recall": 0.8165945537308391,
"eval_runtime": 39.8436,
"eval_samples_per_second": 51.501,
"eval_steps_per_second": 3.238,
"step": 3000
},
{
"epoch": 1.45,
"learning_rate": 3.548815953469049e-05,
"loss": 0.2401,
"step": 3500
},
{
"epoch": 1.45,
"eval_accuracy": 0.7939071271171446,
"eval_f1": 0.7939071271171445,
"eval_loss": 1.241086721420288,
"eval_precision": 0.7939071271171446,
"eval_recall": 0.7939071271171446,
"eval_runtime": 34.8048,
"eval_samples_per_second": 58.957,
"eval_steps_per_second": 3.706,
"step": 3500
},
{
"epoch": 1.66,
"learning_rate": 3.341088491898629e-05,
"loss": 0.2054,
"step": 4000
},
{
"epoch": 1.66,
"eval_accuracy": 0.8079812131057889,
"eval_f1": 0.8079812131057889,
"eval_loss": 1.1594407558441162,
"eval_precision": 0.8079812131057889,
"eval_recall": 0.8079812131057889,
"eval_runtime": 39.5136,
"eval_samples_per_second": 51.932,
"eval_steps_per_second": 3.265,
"step": 4000
},
{
"epoch": 1.87,
"learning_rate": 3.133361030328209e-05,
"loss": 0.1909,
"step": 4500
},
{
"epoch": 1.87,
"eval_accuracy": 0.8424747872802432,
"eval_f1": 0.842474787280243,
"eval_loss": 0.7545156478881836,
"eval_precision": 0.8424747872802432,
"eval_recall": 0.8424747872802432,
"eval_runtime": 35.061,
"eval_samples_per_second": 58.527,
"eval_steps_per_second": 3.679,
"step": 4500
},
{
"epoch": 2.08,
"learning_rate": 2.9256335687577903e-05,
"loss": 0.1704,
"step": 5000
},
{
"epoch": 2.08,
"eval_accuracy": 0.8317543549243216,
"eval_f1": 0.8317543549243216,
"eval_loss": 0.856721818447113,
"eval_precision": 0.8317543549243216,
"eval_recall": 0.8317543549243216,
"eval_runtime": 38.2513,
"eval_samples_per_second": 53.645,
"eval_steps_per_second": 3.372,
"step": 5000
},
{
"epoch": 2.29,
"learning_rate": 2.7179061071873702e-05,
"loss": 0.1294,
"step": 5500
},
{
"epoch": 2.29,
"eval_accuracy": 0.8489327821653182,
"eval_f1": 0.8489327821653182,
"eval_loss": 0.8485749959945679,
"eval_precision": 0.8489327821653182,
"eval_recall": 0.8489327821653182,
"eval_runtime": 33.5765,
"eval_samples_per_second": 61.114,
"eval_steps_per_second": 3.842,
"step": 5500
},
{
"epoch": 2.49,
"learning_rate": 2.5101786456169508e-05,
"loss": 0.134,
"step": 6000
},
{
"epoch": 2.49,
"eval_accuracy": 0.8573209374145502,
"eval_f1": 0.8573209374145502,
"eval_loss": 0.7681939005851746,
"eval_precision": 0.8573209374145502,
"eval_recall": 0.8573209374145502,
"eval_runtime": 39.5768,
"eval_samples_per_second": 51.849,
"eval_steps_per_second": 3.259,
"step": 6000
},
{
"epoch": 2.7,
"learning_rate": 2.302451184046531e-05,
"loss": 0.1354,
"step": 6500
},
{
"epoch": 2.7,
"eval_accuracy": 0.8255697994241689,
"eval_f1": 0.8255697994241689,
"eval_loss": 0.9870807528495789,
"eval_precision": 0.8255697994241689,
"eval_recall": 0.8255697994241689,
"eval_runtime": 34.6532,
"eval_samples_per_second": 59.215,
"eval_steps_per_second": 3.723,
"step": 6500
},
{
"epoch": 2.91,
"learning_rate": 2.0947237224761117e-05,
"loss": 0.1239,
"step": 7000
},
{
"epoch": 2.91,
"eval_accuracy": 0.8189268308375287,
"eval_f1": 0.8189268308375287,
"eval_loss": 1.1430405378341675,
"eval_precision": 0.8189268308375287,
"eval_recall": 0.8189268308375287,
"eval_runtime": 39.4751,
"eval_samples_per_second": 51.982,
"eval_steps_per_second": 3.268,
"step": 7000
},
{
"epoch": 3.12,
"learning_rate": 1.886996260905692e-05,
"loss": 0.1012,
"step": 7500
},
{
"epoch": 3.12,
"eval_accuracy": 0.8385581702079747,
"eval_f1": 0.8385581702079747,
"eval_loss": 0.8271787166595459,
"eval_precision": 0.8385581702079747,
"eval_recall": 0.8385581702079747,
"eval_runtime": 33.307,
"eval_samples_per_second": 61.609,
"eval_steps_per_second": 3.873,
"step": 7500
},
{
"epoch": 3.32,
"learning_rate": 1.6792687993352722e-05,
"loss": 0.0788,
"step": 8000
},
{
"epoch": 3.32,
"eval_accuracy": 0.8364993324862074,
"eval_f1": 0.8364993324862076,
"eval_loss": 1.028805136680603,
"eval_precision": 0.8364993324862074,
"eval_recall": 0.8364993324862074,
"eval_runtime": 38.2117,
"eval_samples_per_second": 53.701,
"eval_steps_per_second": 3.376,
"step": 8000
},
{
"epoch": 3.53,
"learning_rate": 1.4715413377648527e-05,
"loss": 0.0802,
"step": 8500
},
{
"epoch": 3.53,
"eval_accuracy": 0.8849383152916955,
"eval_f1": 0.8849383152916955,
"eval_loss": 0.719699501991272,
"eval_precision": 0.8849383152916955,
"eval_recall": 0.8849383152916955,
"eval_runtime": 34.4056,
"eval_samples_per_second": 59.641,
"eval_steps_per_second": 3.749,
"step": 8500
}
],
"max_steps": 12035,
"num_train_epochs": 5,
"total_flos": 1.4182402968486144e+16,
"trial_name": null,
"trial_params": null
}