lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-linelevel-ml384
/
checkpoint-8500
/trainer_state.json
{ | |
"best_metric": 0.8849383152916955, | |
"best_model_checkpoint": "DocLayNet/lilt-xlm-roberta-base-finetuned-DocLayNet-base_ml384-v2/checkpoint-8500", | |
"epoch": 3.5313668466971335, | |
"global_step": 8500, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.21, | |
"learning_rate": 4.793934358122144e-05, | |
"loss": 0.7223, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.21, | |
"eval_accuracy": 0.7741471103890881, | |
"eval_f1": 0.7741471103890881, | |
"eval_loss": 0.7764509916305542, | |
"eval_precision": 0.7741471103890881, | |
"eval_recall": 0.7741471103890881, | |
"eval_runtime": 33.2902, | |
"eval_samples_per_second": 61.64, | |
"eval_steps_per_second": 3.875, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 4.5870378063980065e-05, | |
"loss": 0.4469, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.42, | |
"eval_accuracy": 0.8311511798105226, | |
"eval_f1": 0.8311511798105226, | |
"eval_loss": 0.5913723707199097, | |
"eval_precision": 0.8311511798105226, | |
"eval_recall": 0.8311511798105226, | |
"eval_runtime": 37.4699, | |
"eval_samples_per_second": 54.764, | |
"eval_steps_per_second": 3.443, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 4.3793103448275864e-05, | |
"loss": 0.3819, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.62, | |
"eval_accuracy": 0.8102330668639719, | |
"eval_f1": 0.8102330668639719, | |
"eval_loss": 0.8744572997093201, | |
"eval_precision": 0.8102330668639719, | |
"eval_recall": 0.8102330668639719, | |
"eval_runtime": 34.6123, | |
"eval_samples_per_second": 59.285, | |
"eval_steps_per_second": 3.727, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.83, | |
"learning_rate": 4.171582883257167e-05, | |
"loss": 0.3361, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.83, | |
"eval_accuracy": 0.8337166846278812, | |
"eval_f1": 0.8337166846278812, | |
"eval_loss": 0.6990912556648254, | |
"eval_precision": 0.8337166846278812, | |
"eval_recall": 0.8337166846278812, | |
"eval_runtime": 38.6559, | |
"eval_samples_per_second": 53.084, | |
"eval_steps_per_second": 3.337, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 3.964270876609888e-05, | |
"loss": 0.2784, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 1.04, | |
"eval_accuracy": 0.8119380418523106, | |
"eval_f1": 0.8119380418523107, | |
"eval_loss": 0.7512525320053101, | |
"eval_precision": 0.8119380418523106, | |
"eval_recall": 0.8119380418523106, | |
"eval_runtime": 33.6314, | |
"eval_samples_per_second": 61.014, | |
"eval_steps_per_second": 3.836, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 3.756543415039468e-05, | |
"loss": 0.2377, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 1.25, | |
"eval_accuracy": 0.8165945537308391, | |
"eval_f1": 0.8165945537308391, | |
"eval_loss": 0.9047627449035645, | |
"eval_precision": 0.8165945537308391, | |
"eval_recall": 0.8165945537308391, | |
"eval_runtime": 39.8436, | |
"eval_samples_per_second": 51.501, | |
"eval_steps_per_second": 3.238, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 1.45, | |
"learning_rate": 3.548815953469049e-05, | |
"loss": 0.2401, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 1.45, | |
"eval_accuracy": 0.7939071271171446, | |
"eval_f1": 0.7939071271171445, | |
"eval_loss": 1.241086721420288, | |
"eval_precision": 0.7939071271171446, | |
"eval_recall": 0.7939071271171446, | |
"eval_runtime": 34.8048, | |
"eval_samples_per_second": 58.957, | |
"eval_steps_per_second": 3.706, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 3.341088491898629e-05, | |
"loss": 0.2054, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 1.66, | |
"eval_accuracy": 0.8079812131057889, | |
"eval_f1": 0.8079812131057889, | |
"eval_loss": 1.1594407558441162, | |
"eval_precision": 0.8079812131057889, | |
"eval_recall": 0.8079812131057889, | |
"eval_runtime": 39.5136, | |
"eval_samples_per_second": 51.932, | |
"eval_steps_per_second": 3.265, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 3.133361030328209e-05, | |
"loss": 0.1909, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 1.87, | |
"eval_accuracy": 0.8424747872802432, | |
"eval_f1": 0.842474787280243, | |
"eval_loss": 0.7545156478881836, | |
"eval_precision": 0.8424747872802432, | |
"eval_recall": 0.8424747872802432, | |
"eval_runtime": 35.061, | |
"eval_samples_per_second": 58.527, | |
"eval_steps_per_second": 3.679, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 2.9256335687577903e-05, | |
"loss": 0.1704, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 2.08, | |
"eval_accuracy": 0.8317543549243216, | |
"eval_f1": 0.8317543549243216, | |
"eval_loss": 0.856721818447113, | |
"eval_precision": 0.8317543549243216, | |
"eval_recall": 0.8317543549243216, | |
"eval_runtime": 38.2513, | |
"eval_samples_per_second": 53.645, | |
"eval_steps_per_second": 3.372, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 2.7179061071873702e-05, | |
"loss": 0.1294, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 2.29, | |
"eval_accuracy": 0.8489327821653182, | |
"eval_f1": 0.8489327821653182, | |
"eval_loss": 0.8485749959945679, | |
"eval_precision": 0.8489327821653182, | |
"eval_recall": 0.8489327821653182, | |
"eval_runtime": 33.5765, | |
"eval_samples_per_second": 61.114, | |
"eval_steps_per_second": 3.842, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 2.5101786456169508e-05, | |
"loss": 0.134, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 2.49, | |
"eval_accuracy": 0.8573209374145502, | |
"eval_f1": 0.8573209374145502, | |
"eval_loss": 0.7681939005851746, | |
"eval_precision": 0.8573209374145502, | |
"eval_recall": 0.8573209374145502, | |
"eval_runtime": 39.5768, | |
"eval_samples_per_second": 51.849, | |
"eval_steps_per_second": 3.259, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 2.302451184046531e-05, | |
"loss": 0.1354, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 2.7, | |
"eval_accuracy": 0.8255697994241689, | |
"eval_f1": 0.8255697994241689, | |
"eval_loss": 0.9870807528495789, | |
"eval_precision": 0.8255697994241689, | |
"eval_recall": 0.8255697994241689, | |
"eval_runtime": 34.6532, | |
"eval_samples_per_second": 59.215, | |
"eval_steps_per_second": 3.723, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 2.0947237224761117e-05, | |
"loss": 0.1239, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 2.91, | |
"eval_accuracy": 0.8189268308375287, | |
"eval_f1": 0.8189268308375287, | |
"eval_loss": 1.1430405378341675, | |
"eval_precision": 0.8189268308375287, | |
"eval_recall": 0.8189268308375287, | |
"eval_runtime": 39.4751, | |
"eval_samples_per_second": 51.982, | |
"eval_steps_per_second": 3.268, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 1.886996260905692e-05, | |
"loss": 0.1012, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 3.12, | |
"eval_accuracy": 0.8385581702079747, | |
"eval_f1": 0.8385581702079747, | |
"eval_loss": 0.8271787166595459, | |
"eval_precision": 0.8385581702079747, | |
"eval_recall": 0.8385581702079747, | |
"eval_runtime": 33.307, | |
"eval_samples_per_second": 61.609, | |
"eval_steps_per_second": 3.873, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 3.32, | |
"learning_rate": 1.6792687993352722e-05, | |
"loss": 0.0788, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 3.32, | |
"eval_accuracy": 0.8364993324862074, | |
"eval_f1": 0.8364993324862076, | |
"eval_loss": 1.028805136680603, | |
"eval_precision": 0.8364993324862074, | |
"eval_recall": 0.8364993324862074, | |
"eval_runtime": 38.2117, | |
"eval_samples_per_second": 53.701, | |
"eval_steps_per_second": 3.376, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 3.53, | |
"learning_rate": 1.4715413377648527e-05, | |
"loss": 0.0802, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 3.53, | |
"eval_accuracy": 0.8849383152916955, | |
"eval_f1": 0.8849383152916955, | |
"eval_loss": 0.719699501991272, | |
"eval_precision": 0.8849383152916955, | |
"eval_recall": 0.8849383152916955, | |
"eval_runtime": 34.4056, | |
"eval_samples_per_second": 59.641, | |
"eval_steps_per_second": 3.749, | |
"step": 8500 | |
} | |
], | |
"max_steps": 12035, | |
"num_train_epochs": 5, | |
"total_flos": 1.4182402968486144e+16, | |
"trial_name": null, | |
"trial_params": null | |
} | |