lilt-xlm-roberta-base-finetuned-with-DocLayNet-base-at-linelevel-ml384
/
checkpoint-12000
/trainer_state.json
{ | |
"best_metric": 0.8849383152916955, | |
"best_model_checkpoint": "DocLayNet/lilt-xlm-roberta-base-finetuned-DocLayNet-base_ml384-v2/checkpoint-8500", | |
"epoch": 4.985459077690071, | |
"global_step": 12000, | |
"is_hyper_param_search": false, | |
"is_local_process_zero": true, | |
"is_world_process_zero": true, | |
"log_history": [ | |
{ | |
"epoch": 0.21, | |
"learning_rate": 4.793934358122144e-05, | |
"loss": 0.7223, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.21, | |
"eval_accuracy": 0.7741471103890881, | |
"eval_f1": 0.7741471103890881, | |
"eval_loss": 0.7764509916305542, | |
"eval_precision": 0.7741471103890881, | |
"eval_recall": 0.7741471103890881, | |
"eval_runtime": 33.2902, | |
"eval_samples_per_second": 61.64, | |
"eval_steps_per_second": 3.875, | |
"step": 500 | |
}, | |
{ | |
"epoch": 0.42, | |
"learning_rate": 4.5870378063980065e-05, | |
"loss": 0.4469, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.42, | |
"eval_accuracy": 0.8311511798105226, | |
"eval_f1": 0.8311511798105226, | |
"eval_loss": 0.5913723707199097, | |
"eval_precision": 0.8311511798105226, | |
"eval_recall": 0.8311511798105226, | |
"eval_runtime": 37.4699, | |
"eval_samples_per_second": 54.764, | |
"eval_steps_per_second": 3.443, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.62, | |
"learning_rate": 4.3793103448275864e-05, | |
"loss": 0.3819, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.62, | |
"eval_accuracy": 0.8102330668639719, | |
"eval_f1": 0.8102330668639719, | |
"eval_loss": 0.8744572997093201, | |
"eval_precision": 0.8102330668639719, | |
"eval_recall": 0.8102330668639719, | |
"eval_runtime": 34.6123, | |
"eval_samples_per_second": 59.285, | |
"eval_steps_per_second": 3.727, | |
"step": 1500 | |
}, | |
{ | |
"epoch": 0.83, | |
"learning_rate": 4.171582883257167e-05, | |
"loss": 0.3361, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 0.83, | |
"eval_accuracy": 0.8337166846278812, | |
"eval_f1": 0.8337166846278812, | |
"eval_loss": 0.6990912556648254, | |
"eval_precision": 0.8337166846278812, | |
"eval_recall": 0.8337166846278812, | |
"eval_runtime": 38.6559, | |
"eval_samples_per_second": 53.084, | |
"eval_steps_per_second": 3.337, | |
"step": 2000 | |
}, | |
{ | |
"epoch": 1.04, | |
"learning_rate": 3.964270876609888e-05, | |
"loss": 0.2784, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 1.04, | |
"eval_accuracy": 0.8119380418523106, | |
"eval_f1": 0.8119380418523107, | |
"eval_loss": 0.7512525320053101, | |
"eval_precision": 0.8119380418523106, | |
"eval_recall": 0.8119380418523106, | |
"eval_runtime": 33.6314, | |
"eval_samples_per_second": 61.014, | |
"eval_steps_per_second": 3.836, | |
"step": 2500 | |
}, | |
{ | |
"epoch": 1.25, | |
"learning_rate": 3.756543415039468e-05, | |
"loss": 0.2377, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 1.25, | |
"eval_accuracy": 0.8165945537308391, | |
"eval_f1": 0.8165945537308391, | |
"eval_loss": 0.9047627449035645, | |
"eval_precision": 0.8165945537308391, | |
"eval_recall": 0.8165945537308391, | |
"eval_runtime": 39.8436, | |
"eval_samples_per_second": 51.501, | |
"eval_steps_per_second": 3.238, | |
"step": 3000 | |
}, | |
{ | |
"epoch": 1.45, | |
"learning_rate": 3.548815953469049e-05, | |
"loss": 0.2401, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 1.45, | |
"eval_accuracy": 0.7939071271171446, | |
"eval_f1": 0.7939071271171445, | |
"eval_loss": 1.241086721420288, | |
"eval_precision": 0.7939071271171446, | |
"eval_recall": 0.7939071271171446, | |
"eval_runtime": 34.8048, | |
"eval_samples_per_second": 58.957, | |
"eval_steps_per_second": 3.706, | |
"step": 3500 | |
}, | |
{ | |
"epoch": 1.66, | |
"learning_rate": 3.341088491898629e-05, | |
"loss": 0.2054, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 1.66, | |
"eval_accuracy": 0.8079812131057889, | |
"eval_f1": 0.8079812131057889, | |
"eval_loss": 1.1594407558441162, | |
"eval_precision": 0.8079812131057889, | |
"eval_recall": 0.8079812131057889, | |
"eval_runtime": 39.5136, | |
"eval_samples_per_second": 51.932, | |
"eval_steps_per_second": 3.265, | |
"step": 4000 | |
}, | |
{ | |
"epoch": 1.87, | |
"learning_rate": 3.133361030328209e-05, | |
"loss": 0.1909, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 1.87, | |
"eval_accuracy": 0.8424747872802432, | |
"eval_f1": 0.842474787280243, | |
"eval_loss": 0.7545156478881836, | |
"eval_precision": 0.8424747872802432, | |
"eval_recall": 0.8424747872802432, | |
"eval_runtime": 35.061, | |
"eval_samples_per_second": 58.527, | |
"eval_steps_per_second": 3.679, | |
"step": 4500 | |
}, | |
{ | |
"epoch": 2.08, | |
"learning_rate": 2.9256335687577903e-05, | |
"loss": 0.1704, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 2.08, | |
"eval_accuracy": 0.8317543549243216, | |
"eval_f1": 0.8317543549243216, | |
"eval_loss": 0.856721818447113, | |
"eval_precision": 0.8317543549243216, | |
"eval_recall": 0.8317543549243216, | |
"eval_runtime": 38.2513, | |
"eval_samples_per_second": 53.645, | |
"eval_steps_per_second": 3.372, | |
"step": 5000 | |
}, | |
{ | |
"epoch": 2.29, | |
"learning_rate": 2.7179061071873702e-05, | |
"loss": 0.1294, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 2.29, | |
"eval_accuracy": 0.8489327821653182, | |
"eval_f1": 0.8489327821653182, | |
"eval_loss": 0.8485749959945679, | |
"eval_precision": 0.8489327821653182, | |
"eval_recall": 0.8489327821653182, | |
"eval_runtime": 33.5765, | |
"eval_samples_per_second": 61.114, | |
"eval_steps_per_second": 3.842, | |
"step": 5500 | |
}, | |
{ | |
"epoch": 2.49, | |
"learning_rate": 2.5101786456169508e-05, | |
"loss": 0.134, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 2.49, | |
"eval_accuracy": 0.8573209374145502, | |
"eval_f1": 0.8573209374145502, | |
"eval_loss": 0.7681939005851746, | |
"eval_precision": 0.8573209374145502, | |
"eval_recall": 0.8573209374145502, | |
"eval_runtime": 39.5768, | |
"eval_samples_per_second": 51.849, | |
"eval_steps_per_second": 3.259, | |
"step": 6000 | |
}, | |
{ | |
"epoch": 2.7, | |
"learning_rate": 2.302451184046531e-05, | |
"loss": 0.1354, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 2.7, | |
"eval_accuracy": 0.8255697994241689, | |
"eval_f1": 0.8255697994241689, | |
"eval_loss": 0.9870807528495789, | |
"eval_precision": 0.8255697994241689, | |
"eval_recall": 0.8255697994241689, | |
"eval_runtime": 34.6532, | |
"eval_samples_per_second": 59.215, | |
"eval_steps_per_second": 3.723, | |
"step": 6500 | |
}, | |
{ | |
"epoch": 2.91, | |
"learning_rate": 2.0947237224761117e-05, | |
"loss": 0.1239, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 2.91, | |
"eval_accuracy": 0.8189268308375287, | |
"eval_f1": 0.8189268308375287, | |
"eval_loss": 1.1430405378341675, | |
"eval_precision": 0.8189268308375287, | |
"eval_recall": 0.8189268308375287, | |
"eval_runtime": 39.4751, | |
"eval_samples_per_second": 51.982, | |
"eval_steps_per_second": 3.268, | |
"step": 7000 | |
}, | |
{ | |
"epoch": 3.12, | |
"learning_rate": 1.886996260905692e-05, | |
"loss": 0.1012, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 3.12, | |
"eval_accuracy": 0.8385581702079747, | |
"eval_f1": 0.8385581702079747, | |
"eval_loss": 0.8271787166595459, | |
"eval_precision": 0.8385581702079747, | |
"eval_recall": 0.8385581702079747, | |
"eval_runtime": 33.307, | |
"eval_samples_per_second": 61.609, | |
"eval_steps_per_second": 3.873, | |
"step": 7500 | |
}, | |
{ | |
"epoch": 3.32, | |
"learning_rate": 1.6792687993352722e-05, | |
"loss": 0.0788, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 3.32, | |
"eval_accuracy": 0.8364993324862074, | |
"eval_f1": 0.8364993324862076, | |
"eval_loss": 1.028805136680603, | |
"eval_precision": 0.8364993324862074, | |
"eval_recall": 0.8364993324862074, | |
"eval_runtime": 38.2117, | |
"eval_samples_per_second": 53.701, | |
"eval_steps_per_second": 3.376, | |
"step": 8000 | |
}, | |
{ | |
"epoch": 3.53, | |
"learning_rate": 1.4715413377648527e-05, | |
"loss": 0.0802, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 3.53, | |
"eval_accuracy": 0.8849383152916955, | |
"eval_f1": 0.8849383152916955, | |
"eval_loss": 0.719699501991272, | |
"eval_precision": 0.8849383152916955, | |
"eval_recall": 0.8849383152916955, | |
"eval_runtime": 34.4056, | |
"eval_samples_per_second": 59.641, | |
"eval_steps_per_second": 3.749, | |
"step": 8500 | |
}, | |
{ | |
"epoch": 3.74, | |
"learning_rate": 1.263813876194433e-05, | |
"loss": 0.0861, | |
"step": 9000 | |
}, | |
{ | |
"epoch": 3.74, | |
"eval_accuracy": 0.8319634556304386, | |
"eval_f1": 0.8319634556304386, | |
"eval_loss": 1.1419589519500732, | |
"eval_precision": 0.8319634556304386, | |
"eval_recall": 0.8319634556304386, | |
"eval_runtime": 39.7118, | |
"eval_samples_per_second": 51.672, | |
"eval_steps_per_second": 3.248, | |
"step": 9000 | |
}, | |
{ | |
"epoch": 3.95, | |
"learning_rate": 1.0560864146240134e-05, | |
"loss": 0.0639, | |
"step": 9500 | |
}, | |
{ | |
"epoch": 3.95, | |
"eval_accuracy": 0.8585272876421483, | |
"eval_f1": 0.8585272876421483, | |
"eval_loss": 0.9562506079673767, | |
"eval_precision": 0.8585272876421483, | |
"eval_recall": 0.8585272876421483, | |
"eval_runtime": 33.8854, | |
"eval_samples_per_second": 60.557, | |
"eval_steps_per_second": 3.807, | |
"step": 9500 | |
}, | |
{ | |
"epoch": 4.15, | |
"learning_rate": 8.483589530535937e-06, | |
"loss": 0.0464, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 4.15, | |
"eval_accuracy": 0.8510961702401442, | |
"eval_f1": 0.8510961702401442, | |
"eval_loss": 1.0767754316329956, | |
"eval_precision": 0.8510961702401442, | |
"eval_recall": 0.8510961702401442, | |
"eval_runtime": 38.7314, | |
"eval_samples_per_second": 52.98, | |
"eval_steps_per_second": 3.331, | |
"step": 10000 | |
}, | |
{ | |
"epoch": 4.36, | |
"learning_rate": 6.406314914831742e-06, | |
"loss": 0.0412, | |
"step": 10500 | |
}, | |
{ | |
"epoch": 4.36, | |
"eval_accuracy": 0.8439143652185103, | |
"eval_f1": 0.8439143652185103, | |
"eval_loss": 1.1184284687042236, | |
"eval_precision": 0.8439143652185103, | |
"eval_recall": 0.8439143652185103, | |
"eval_runtime": 34.8108, | |
"eval_samples_per_second": 58.947, | |
"eval_steps_per_second": 3.706, | |
"step": 10500 | |
}, | |
{ | |
"epoch": 4.57, | |
"learning_rate": 4.333194848358953e-06, | |
"loss": 0.039, | |
"step": 11000 | |
}, | |
{ | |
"epoch": 4.57, | |
"eval_accuracy": 0.8635778739283588, | |
"eval_f1": 0.8635778739283588, | |
"eval_loss": 0.9634060263633728, | |
"eval_precision": 0.8635778739283588, | |
"eval_recall": 0.8635778739283588, | |
"eval_runtime": 39.6244, | |
"eval_samples_per_second": 51.786, | |
"eval_steps_per_second": 3.256, | |
"step": 11000 | |
}, | |
{ | |
"epoch": 4.78, | |
"learning_rate": 2.2559202326547573e-06, | |
"loss": 0.0469, | |
"step": 11500 | |
}, | |
{ | |
"epoch": 4.78, | |
"eval_accuracy": 0.8634089848964952, | |
"eval_f1": 0.8634089848964951, | |
"eval_loss": 0.9584938883781433, | |
"eval_precision": 0.8634089848964952, | |
"eval_recall": 0.8634089848964952, | |
"eval_runtime": 35.0357, | |
"eval_samples_per_second": 58.569, | |
"eval_steps_per_second": 3.682, | |
"step": 11500 | |
}, | |
{ | |
"epoch": 4.99, | |
"learning_rate": 1.7864561695056087e-07, | |
"loss": 0.0395, | |
"step": 12000 | |
}, | |
{ | |
"epoch": 4.99, | |
"eval_accuracy": 0.8583503562754339, | |
"eval_f1": 0.8583503562754339, | |
"eval_loss": 1.0003422498703003, | |
"eval_precision": 0.8583503562754339, | |
"eval_recall": 0.8583503562754339, | |
"eval_runtime": 39.4996, | |
"eval_samples_per_second": 51.95, | |
"eval_steps_per_second": 3.266, | |
"step": 12000 | |
} | |
], | |
"max_steps": 12035, | |
"num_train_epochs": 5, | |
"total_flos": 2.002255958004019e+16, | |
"trial_name": null, | |
"trial_params": null | |
} | |