longformer-base-health-fact / trainer_state.json
nbroad's picture
nbroad HF staff
End of training
2b187d2
{
"best_metric": 0.812191103789127,
"best_model_checkpoint": "./longformer-base-health-fact2/checkpoint-1839",
"epoch": 3.0,
"global_step": 1839,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.16,
"learning_rate": 1.597826086956522e-05,
"loss": 1.1814,
"step": 100
},
{
"epoch": 0.33,
"learning_rate": 2.9764350453172204e-05,
"loss": 0.7803,
"step": 200
},
{
"epoch": 0.49,
"learning_rate": 2.795166163141994e-05,
"loss": 0.695,
"step": 300
},
{
"epoch": 0.65,
"learning_rate": 2.61570996978852e-05,
"loss": 0.6093,
"step": 400
},
{
"epoch": 0.82,
"learning_rate": 2.434441087613293e-05,
"loss": 0.5988,
"step": 500
},
{
"epoch": 0.98,
"learning_rate": 2.2531722054380667e-05,
"loss": 0.555,
"step": 600
},
{
"epoch": 1.0,
"eval_false_f1": 0.7698209718670076,
"eval_loss": 0.5242577791213989,
"eval_macro_f1": 0.5534734720356734,
"eval_micro_f1": 0.7841845140032949,
"eval_mixture_f1": 0.4169611307420495,
"eval_runtime": 35.7477,
"eval_samples_per_second": 33.96,
"eval_steps_per_second": 1.063,
"eval_true_f1": 0.8937784522003035,
"eval_unproven_f1": 0.13333333333333333,
"step": 613
},
{
"epoch": 1.14,
"learning_rate": 2.07190332326284e-05,
"loss": 0.5169,
"step": 700
},
{
"epoch": 1.31,
"learning_rate": 1.8906344410876132e-05,
"loss": 0.4659,
"step": 800
},
{
"epoch": 1.47,
"learning_rate": 1.709365558912387e-05,
"loss": 0.4781,
"step": 900
},
{
"epoch": 1.63,
"learning_rate": 1.52809667673716e-05,
"loss": 0.4591,
"step": 1000
},
{
"epoch": 1.79,
"learning_rate": 1.3468277945619335e-05,
"loss": 0.4591,
"step": 1100
},
{
"epoch": 1.96,
"learning_rate": 1.165558912386707e-05,
"loss": 0.4282,
"step": 1200
},
{
"epoch": 2.0,
"eval_false_f1": 0.7829360100376411,
"eval_loss": 0.5008112788200378,
"eval_macro_f1": 0.6393266392992799,
"eval_micro_f1": 0.8031301482701811,
"eval_mixture_f1": 0.46052631578947373,
"eval_runtime": 35.7682,
"eval_samples_per_second": 33.941,
"eval_steps_per_second": 1.062,
"eval_true_f1": 0.9199048374306107,
"eval_unproven_f1": 0.3939393939393939,
"step": 1226
},
{
"epoch": 2.12,
"learning_rate": 9.842900302114804e-06,
"loss": 0.3421,
"step": 1300
},
{
"epoch": 2.28,
"learning_rate": 8.030211480362539e-06,
"loss": 0.3261,
"step": 1400
},
{
"epoch": 2.45,
"learning_rate": 6.217522658610272e-06,
"loss": 0.3269,
"step": 1500
},
{
"epoch": 2.61,
"learning_rate": 4.404833836858006e-06,
"loss": 0.3102,
"step": 1600
},
{
"epoch": 2.77,
"learning_rate": 2.5921450151057403e-06,
"loss": 0.2872,
"step": 1700
},
{
"epoch": 2.94,
"learning_rate": 7.794561933534744e-07,
"loss": 0.2897,
"step": 1800
},
{
"epoch": 3.0,
"eval_false_f1": 0.7941176470588236,
"eval_loss": 0.5857986211776733,
"eval_macro_f1": 0.6829679742192657,
"eval_micro_f1": 0.812191103789127,
"eval_mixture_f1": 0.5014925373134329,
"eval_runtime": 35.8572,
"eval_samples_per_second": 33.857,
"eval_steps_per_second": 1.06,
"eval_true_f1": 0.9234411996842935,
"eval_unproven_f1": 0.5128205128205128,
"step": 1839
},
{
"epoch": 3.0,
"step": 1839,
"total_flos": 2.18669047246848e+16,
"train_loss": 0.49966207906692944,
"train_runtime": 2323.0594,
"train_samples_per_second": 12.661,
"train_steps_per_second": 0.792
}
],
"max_steps": 1839,
"num_train_epochs": 3,
"total_flos": 2.18669047246848e+16,
"trial_name": null,
"trial_params": null
}