PT-UP-xlmR-FalseTrue-0_0_BEST / trainer_state.json
Harish Tayyar Madabushi
added model
04378e2
{
"best_metric": 0.7140781182094741,
"best_model_checkpoint": "OUTPUT/xlm-roberta-base/FalseTrue-0/0/checkpoint-296",
"epoch": 8.0,
"global_step": 296,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"eval_accuracy": 0.5641025900840759,
"eval_f1": 0.4068936527952921,
"eval_loss": 0.6925989985466003,
"eval_runtime": 0.8159,
"eval_samples_per_second": 334.599,
"eval_steps_per_second": 42.897,
"step": 37
},
{
"epoch": 2.0,
"eval_accuracy": 0.5641025900840759,
"eval_f1": 0.4068936527952921,
"eval_loss": 0.7125938534736633,
"eval_runtime": 0.8146,
"eval_samples_per_second": 335.145,
"eval_steps_per_second": 42.967,
"step": 74
},
{
"epoch": 3.0,
"eval_accuracy": 0.5604395866394043,
"eval_f1": 0.4052004333694475,
"eval_loss": 0.7705000042915344,
"eval_runtime": 0.8132,
"eval_samples_per_second": 335.709,
"eval_steps_per_second": 43.04,
"step": 111
},
{
"epoch": 4.0,
"eval_accuracy": 0.7142857313156128,
"eval_f1": 0.7028323699421966,
"eval_loss": 0.8162457346916199,
"eval_runtime": 0.814,
"eval_samples_per_second": 335.389,
"eval_steps_per_second": 42.999,
"step": 148
},
{
"epoch": 5.0,
"eval_accuracy": 0.7069597244262695,
"eval_f1": 0.6908952629291613,
"eval_loss": 1.4020615816116333,
"eval_runtime": 0.8166,
"eval_samples_per_second": 334.305,
"eval_steps_per_second": 42.86,
"step": 185
},
{
"epoch": 6.0,
"eval_accuracy": 0.721611738204956,
"eval_f1": 0.7063504997827033,
"eval_loss": 1.4205191135406494,
"eval_runtime": 0.8137,
"eval_samples_per_second": 335.499,
"eval_steps_per_second": 43.013,
"step": 222
},
{
"epoch": 7.0,
"eval_accuracy": 0.7252747416496277,
"eval_f1": 0.7118124041200964,
"eval_loss": 1.5443077087402344,
"eval_runtime": 0.8137,
"eval_samples_per_second": 335.506,
"eval_steps_per_second": 43.014,
"step": 259
},
{
"epoch": 8.0,
"eval_accuracy": 0.7289377450942993,
"eval_f1": 0.7140781182094741,
"eval_loss": 1.6495815515518188,
"eval_runtime": 0.8093,
"eval_samples_per_second": 337.346,
"eval_steps_per_second": 43.249,
"step": 296
}
],
"max_steps": 333,
"num_train_epochs": 9,
"total_flos": 1988472400920576.0,
"trial_name": null,
"trial_params": null
}