pasha / trainer_state.json
mijungkim's picture
End of training
251564b
{
"best_metric": 0.9879474725670084,
"best_model_checkpoint": "pasha/checkpoint-500",
"epoch": 21.27659574468085,
"global_step": 1000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 2.13,
"eval_accuracy": 0.9570877531340405,
"eval_f1": 0.9485879797248371,
"eval_loss": 0.2664182484149933,
"eval_precision": 0.9534206695778749,
"eval_recall": 0.9438040345821326,
"eval_runtime": 13.9899,
"eval_samples_per_second": 13.51,
"eval_steps_per_second": 0.858,
"step": 100
},
{
"epoch": 4.26,
"eval_accuracy": 0.983847637415622,
"eval_f1": 0.9778975741239893,
"eval_loss": 0.10435084253549576,
"eval_precision": 0.9756185012549301,
"eval_recall": 0.9801873198847262,
"eval_runtime": 13.823,
"eval_samples_per_second": 13.673,
"eval_steps_per_second": 0.868,
"step": 200
},
{
"epoch": 6.38,
"eval_accuracy": 0.9903567984570878,
"eval_f1": 0.987601078167116,
"eval_loss": 0.06718672811985016,
"eval_precision": 0.9852993904625313,
"eval_recall": 0.9899135446685879,
"eval_runtime": 13.9839,
"eval_samples_per_second": 13.516,
"eval_steps_per_second": 0.858,
"step": 300
},
{
"epoch": 8.51,
"eval_accuracy": 0.9884281581485053,
"eval_f1": 0.9841783531103919,
"eval_loss": 0.06342343986034393,
"eval_precision": 0.9824120603015075,
"eval_recall": 0.9859510086455331,
"eval_runtime": 13.8826,
"eval_samples_per_second": 13.614,
"eval_steps_per_second": 0.864,
"step": 400
},
{
"epoch": 10.64,
"learning_rate": 5e-06,
"loss": 0.2958,
"step": 500
},
{
"epoch": 10.64,
"eval_accuracy": 0.9905978784956606,
"eval_f1": 0.9879474725670084,
"eval_loss": 0.058533914387226105,
"eval_precision": 0.986704994610133,
"eval_recall": 0.989193083573487,
"eval_runtime": 13.739,
"eval_samples_per_second": 13.756,
"eval_steps_per_second": 0.873,
"step": 500
},
{
"epoch": 12.77,
"eval_accuracy": 0.9927675988428158,
"eval_f1": 0.9908322847384505,
"eval_loss": 0.051136456429958344,
"eval_precision": 0.9888769285970578,
"eval_recall": 0.9927953890489913,
"eval_runtime": 14.0393,
"eval_samples_per_second": 13.462,
"eval_steps_per_second": 0.855,
"step": 600
},
{
"epoch": 14.89,
"eval_accuracy": 0.992526518804243,
"eval_f1": 0.9895795903701042,
"eval_loss": 0.05025022476911545,
"eval_precision": 0.9870967741935484,
"eval_recall": 0.9920749279538905,
"eval_runtime": 14.0132,
"eval_samples_per_second": 13.487,
"eval_steps_per_second": 0.856,
"step": 700
},
{
"epoch": 17.02,
"eval_accuracy": 0.991321118611379,
"eval_f1": 0.9881380301941048,
"eval_loss": 0.05291323363780975,
"eval_precision": 0.9860114777618364,
"eval_recall": 0.9902737752161384,
"eval_runtime": 13.9493,
"eval_samples_per_second": 13.549,
"eval_steps_per_second": 0.86,
"step": 800
},
{
"epoch": 19.15,
"eval_accuracy": 0.9903567984570878,
"eval_f1": 0.986704994610133,
"eval_loss": 0.0581122450530529,
"eval_precision": 0.9842293906810036,
"eval_recall": 0.989193083573487,
"eval_runtime": 13.9545,
"eval_samples_per_second": 13.544,
"eval_steps_per_second": 0.86,
"step": 900
},
{
"epoch": 21.28,
"learning_rate": 0.0,
"loss": 0.0256,
"step": 1000
},
{
"epoch": 21.28,
"eval_accuracy": 0.990115718418515,
"eval_f1": 0.9868775840373899,
"eval_loss": 0.057054802775382996,
"eval_precision": 0.984930032292788,
"eval_recall": 0.9888328530259366,
"eval_runtime": 14.2093,
"eval_samples_per_second": 13.301,
"eval_steps_per_second": 0.845,
"step": 1000
},
{
"epoch": 21.28,
"step": 1000,
"total_flos": 4247054450688000.0,
"train_loss": 0.16068801975250244,
"train_runtime": 1757.2853,
"train_samples_per_second": 9.105,
"train_steps_per_second": 0.569
}
],
"max_steps": 1000,
"num_train_epochs": 22,
"total_flos": 4247054450688000.0,
"trial_name": null,
"trial_params": null
}