vit-lr-cosine-warmup / trainer_state.json
sharren's picture
🍻 cheers
b844ddf verified
{
"best_metric": 0.4735751152038574,
"best_model_checkpoint": "./vit-lr-cosine-warmup/checkpoint-963",
"epoch": 13.0,
"eval_steps": 500,
"global_step": 4173,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.0,
"grad_norm": 7.814958572387695,
"learning_rate": 4.1428571428571437e-05,
"loss": 0.86,
"step": 321
},
{
"epoch": 1.0,
"eval_accuracy": 0.8141470180305131,
"eval_f1": 0.8010598763076213,
"eval_loss": 0.5250416994094849,
"eval_precision": 0.8100096575743447,
"eval_recall": 0.8141470180305131,
"eval_runtime": 36.0914,
"eval_samples_per_second": 79.908,
"eval_steps_per_second": 10.002,
"step": 321
},
{
"epoch": 2.0,
"grad_norm": 6.845722198486328,
"learning_rate": 8.311688311688312e-05,
"loss": 0.4517,
"step": 642
},
{
"epoch": 2.0,
"eval_accuracy": 0.8221220527045769,
"eval_f1": 0.8099830282273331,
"eval_loss": 0.5117006897926331,
"eval_precision": 0.8347375649374938,
"eval_recall": 0.8221220527045769,
"eval_runtime": 37.2073,
"eval_samples_per_second": 77.512,
"eval_steps_per_second": 9.702,
"step": 642
},
{
"epoch": 3.0,
"grad_norm": 2.5204238891601562,
"learning_rate": 9.985334621908699e-05,
"loss": 0.3512,
"step": 963
},
{
"epoch": 3.0,
"eval_accuracy": 0.8394590846047156,
"eval_f1": 0.830758624078281,
"eval_loss": 0.4735751152038574,
"eval_precision": 0.8318317467279469,
"eval_recall": 0.8394590846047156,
"eval_runtime": 35.8524,
"eval_samples_per_second": 80.441,
"eval_steps_per_second": 10.069,
"step": 963
},
{
"epoch": 4.0,
"grad_norm": 5.594597816467285,
"learning_rate": 9.894936461151184e-05,
"loss": 0.2184,
"step": 1284
},
{
"epoch": 4.0,
"eval_accuracy": 0.8567961165048543,
"eval_f1": 0.85051019948,
"eval_loss": 0.4796653389930725,
"eval_precision": 0.8536361493542505,
"eval_recall": 0.8567961165048543,
"eval_runtime": 35.3028,
"eval_samples_per_second": 81.693,
"eval_steps_per_second": 10.226,
"step": 1284
},
{
"epoch": 5.0,
"grad_norm": 9.165299415588379,
"learning_rate": 9.723506398349735e-05,
"loss": 0.1264,
"step": 1605
},
{
"epoch": 5.0,
"eval_accuracy": 0.8547156726768377,
"eval_f1": 0.8530254035056796,
"eval_loss": 0.6211732029914856,
"eval_precision": 0.8551837556766221,
"eval_recall": 0.8547156726768377,
"eval_runtime": 36.6331,
"eval_samples_per_second": 78.727,
"eval_steps_per_second": 9.854,
"step": 1605
},
{
"epoch": 6.0,
"grad_norm": 0.9397739768028259,
"learning_rate": 9.473882326123909e-05,
"loss": 0.0687,
"step": 1926
},
{
"epoch": 6.0,
"eval_accuracy": 0.8463938973647711,
"eval_f1": 0.840249522586874,
"eval_loss": 0.7659199237823486,
"eval_precision": 0.8475689441425499,
"eval_recall": 0.8463938973647711,
"eval_runtime": 35.8316,
"eval_samples_per_second": 80.488,
"eval_steps_per_second": 10.075,
"step": 1926
},
{
"epoch": 7.0,
"grad_norm": 11.412993431091309,
"learning_rate": 9.15019657867844e-05,
"loss": 0.0463,
"step": 2247
},
{
"epoch": 7.0,
"eval_accuracy": 0.8519417475728155,
"eval_f1": 0.84690540461018,
"eval_loss": 0.8237490057945251,
"eval_precision": 0.8546320390871954,
"eval_recall": 0.8519417475728155,
"eval_runtime": 36.3237,
"eval_samples_per_second": 79.397,
"eval_steps_per_second": 9.938,
"step": 2247
},
{
"epoch": 8.0,
"grad_norm": 0.15119314193725586,
"learning_rate": 8.759130166350091e-05,
"loss": 0.0373,
"step": 2568
},
{
"epoch": 8.0,
"eval_accuracy": 0.8377253814147018,
"eval_f1": 0.8414971604167042,
"eval_loss": 0.871150553226471,
"eval_precision": 0.8492780112281874,
"eval_recall": 0.8377253814147018,
"eval_runtime": 37.0971,
"eval_samples_per_second": 77.742,
"eval_steps_per_second": 9.731,
"step": 2568
},
{
"epoch": 9.0,
"grad_norm": 0.9838098883628845,
"learning_rate": 8.304716115113689e-05,
"loss": 0.0347,
"step": 2889
},
{
"epoch": 9.0,
"eval_accuracy": 0.8567961165048543,
"eval_f1": 0.8533749018674412,
"eval_loss": 0.8180708885192871,
"eval_precision": 0.8549859977362129,
"eval_recall": 0.8567961165048543,
"eval_runtime": 36.43,
"eval_samples_per_second": 79.166,
"eval_steps_per_second": 9.909,
"step": 2889
},
{
"epoch": 10.0,
"grad_norm": 2.4911880493164062,
"learning_rate": 7.795595034552552e-05,
"loss": 0.0263,
"step": 3210
},
{
"epoch": 10.0,
"eval_accuracy": 0.8446601941747572,
"eval_f1": 0.8308350673322552,
"eval_loss": 1.0705382823944092,
"eval_precision": 0.8388632159592988,
"eval_recall": 0.8446601941747572,
"eval_runtime": 36.2711,
"eval_samples_per_second": 79.512,
"eval_steps_per_second": 9.953,
"step": 3210
},
{
"epoch": 11.0,
"grad_norm": 0.003689270233735442,
"learning_rate": 7.240195031927308e-05,
"loss": 0.0289,
"step": 3531
},
{
"epoch": 11.0,
"eval_accuracy": 0.858876560332871,
"eval_f1": 0.855018220816544,
"eval_loss": 0.9376017451286316,
"eval_precision": 0.8605983316828895,
"eval_recall": 0.858876560332871,
"eval_runtime": 36.7783,
"eval_samples_per_second": 78.416,
"eval_steps_per_second": 9.816,
"step": 3531
},
{
"epoch": 12.0,
"grad_norm": 0.08966358751058578,
"learning_rate": 6.647710326399964e-05,
"loss": 0.0164,
"step": 3852
},
{
"epoch": 12.0,
"eval_accuracy": 0.863384188626907,
"eval_f1": 0.8610588511525862,
"eval_loss": 0.9714025259017944,
"eval_precision": 0.8611342448885915,
"eval_recall": 0.863384188626907,
"eval_runtime": 36.4831,
"eval_samples_per_second": 79.05,
"eval_steps_per_second": 9.895,
"step": 3852
},
{
"epoch": 13.0,
"grad_norm": 0.05049363151192665,
"learning_rate": 6.027949045818934e-05,
"loss": 0.0077,
"step": 4173
},
{
"epoch": 13.0,
"eval_accuracy": 0.8398058252427184,
"eval_f1": 0.8242916893123671,
"eval_loss": 1.2992373704910278,
"eval_precision": 0.8395816522197255,
"eval_recall": 0.8398058252427184,
"eval_runtime": 37.0798,
"eval_samples_per_second": 77.778,
"eval_steps_per_second": 9.736,
"step": 4173
},
{
"epoch": 13.0,
"step": 4173,
"total_flos": 5.166157498470679e+18,
"train_loss": 0.1749291451406399,
"train_runtime": 1863.5469,
"train_samples_per_second": 275.174,
"train_steps_per_second": 17.225
}
],
"logging_steps": 500,
"max_steps": 32100,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 500,
"total_flos": 5.166157498470679e+18,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}