vit-base-patch16-224-ve-Ub / trainer_state.json
Augusto777's picture
End of training
f60cd36 verified
{
"best_metric": 0.7254901960784313,
"best_model_checkpoint": "vit-base-patch16-224-ve-Ub\\checkpoint-63",
"epoch": 45.714285714285715,
"eval_steps": 500,
"global_step": 80,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.57,
"eval_accuracy": 0.09803921568627451,
"eval_loss": 1.3862946033477783,
"eval_runtime": 0.7706,
"eval_samples_per_second": 66.182,
"eval_steps_per_second": 2.595,
"step": 1
},
{
"epoch": 1.71,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.381344199180603,
"eval_runtime": 0.7881,
"eval_samples_per_second": 64.714,
"eval_steps_per_second": 2.538,
"step": 3
},
{
"epoch": 2.86,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.368581771850586,
"eval_runtime": 0.7663,
"eval_samples_per_second": 66.551,
"eval_steps_per_second": 2.61,
"step": 5
},
{
"epoch": 4.0,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.347954273223877,
"eval_runtime": 0.7685,
"eval_samples_per_second": 66.363,
"eval_steps_per_second": 2.602,
"step": 7
},
{
"epoch": 4.57,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.3345353603363037,
"eval_runtime": 0.7796,
"eval_samples_per_second": 65.42,
"eval_steps_per_second": 2.565,
"step": 8
},
{
"epoch": 5.71,
"learning_rate": 4.8611111111111115e-05,
"loss": 1.3658,
"step": 10
},
{
"epoch": 5.71,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.304049015045166,
"eval_runtime": 0.7926,
"eval_samples_per_second": 64.342,
"eval_steps_per_second": 2.523,
"step": 10
},
{
"epoch": 6.86,
"eval_accuracy": 0.47058823529411764,
"eval_loss": 1.2754029035568237,
"eval_runtime": 0.8003,
"eval_samples_per_second": 63.726,
"eval_steps_per_second": 2.499,
"step": 12
},
{
"epoch": 8.0,
"eval_accuracy": 0.49019607843137253,
"eval_loss": 1.2477165460586548,
"eval_runtime": 0.7776,
"eval_samples_per_second": 65.59,
"eval_steps_per_second": 2.572,
"step": 14
},
{
"epoch": 8.57,
"eval_accuracy": 0.5294117647058824,
"eval_loss": 1.2347149848937988,
"eval_runtime": 0.762,
"eval_samples_per_second": 66.926,
"eval_steps_per_second": 2.625,
"step": 15
},
{
"epoch": 9.71,
"eval_accuracy": 0.5490196078431373,
"eval_loss": 1.2109137773513794,
"eval_runtime": 0.7942,
"eval_samples_per_second": 64.215,
"eval_steps_per_second": 2.518,
"step": 17
},
{
"epoch": 10.86,
"eval_accuracy": 0.6078431372549019,
"eval_loss": 1.1888847351074219,
"eval_runtime": 0.7771,
"eval_samples_per_second": 65.629,
"eval_steps_per_second": 2.574,
"step": 19
},
{
"epoch": 11.43,
"learning_rate": 4.166666666666667e-05,
"loss": 1.2512,
"step": 20
},
{
"epoch": 12.0,
"eval_accuracy": 0.6274509803921569,
"eval_loss": 1.1671414375305176,
"eval_runtime": 0.8252,
"eval_samples_per_second": 61.806,
"eval_steps_per_second": 2.424,
"step": 21
},
{
"epoch": 12.57,
"eval_accuracy": 0.6078431372549019,
"eval_loss": 1.1560198068618774,
"eval_runtime": 0.8006,
"eval_samples_per_second": 63.699,
"eval_steps_per_second": 2.498,
"step": 22
},
{
"epoch": 13.71,
"eval_accuracy": 0.6470588235294118,
"eval_loss": 1.1310560703277588,
"eval_runtime": 0.8047,
"eval_samples_per_second": 63.376,
"eval_steps_per_second": 2.485,
"step": 24
},
{
"epoch": 14.86,
"eval_accuracy": 0.6274509803921569,
"eval_loss": 1.1128166913986206,
"eval_runtime": 0.7724,
"eval_samples_per_second": 66.031,
"eval_steps_per_second": 2.589,
"step": 26
},
{
"epoch": 16.0,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0873538255691528,
"eval_runtime": 0.7769,
"eval_samples_per_second": 65.648,
"eval_steps_per_second": 2.574,
"step": 28
},
{
"epoch": 16.57,
"eval_accuracy": 0.6862745098039216,
"eval_loss": 1.0828280448913574,
"eval_runtime": 0.7846,
"eval_samples_per_second": 64.999,
"eval_steps_per_second": 2.549,
"step": 29
},
{
"epoch": 17.14,
"learning_rate": 3.472222222222222e-05,
"loss": 1.1299,
"step": 30
},
{
"epoch": 17.71,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0585854053497314,
"eval_runtime": 0.7897,
"eval_samples_per_second": 64.579,
"eval_steps_per_second": 2.533,
"step": 31
},
{
"epoch": 18.86,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0361896753311157,
"eval_runtime": 0.7852,
"eval_samples_per_second": 64.955,
"eval_steps_per_second": 2.547,
"step": 33
},
{
"epoch": 20.0,
"eval_accuracy": 0.6862745098039216,
"eval_loss": 1.0172666311264038,
"eval_runtime": 0.7638,
"eval_samples_per_second": 66.769,
"eval_steps_per_second": 2.618,
"step": 35
},
{
"epoch": 20.57,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0064767599105835,
"eval_runtime": 0.7647,
"eval_samples_per_second": 66.691,
"eval_steps_per_second": 2.615,
"step": 36
},
{
"epoch": 21.71,
"eval_accuracy": 0.6470588235294118,
"eval_loss": 1.007025957107544,
"eval_runtime": 0.8066,
"eval_samples_per_second": 63.232,
"eval_steps_per_second": 2.48,
"step": 38
},
{
"epoch": 22.86,
"learning_rate": 2.777777777777778e-05,
"loss": 1.0212,
"step": 40
},
{
"epoch": 22.86,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9791509509086609,
"eval_runtime": 0.775,
"eval_samples_per_second": 65.808,
"eval_steps_per_second": 2.581,
"step": 40
},
{
"epoch": 24.0,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9611671566963196,
"eval_runtime": 0.7739,
"eval_samples_per_second": 65.903,
"eval_steps_per_second": 2.584,
"step": 42
},
{
"epoch": 24.57,
"eval_accuracy": 0.6470588235294118,
"eval_loss": 0.9584113955497742,
"eval_runtime": 0.79,
"eval_samples_per_second": 64.559,
"eval_steps_per_second": 2.532,
"step": 43
},
{
"epoch": 25.71,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9494024515151978,
"eval_runtime": 0.7674,
"eval_samples_per_second": 66.461,
"eval_steps_per_second": 2.606,
"step": 45
},
{
"epoch": 26.86,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9293842911720276,
"eval_runtime": 0.8021,
"eval_samples_per_second": 63.584,
"eval_steps_per_second": 2.493,
"step": 47
},
{
"epoch": 28.0,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.919573962688446,
"eval_runtime": 0.8029,
"eval_samples_per_second": 63.516,
"eval_steps_per_second": 2.491,
"step": 49
},
{
"epoch": 28.57,
"learning_rate": 2.0833333333333336e-05,
"loss": 0.9222,
"step": 50
},
{
"epoch": 28.57,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.9100386500358582,
"eval_runtime": 0.7579,
"eval_samples_per_second": 67.287,
"eval_steps_per_second": 2.639,
"step": 50
},
{
"epoch": 29.71,
"eval_accuracy": 0.6862745098039216,
"eval_loss": 0.9060911536216736,
"eval_runtime": 0.7886,
"eval_samples_per_second": 64.672,
"eval_steps_per_second": 2.536,
"step": 52
},
{
"epoch": 30.86,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8903732895851135,
"eval_runtime": 0.8095,
"eval_samples_per_second": 63.005,
"eval_steps_per_second": 2.471,
"step": 54
},
{
"epoch": 32.0,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8797234892845154,
"eval_runtime": 0.7783,
"eval_samples_per_second": 65.531,
"eval_steps_per_second": 2.57,
"step": 56
},
{
"epoch": 32.57,
"eval_accuracy": 0.6862745098039216,
"eval_loss": 0.8747111558914185,
"eval_runtime": 0.8011,
"eval_samples_per_second": 63.663,
"eval_steps_per_second": 2.497,
"step": 57
},
{
"epoch": 33.71,
"eval_accuracy": 0.6862745098039216,
"eval_loss": 0.8690942525863647,
"eval_runtime": 0.7795,
"eval_samples_per_second": 65.424,
"eval_steps_per_second": 2.566,
"step": 59
},
{
"epoch": 34.29,
"learning_rate": 1.388888888888889e-05,
"loss": 0.8419,
"step": 60
},
{
"epoch": 34.86,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8549560904502869,
"eval_runtime": 0.854,
"eval_samples_per_second": 59.715,
"eval_steps_per_second": 2.342,
"step": 61
},
{
"epoch": 36.0,
"eval_accuracy": 0.7254901960784313,
"eval_loss": 0.8469617366790771,
"eval_runtime": 0.7796,
"eval_samples_per_second": 65.415,
"eval_steps_per_second": 2.565,
"step": 63
},
{
"epoch": 36.57,
"eval_accuracy": 0.7254901960784313,
"eval_loss": 0.8430066704750061,
"eval_runtime": 0.7729,
"eval_samples_per_second": 65.989,
"eval_steps_per_second": 2.588,
"step": 64
},
{
"epoch": 37.71,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8388990759849548,
"eval_runtime": 0.8337,
"eval_samples_per_second": 61.174,
"eval_steps_per_second": 2.399,
"step": 66
},
{
"epoch": 38.86,
"eval_accuracy": 0.7254901960784313,
"eval_loss": 0.8297919631004333,
"eval_runtime": 0.7574,
"eval_samples_per_second": 67.332,
"eval_steps_per_second": 2.64,
"step": 68
},
{
"epoch": 40.0,
"learning_rate": 6.944444444444445e-06,
"loss": 0.7865,
"step": 70
},
{
"epoch": 40.0,
"eval_accuracy": 0.7254901960784313,
"eval_loss": 0.826995849609375,
"eval_runtime": 0.7696,
"eval_samples_per_second": 66.265,
"eval_steps_per_second": 2.599,
"step": 70
},
{
"epoch": 40.57,
"eval_accuracy": 0.7254901960784313,
"eval_loss": 0.8258256912231445,
"eval_runtime": 0.7911,
"eval_samples_per_second": 64.471,
"eval_steps_per_second": 2.528,
"step": 71
},
{
"epoch": 41.71,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8234664797782898,
"eval_runtime": 0.7812,
"eval_samples_per_second": 65.285,
"eval_steps_per_second": 2.56,
"step": 73
},
{
"epoch": 42.86,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8210938572883606,
"eval_runtime": 0.7884,
"eval_samples_per_second": 64.687,
"eval_steps_per_second": 2.537,
"step": 75
},
{
"epoch": 44.0,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8188748359680176,
"eval_runtime": 0.7819,
"eval_samples_per_second": 65.225,
"eval_steps_per_second": 2.558,
"step": 77
},
{
"epoch": 44.57,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.8188669681549072,
"eval_runtime": 0.783,
"eval_samples_per_second": 65.134,
"eval_steps_per_second": 2.554,
"step": 78
},
{
"epoch": 45.71,
"learning_rate": 0.0,
"loss": 0.7555,
"step": 80
},
{
"epoch": 45.71,
"eval_accuracy": 0.7058823529411765,
"eval_loss": 0.81866455078125,
"eval_runtime": 0.7693,
"eval_samples_per_second": 66.291,
"eval_steps_per_second": 2.6,
"step": 80
},
{
"epoch": 45.71,
"step": 80,
"total_flos": 7.203009548954419e+17,
"train_loss": 1.0092584133148192,
"train_runtime": 365.7142,
"train_samples_per_second": 44.406,
"train_steps_per_second": 0.219
}
],
"logging_steps": 10,
"max_steps": 80,
"num_input_tokens_seen": 0,
"num_train_epochs": 80,
"save_steps": 500,
"total_flos": 7.203009548954419e+17,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}