DinoV2_grading_Drawing / trainer_state.json
alicelouis's picture
Upload 8 files
5c33a33 verified
raw
history blame
13 kB
{
"best_metric": 0.6914285714285714,
"best_model_checkpoint": "dinov2-base-finetuned-eurosat/checkpoint-308",
"epoch": 30.0,
"eval_steps": 500,
"global_step": 330,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.91,
"grad_norm": 71.39833068847656,
"learning_rate": 1.5151515151515153e-05,
"loss": 6.646,
"step": 10
},
{
"epoch": 1.0,
"eval_accuracy": 0.004285714285714286,
"eval_loss": 6.344563961029053,
"eval_runtime": 12.2748,
"eval_samples_per_second": 57.027,
"eval_steps_per_second": 0.896,
"step": 11
},
{
"epoch": 1.82,
"grad_norm": 36.275108337402344,
"learning_rate": 3.0303030303030306e-05,
"loss": 6.0586,
"step": 20
},
{
"epoch": 2.0,
"eval_accuracy": 0.037142857142857144,
"eval_loss": 5.812839031219482,
"eval_runtime": 11.6948,
"eval_samples_per_second": 59.856,
"eval_steps_per_second": 0.941,
"step": 22
},
{
"epoch": 2.73,
"grad_norm": 78.4278564453125,
"learning_rate": 4.545454545454546e-05,
"loss": 4.9553,
"step": 30
},
{
"epoch": 3.0,
"eval_accuracy": 0.24285714285714285,
"eval_loss": 4.52340030670166,
"eval_runtime": 11.5613,
"eval_samples_per_second": 60.547,
"eval_steps_per_second": 0.951,
"step": 33
},
{
"epoch": 3.64,
"grad_norm": 94.20513153076172,
"learning_rate": 4.882154882154882e-05,
"loss": 3.2097,
"step": 40
},
{
"epoch": 4.0,
"eval_accuracy": 0.48428571428571426,
"eval_loss": 3.1874964237213135,
"eval_runtime": 11.6294,
"eval_samples_per_second": 60.192,
"eval_steps_per_second": 0.946,
"step": 44
},
{
"epoch": 4.55,
"grad_norm": 55.16205596923828,
"learning_rate": 4.713804713804714e-05,
"loss": 1.6208,
"step": 50
},
{
"epoch": 5.0,
"eval_accuracy": 0.5957142857142858,
"eval_loss": 2.3652451038360596,
"eval_runtime": 11.6572,
"eval_samples_per_second": 60.048,
"eval_steps_per_second": 0.944,
"step": 55
},
{
"epoch": 5.45,
"grad_norm": 28.252750396728516,
"learning_rate": 4.545454545454546e-05,
"loss": 0.7822,
"step": 60
},
{
"epoch": 6.0,
"eval_accuracy": 0.6485714285714286,
"eval_loss": 2.007438898086548,
"eval_runtime": 11.7326,
"eval_samples_per_second": 59.663,
"eval_steps_per_second": 0.938,
"step": 66
},
{
"epoch": 6.36,
"grad_norm": 17.972673416137695,
"learning_rate": 4.3771043771043774e-05,
"loss": 0.3699,
"step": 70
},
{
"epoch": 7.0,
"eval_accuracy": 0.66,
"eval_loss": 1.9254851341247559,
"eval_runtime": 11.7512,
"eval_samples_per_second": 59.569,
"eval_steps_per_second": 0.936,
"step": 77
},
{
"epoch": 7.27,
"grad_norm": 21.875259399414062,
"learning_rate": 4.208754208754209e-05,
"loss": 0.1745,
"step": 80
},
{
"epoch": 8.0,
"eval_accuracy": 0.6557142857142857,
"eval_loss": 1.865968942642212,
"eval_runtime": 11.609,
"eval_samples_per_second": 60.298,
"eval_steps_per_second": 0.948,
"step": 88
},
{
"epoch": 8.18,
"grad_norm": 13.34464168548584,
"learning_rate": 4.0404040404040405e-05,
"loss": 0.1285,
"step": 90
},
{
"epoch": 9.0,
"eval_accuracy": 0.6571428571428571,
"eval_loss": 1.8786249160766602,
"eval_runtime": 11.6967,
"eval_samples_per_second": 59.846,
"eval_steps_per_second": 0.94,
"step": 99
},
{
"epoch": 9.09,
"grad_norm": 11.297475814819336,
"learning_rate": 3.872053872053872e-05,
"loss": 0.1178,
"step": 100
},
{
"epoch": 10.0,
"grad_norm": 16.153575897216797,
"learning_rate": 3.7037037037037037e-05,
"loss": 0.0883,
"step": 110
},
{
"epoch": 10.0,
"eval_accuracy": 0.6585714285714286,
"eval_loss": 1.8617857694625854,
"eval_runtime": 11.6432,
"eval_samples_per_second": 60.121,
"eval_steps_per_second": 0.945,
"step": 110
},
{
"epoch": 10.91,
"grad_norm": 13.22706127166748,
"learning_rate": 3.535353535353535e-05,
"loss": 0.0721,
"step": 120
},
{
"epoch": 11.0,
"eval_accuracy": 0.6514285714285715,
"eval_loss": 1.9431724548339844,
"eval_runtime": 12.6039,
"eval_samples_per_second": 55.538,
"eval_steps_per_second": 0.873,
"step": 121
},
{
"epoch": 11.82,
"grad_norm": 8.195013046264648,
"learning_rate": 3.3670033670033675e-05,
"loss": 0.0693,
"step": 130
},
{
"epoch": 12.0,
"eval_accuracy": 0.6642857142857143,
"eval_loss": 1.873042345046997,
"eval_runtime": 11.7524,
"eval_samples_per_second": 59.562,
"eval_steps_per_second": 0.936,
"step": 132
},
{
"epoch": 12.73,
"grad_norm": 9.13159465789795,
"learning_rate": 3.198653198653199e-05,
"loss": 0.0901,
"step": 140
},
{
"epoch": 13.0,
"eval_accuracy": 0.6557142857142857,
"eval_loss": 1.8676621913909912,
"eval_runtime": 11.7011,
"eval_samples_per_second": 59.823,
"eval_steps_per_second": 0.94,
"step": 143
},
{
"epoch": 13.64,
"grad_norm": 5.170494556427002,
"learning_rate": 3.0303030303030306e-05,
"loss": 0.0608,
"step": 150
},
{
"epoch": 14.0,
"eval_accuracy": 0.6757142857142857,
"eval_loss": 1.846497654914856,
"eval_runtime": 12.5066,
"eval_samples_per_second": 55.971,
"eval_steps_per_second": 0.88,
"step": 154
},
{
"epoch": 14.55,
"grad_norm": 4.774472713470459,
"learning_rate": 2.8619528619528618e-05,
"loss": 0.0443,
"step": 160
},
{
"epoch": 15.0,
"eval_accuracy": 0.6642857142857143,
"eval_loss": 1.8421980142593384,
"eval_runtime": 11.808,
"eval_samples_per_second": 59.282,
"eval_steps_per_second": 0.932,
"step": 165
},
{
"epoch": 15.45,
"grad_norm": 2.623682737350464,
"learning_rate": 2.6936026936026937e-05,
"loss": 0.0552,
"step": 170
},
{
"epoch": 16.0,
"eval_accuracy": 0.6585714285714286,
"eval_loss": 1.9717400074005127,
"eval_runtime": 11.7743,
"eval_samples_per_second": 59.451,
"eval_steps_per_second": 0.934,
"step": 176
},
{
"epoch": 16.36,
"grad_norm": 3.4440066814422607,
"learning_rate": 2.5252525252525256e-05,
"loss": 0.0416,
"step": 180
},
{
"epoch": 17.0,
"eval_accuracy": 0.6657142857142857,
"eval_loss": 1.8076777458190918,
"eval_runtime": 11.7226,
"eval_samples_per_second": 59.714,
"eval_steps_per_second": 0.938,
"step": 187
},
{
"epoch": 17.27,
"grad_norm": 8.230661392211914,
"learning_rate": 2.356902356902357e-05,
"loss": 0.0366,
"step": 190
},
{
"epoch": 18.0,
"eval_accuracy": 0.6742857142857143,
"eval_loss": 1.8198397159576416,
"eval_runtime": 11.6594,
"eval_samples_per_second": 60.037,
"eval_steps_per_second": 0.943,
"step": 198
},
{
"epoch": 18.18,
"grad_norm": 3.6574606895446777,
"learning_rate": 2.1885521885521887e-05,
"loss": 0.0313,
"step": 200
},
{
"epoch": 19.0,
"eval_accuracy": 0.6757142857142857,
"eval_loss": 1.8081269264221191,
"eval_runtime": 11.957,
"eval_samples_per_second": 58.543,
"eval_steps_per_second": 0.92,
"step": 209
},
{
"epoch": 19.09,
"grad_norm": 4.515919208526611,
"learning_rate": 2.0202020202020203e-05,
"loss": 0.0272,
"step": 210
},
{
"epoch": 20.0,
"grad_norm": 4.542725086212158,
"learning_rate": 1.8518518518518518e-05,
"loss": 0.0296,
"step": 220
},
{
"epoch": 20.0,
"eval_accuracy": 0.6785714285714286,
"eval_loss": 1.776505947113037,
"eval_runtime": 11.5903,
"eval_samples_per_second": 60.395,
"eval_steps_per_second": 0.949,
"step": 220
},
{
"epoch": 20.91,
"grad_norm": 2.6347365379333496,
"learning_rate": 1.6835016835016837e-05,
"loss": 0.0215,
"step": 230
},
{
"epoch": 21.0,
"eval_accuracy": 0.6828571428571428,
"eval_loss": 1.6916331052780151,
"eval_runtime": 13.3341,
"eval_samples_per_second": 52.497,
"eval_steps_per_second": 0.825,
"step": 231
},
{
"epoch": 21.82,
"grad_norm": 0.4444705545902252,
"learning_rate": 1.5151515151515153e-05,
"loss": 0.0144,
"step": 240
},
{
"epoch": 22.0,
"eval_accuracy": 0.6857142857142857,
"eval_loss": 1.7237095832824707,
"eval_runtime": 11.4907,
"eval_samples_per_second": 60.919,
"eval_steps_per_second": 0.957,
"step": 242
},
{
"epoch": 22.73,
"grad_norm": 4.013304710388184,
"learning_rate": 1.3468013468013468e-05,
"loss": 0.0108,
"step": 250
},
{
"epoch": 23.0,
"eval_accuracy": 0.67,
"eval_loss": 1.792176365852356,
"eval_runtime": 11.5859,
"eval_samples_per_second": 60.418,
"eval_steps_per_second": 0.949,
"step": 253
},
{
"epoch": 23.64,
"grad_norm": 0.9613437056541443,
"learning_rate": 1.1784511784511786e-05,
"loss": 0.0232,
"step": 260
},
{
"epoch": 24.0,
"eval_accuracy": 0.6828571428571428,
"eval_loss": 1.7594307661056519,
"eval_runtime": 12.8499,
"eval_samples_per_second": 54.475,
"eval_steps_per_second": 0.856,
"step": 264
},
{
"epoch": 24.55,
"grad_norm": 2.5503318309783936,
"learning_rate": 1.0101010101010101e-05,
"loss": 0.0129,
"step": 270
},
{
"epoch": 25.0,
"eval_accuracy": 0.6828571428571428,
"eval_loss": 1.7361136674880981,
"eval_runtime": 11.7158,
"eval_samples_per_second": 59.749,
"eval_steps_per_second": 0.939,
"step": 275
},
{
"epoch": 25.45,
"grad_norm": 5.675755977630615,
"learning_rate": 8.417508417508419e-06,
"loss": 0.0093,
"step": 280
},
{
"epoch": 26.0,
"eval_accuracy": 0.6828571428571428,
"eval_loss": 1.7426681518554688,
"eval_runtime": 12.593,
"eval_samples_per_second": 55.586,
"eval_steps_per_second": 0.873,
"step": 286
},
{
"epoch": 26.36,
"grad_norm": 2.090123176574707,
"learning_rate": 6.734006734006734e-06,
"loss": 0.0067,
"step": 290
},
{
"epoch": 27.0,
"eval_accuracy": 0.69,
"eval_loss": 1.730440378189087,
"eval_runtime": 11.8655,
"eval_samples_per_second": 58.995,
"eval_steps_per_second": 0.927,
"step": 297
},
{
"epoch": 27.27,
"grad_norm": 0.6074270009994507,
"learning_rate": 5.050505050505051e-06,
"loss": 0.0013,
"step": 300
},
{
"epoch": 28.0,
"eval_accuracy": 0.6914285714285714,
"eval_loss": 1.726584792137146,
"eval_runtime": 11.8751,
"eval_samples_per_second": 58.947,
"eval_steps_per_second": 0.926,
"step": 308
},
{
"epoch": 28.18,
"grad_norm": 0.04077678918838501,
"learning_rate": 3.367003367003367e-06,
"loss": 0.0031,
"step": 310
},
{
"epoch": 29.0,
"eval_accuracy": 0.69,
"eval_loss": 1.7368921041488647,
"eval_runtime": 11.7621,
"eval_samples_per_second": 59.513,
"eval_steps_per_second": 0.935,
"step": 319
},
{
"epoch": 29.09,
"grad_norm": 0.32179221510887146,
"learning_rate": 1.6835016835016836e-06,
"loss": 0.002,
"step": 320
},
{
"epoch": 30.0,
"grad_norm": 0.05003494769334793,
"learning_rate": 0.0,
"loss": 0.0019,
"step": 330
},
{
"epoch": 30.0,
"eval_accuracy": 0.69,
"eval_loss": 1.7391921281814575,
"eval_runtime": 12.8063,
"eval_samples_per_second": 54.661,
"eval_steps_per_second": 0.859,
"step": 330
}
],
"logging_steps": 10,
"max_steps": 330,
"num_input_tokens_seen": 0,
"num_train_epochs": 30,
"save_steps": 500,
"total_flos": 5.2828663104e+18,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}