|
{ |
|
"best_metric": 0.6914285714285714, |
|
"best_model_checkpoint": "dinov2-base-finetuned-eurosat/checkpoint-308", |
|
"epoch": 30.0, |
|
"eval_steps": 500, |
|
"global_step": 330, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 71.39833068847656, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 6.646, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.004285714285714286, |
|
"eval_loss": 6.344563961029053, |
|
"eval_runtime": 12.2748, |
|
"eval_samples_per_second": 57.027, |
|
"eval_steps_per_second": 0.896, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 36.275108337402344, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 6.0586, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.037142857142857144, |
|
"eval_loss": 5.812839031219482, |
|
"eval_runtime": 11.6948, |
|
"eval_samples_per_second": 59.856, |
|
"eval_steps_per_second": 0.941, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 78.4278564453125, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 4.9553, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.24285714285714285, |
|
"eval_loss": 4.52340030670166, |
|
"eval_runtime": 11.5613, |
|
"eval_samples_per_second": 60.547, |
|
"eval_steps_per_second": 0.951, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 94.20513153076172, |
|
"learning_rate": 4.882154882154882e-05, |
|
"loss": 3.2097, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.48428571428571426, |
|
"eval_loss": 3.1874964237213135, |
|
"eval_runtime": 11.6294, |
|
"eval_samples_per_second": 60.192, |
|
"eval_steps_per_second": 0.946, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"grad_norm": 55.16205596923828, |
|
"learning_rate": 4.713804713804714e-05, |
|
"loss": 1.6208, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5957142857142858, |
|
"eval_loss": 2.3652451038360596, |
|
"eval_runtime": 11.6572, |
|
"eval_samples_per_second": 60.048, |
|
"eval_steps_per_second": 0.944, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"grad_norm": 28.252750396728516, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.7822, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.6485714285714286, |
|
"eval_loss": 2.007438898086548, |
|
"eval_runtime": 11.7326, |
|
"eval_samples_per_second": 59.663, |
|
"eval_steps_per_second": 0.938, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"grad_norm": 17.972673416137695, |
|
"learning_rate": 4.3771043771043774e-05, |
|
"loss": 0.3699, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.66, |
|
"eval_loss": 1.9254851341247559, |
|
"eval_runtime": 11.7512, |
|
"eval_samples_per_second": 59.569, |
|
"eval_steps_per_second": 0.936, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 7.27, |
|
"grad_norm": 21.875259399414062, |
|
"learning_rate": 4.208754208754209e-05, |
|
"loss": 0.1745, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.6557142857142857, |
|
"eval_loss": 1.865968942642212, |
|
"eval_runtime": 11.609, |
|
"eval_samples_per_second": 60.298, |
|
"eval_steps_per_second": 0.948, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"grad_norm": 13.34464168548584, |
|
"learning_rate": 4.0404040404040405e-05, |
|
"loss": 0.1285, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.6571428571428571, |
|
"eval_loss": 1.8786249160766602, |
|
"eval_runtime": 11.6967, |
|
"eval_samples_per_second": 59.846, |
|
"eval_steps_per_second": 0.94, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"grad_norm": 11.297475814819336, |
|
"learning_rate": 3.872053872053872e-05, |
|
"loss": 0.1178, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 16.153575897216797, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.0883, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.6585714285714286, |
|
"eval_loss": 1.8617857694625854, |
|
"eval_runtime": 11.6432, |
|
"eval_samples_per_second": 60.121, |
|
"eval_steps_per_second": 0.945, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 10.91, |
|
"grad_norm": 13.22706127166748, |
|
"learning_rate": 3.535353535353535e-05, |
|
"loss": 0.0721, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.6514285714285715, |
|
"eval_loss": 1.9431724548339844, |
|
"eval_runtime": 12.6039, |
|
"eval_samples_per_second": 55.538, |
|
"eval_steps_per_second": 0.873, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 11.82, |
|
"grad_norm": 8.195013046264648, |
|
"learning_rate": 3.3670033670033675e-05, |
|
"loss": 0.0693, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.6642857142857143, |
|
"eval_loss": 1.873042345046997, |
|
"eval_runtime": 11.7524, |
|
"eval_samples_per_second": 59.562, |
|
"eval_steps_per_second": 0.936, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 12.73, |
|
"grad_norm": 9.13159465789795, |
|
"learning_rate": 3.198653198653199e-05, |
|
"loss": 0.0901, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.6557142857142857, |
|
"eval_loss": 1.8676621913909912, |
|
"eval_runtime": 11.7011, |
|
"eval_samples_per_second": 59.823, |
|
"eval_steps_per_second": 0.94, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 13.64, |
|
"grad_norm": 5.170494556427002, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 0.0608, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.6757142857142857, |
|
"eval_loss": 1.846497654914856, |
|
"eval_runtime": 12.5066, |
|
"eval_samples_per_second": 55.971, |
|
"eval_steps_per_second": 0.88, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 14.55, |
|
"grad_norm": 4.774472713470459, |
|
"learning_rate": 2.8619528619528618e-05, |
|
"loss": 0.0443, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.6642857142857143, |
|
"eval_loss": 1.8421980142593384, |
|
"eval_runtime": 11.808, |
|
"eval_samples_per_second": 59.282, |
|
"eval_steps_per_second": 0.932, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 15.45, |
|
"grad_norm": 2.623682737350464, |
|
"learning_rate": 2.6936026936026937e-05, |
|
"loss": 0.0552, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6585714285714286, |
|
"eval_loss": 1.9717400074005127, |
|
"eval_runtime": 11.7743, |
|
"eval_samples_per_second": 59.451, |
|
"eval_steps_per_second": 0.934, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 16.36, |
|
"grad_norm": 3.4440066814422607, |
|
"learning_rate": 2.5252525252525256e-05, |
|
"loss": 0.0416, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.6657142857142857, |
|
"eval_loss": 1.8076777458190918, |
|
"eval_runtime": 11.7226, |
|
"eval_samples_per_second": 59.714, |
|
"eval_steps_per_second": 0.938, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 17.27, |
|
"grad_norm": 8.230661392211914, |
|
"learning_rate": 2.356902356902357e-05, |
|
"loss": 0.0366, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.6742857142857143, |
|
"eval_loss": 1.8198397159576416, |
|
"eval_runtime": 11.6594, |
|
"eval_samples_per_second": 60.037, |
|
"eval_steps_per_second": 0.943, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 18.18, |
|
"grad_norm": 3.6574606895446777, |
|
"learning_rate": 2.1885521885521887e-05, |
|
"loss": 0.0313, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.6757142857142857, |
|
"eval_loss": 1.8081269264221191, |
|
"eval_runtime": 11.957, |
|
"eval_samples_per_second": 58.543, |
|
"eval_steps_per_second": 0.92, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 19.09, |
|
"grad_norm": 4.515919208526611, |
|
"learning_rate": 2.0202020202020203e-05, |
|
"loss": 0.0272, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"grad_norm": 4.542725086212158, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.0296, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.6785714285714286, |
|
"eval_loss": 1.776505947113037, |
|
"eval_runtime": 11.5903, |
|
"eval_samples_per_second": 60.395, |
|
"eval_steps_per_second": 0.949, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 20.91, |
|
"grad_norm": 2.6347365379333496, |
|
"learning_rate": 1.6835016835016837e-05, |
|
"loss": 0.0215, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.6828571428571428, |
|
"eval_loss": 1.6916331052780151, |
|
"eval_runtime": 13.3341, |
|
"eval_samples_per_second": 52.497, |
|
"eval_steps_per_second": 0.825, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"grad_norm": 0.4444705545902252, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 0.0144, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.6857142857142857, |
|
"eval_loss": 1.7237095832824707, |
|
"eval_runtime": 11.4907, |
|
"eval_samples_per_second": 60.919, |
|
"eval_steps_per_second": 0.957, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 22.73, |
|
"grad_norm": 4.013304710388184, |
|
"learning_rate": 1.3468013468013468e-05, |
|
"loss": 0.0108, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.67, |
|
"eval_loss": 1.792176365852356, |
|
"eval_runtime": 11.5859, |
|
"eval_samples_per_second": 60.418, |
|
"eval_steps_per_second": 0.949, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 23.64, |
|
"grad_norm": 0.9613437056541443, |
|
"learning_rate": 1.1784511784511786e-05, |
|
"loss": 0.0232, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.6828571428571428, |
|
"eval_loss": 1.7594307661056519, |
|
"eval_runtime": 12.8499, |
|
"eval_samples_per_second": 54.475, |
|
"eval_steps_per_second": 0.856, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 24.55, |
|
"grad_norm": 2.5503318309783936, |
|
"learning_rate": 1.0101010101010101e-05, |
|
"loss": 0.0129, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.6828571428571428, |
|
"eval_loss": 1.7361136674880981, |
|
"eval_runtime": 11.7158, |
|
"eval_samples_per_second": 59.749, |
|
"eval_steps_per_second": 0.939, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 25.45, |
|
"grad_norm": 5.675755977630615, |
|
"learning_rate": 8.417508417508419e-06, |
|
"loss": 0.0093, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.6828571428571428, |
|
"eval_loss": 1.7426681518554688, |
|
"eval_runtime": 12.593, |
|
"eval_samples_per_second": 55.586, |
|
"eval_steps_per_second": 0.873, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 26.36, |
|
"grad_norm": 2.090123176574707, |
|
"learning_rate": 6.734006734006734e-06, |
|
"loss": 0.0067, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 1.730440378189087, |
|
"eval_runtime": 11.8655, |
|
"eval_samples_per_second": 58.995, |
|
"eval_steps_per_second": 0.927, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 27.27, |
|
"grad_norm": 0.6074270009994507, |
|
"learning_rate": 5.050505050505051e-06, |
|
"loss": 0.0013, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.6914285714285714, |
|
"eval_loss": 1.726584792137146, |
|
"eval_runtime": 11.8751, |
|
"eval_samples_per_second": 58.947, |
|
"eval_steps_per_second": 0.926, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 28.18, |
|
"grad_norm": 0.04077678918838501, |
|
"learning_rate": 3.367003367003367e-06, |
|
"loss": 0.0031, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 1.7368921041488647, |
|
"eval_runtime": 11.7621, |
|
"eval_samples_per_second": 59.513, |
|
"eval_steps_per_second": 0.935, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 29.09, |
|
"grad_norm": 0.32179221510887146, |
|
"learning_rate": 1.6835016835016836e-06, |
|
"loss": 0.002, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"grad_norm": 0.05003494769334793, |
|
"learning_rate": 0.0, |
|
"loss": 0.0019, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.69, |
|
"eval_loss": 1.7391921281814575, |
|
"eval_runtime": 12.8063, |
|
"eval_samples_per_second": 54.661, |
|
"eval_steps_per_second": 0.859, |
|
"step": 330 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 330, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 30, |
|
"save_steps": 500, |
|
"total_flos": 5.2828663104e+18, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|