|
{ |
|
"best_metric": 1.4132405519485474, |
|
"best_model_checkpoint": "facial_age_image_detection/checkpoint-6080", |
|
"epoch": 19.0, |
|
"eval_steps": 500, |
|
"global_step": 6080, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.52443315089914, |
|
"eval_loss": 1.4843693971633911, |
|
"eval_runtime": 53.5481, |
|
"eval_samples_per_second": 95.54, |
|
"eval_steps_per_second": 2.988, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 8.704541206359863, |
|
"learning_rate": 9.291338582677165e-07, |
|
"loss": 1.1351, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5242376856919468, |
|
"eval_loss": 1.4704760313034058, |
|
"eval_runtime": 53.4617, |
|
"eval_samples_per_second": 95.695, |
|
"eval_steps_per_second": 2.993, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.522869429241595, |
|
"eval_loss": 1.469245195388794, |
|
"eval_runtime": 53.691, |
|
"eval_samples_per_second": 95.286, |
|
"eval_steps_per_second": 2.98, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 8.570690155029297, |
|
"learning_rate": 8.503937007874016e-07, |
|
"loss": 1.1099, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.527756059421423, |
|
"eval_loss": 1.459570050239563, |
|
"eval_runtime": 53.2303, |
|
"eval_samples_per_second": 96.111, |
|
"eval_steps_per_second": 3.006, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 9.125391960144043, |
|
"learning_rate": 7.716535433070865e-07, |
|
"loss": 1.0965, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5275605942142298, |
|
"eval_loss": 1.4572798013687134, |
|
"eval_runtime": 53.1246, |
|
"eval_samples_per_second": 96.302, |
|
"eval_steps_per_second": 3.012, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5267787333854574, |
|
"eval_loss": 1.4519189596176147, |
|
"eval_runtime": 53.1208, |
|
"eval_samples_per_second": 96.309, |
|
"eval_steps_per_second": 3.012, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 8.86064624786377, |
|
"learning_rate": 6.929133858267716e-07, |
|
"loss": 1.0762, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5287333854573886, |
|
"eval_loss": 1.4468164443969727, |
|
"eval_runtime": 53.0338, |
|
"eval_samples_per_second": 96.467, |
|
"eval_steps_per_second": 3.017, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"grad_norm": 6.661502361297607, |
|
"learning_rate": 6.141732283464567e-07, |
|
"loss": 1.068, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5334245504300235, |
|
"eval_loss": 1.4398757219314575, |
|
"eval_runtime": 53.1182, |
|
"eval_samples_per_second": 96.314, |
|
"eval_steps_per_second": 3.012, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5340109460516028, |
|
"eval_loss": 1.4362281560897827, |
|
"eval_runtime": 53.8211, |
|
"eval_samples_per_second": 95.056, |
|
"eval_steps_per_second": 2.973, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"grad_norm": 7.235806941986084, |
|
"learning_rate": 5.354330708661418e-07, |
|
"loss": 1.0497, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5349882720875684, |
|
"eval_loss": 1.4307777881622314, |
|
"eval_runtime": 53.0594, |
|
"eval_samples_per_second": 96.42, |
|
"eval_steps_per_second": 3.015, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"grad_norm": 5.052951812744141, |
|
"learning_rate": 4.5669291338582676e-07, |
|
"loss": 1.0425, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5367474589523065, |
|
"eval_loss": 1.4270881414413452, |
|
"eval_runtime": 53.3177, |
|
"eval_samples_per_second": 95.953, |
|
"eval_steps_per_second": 3.001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5340109460516028, |
|
"eval_loss": 1.4276313781738281, |
|
"eval_runtime": 53.3844, |
|
"eval_samples_per_second": 95.833, |
|
"eval_steps_per_second": 2.997, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 7.315602779388428, |
|
"learning_rate": 3.779527559055118e-07, |
|
"loss": 1.0301, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.534401876465989, |
|
"eval_loss": 1.4230455160140991, |
|
"eval_runtime": 53.3059, |
|
"eval_samples_per_second": 95.974, |
|
"eval_steps_per_second": 3.002, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5359655981235341, |
|
"eval_loss": 1.4216346740722656, |
|
"eval_runtime": 52.9593, |
|
"eval_samples_per_second": 96.602, |
|
"eval_steps_per_second": 3.021, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"grad_norm": 5.868752956390381, |
|
"learning_rate": 2.9921259842519687e-07, |
|
"loss": 1.0284, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5369429241594996, |
|
"eval_loss": 1.4190484285354614, |
|
"eval_runtime": 53.6104, |
|
"eval_samples_per_second": 95.429, |
|
"eval_steps_per_second": 2.984, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"grad_norm": 11.117311477661133, |
|
"learning_rate": 2.2047244094488187e-07, |
|
"loss": 1.0137, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5383111806098514, |
|
"eval_loss": 1.4154096841812134, |
|
"eval_runtime": 53.1485, |
|
"eval_samples_per_second": 96.259, |
|
"eval_steps_per_second": 3.01, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5377247849882721, |
|
"eval_loss": 1.4145456552505493, |
|
"eval_runtime": 53.3337, |
|
"eval_samples_per_second": 95.924, |
|
"eval_steps_per_second": 3.0, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"grad_norm": 7.341709136962891, |
|
"learning_rate": 1.4173228346456693e-07, |
|
"loss": 1.0175, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5381157154026583, |
|
"eval_loss": 1.4157603979110718, |
|
"eval_runtime": 53.3037, |
|
"eval_samples_per_second": 95.978, |
|
"eval_steps_per_second": 3.002, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 9.26841926574707, |
|
"learning_rate": 6.299212598425197e-08, |
|
"loss": 1.0071, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5381157154026583, |
|
"eval_loss": 1.4132405519485474, |
|
"eval_runtime": 53.3332, |
|
"eval_samples_per_second": 95.925, |
|
"eval_steps_per_second": 3.0, |
|
"step": 6080 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3.0129905916761334e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|