|
{ |
|
"best_metric": 1.3211654424667358, |
|
"best_model_checkpoint": "facial_age_image_detection/checkpoint-6400", |
|
"epoch": 20.0, |
|
"eval_steps": 500, |
|
"global_step": 6400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.5492572322126661, |
|
"eval_loss": 1.3843398094177246, |
|
"eval_runtime": 53.0532, |
|
"eval_samples_per_second": 96.431, |
|
"eval_steps_per_second": 3.016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 9.20909595489502, |
|
"learning_rate": 9.291338582677165e-07, |
|
"loss": 1.0144, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.5471071149335418, |
|
"eval_loss": 1.3768093585968018, |
|
"eval_runtime": 52.7424, |
|
"eval_samples_per_second": 97.0, |
|
"eval_steps_per_second": 3.034, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.5433932759968726, |
|
"eval_loss": 1.3764420747756958, |
|
"eval_runtime": 51.5402, |
|
"eval_samples_per_second": 99.262, |
|
"eval_steps_per_second": 3.104, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 8.877883911132812, |
|
"learning_rate": 8.503937007874016e-07, |
|
"loss": 1.0053, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.547302580140735, |
|
"eval_loss": 1.366408348083496, |
|
"eval_runtime": 52.1841, |
|
"eval_samples_per_second": 98.038, |
|
"eval_steps_per_second": 3.066, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 4.69, |
|
"grad_norm": 12.386143684387207, |
|
"learning_rate": 7.716535433070865e-07, |
|
"loss": 0.9744, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.5517982799061767, |
|
"eval_loss": 1.3610535860061646, |
|
"eval_runtime": 52.1993, |
|
"eval_samples_per_second": 98.009, |
|
"eval_steps_per_second": 3.065, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.5521892103205629, |
|
"eval_loss": 1.3569880723953247, |
|
"eval_runtime": 51.8422, |
|
"eval_samples_per_second": 98.684, |
|
"eval_steps_per_second": 3.086, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 6.25, |
|
"grad_norm": 12.370375633239746, |
|
"learning_rate": 6.929133858267716e-07, |
|
"loss": 0.9667, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.5494526974198592, |
|
"eval_loss": 1.3512985706329346, |
|
"eval_runtime": 52.2761, |
|
"eval_samples_per_second": 97.865, |
|
"eval_steps_per_second": 3.061, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"grad_norm": 11.062934875488281, |
|
"learning_rate": 6.141732283464567e-07, |
|
"loss": 0.9536, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.5492572322126661, |
|
"eval_loss": 1.3439780473709106, |
|
"eval_runtime": 51.9554, |
|
"eval_samples_per_second": 98.469, |
|
"eval_steps_per_second": 3.08, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.5484753713838937, |
|
"eval_loss": 1.3428295850753784, |
|
"eval_runtime": 52.082, |
|
"eval_samples_per_second": 98.23, |
|
"eval_steps_per_second": 3.072, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"grad_norm": 12.75069808959961, |
|
"learning_rate": 5.354330708661418e-07, |
|
"loss": 0.9437, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.5484753713838937, |
|
"eval_loss": 1.3394215106964111, |
|
"eval_runtime": 51.1719, |
|
"eval_samples_per_second": 99.977, |
|
"eval_steps_per_second": 3.127, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 10.94, |
|
"grad_norm": 11.664592742919922, |
|
"learning_rate": 4.5669291338582676e-07, |
|
"loss": 0.9279, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.5471071149335418, |
|
"eval_loss": 1.3347781896591187, |
|
"eval_runtime": 51.4976, |
|
"eval_samples_per_second": 99.344, |
|
"eval_steps_per_second": 3.107, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.5541438623924941, |
|
"eval_loss": 1.3312886953353882, |
|
"eval_runtime": 51.2447, |
|
"eval_samples_per_second": 99.835, |
|
"eval_steps_per_second": 3.122, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"grad_norm": 8.319609642028809, |
|
"learning_rate": 3.779527559055118e-07, |
|
"loss": 0.921, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.5547302580140735, |
|
"eval_loss": 1.3314929008483887, |
|
"eval_runtime": 51.5959, |
|
"eval_samples_per_second": 99.155, |
|
"eval_steps_per_second": 3.101, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.5508209538702111, |
|
"eval_loss": 1.3308985233306885, |
|
"eval_runtime": 51.327, |
|
"eval_samples_per_second": 99.675, |
|
"eval_steps_per_second": 3.117, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 14.06, |
|
"grad_norm": 6.9851226806640625, |
|
"learning_rate": 2.9921259842519687e-07, |
|
"loss": 0.9223, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.5564894448788116, |
|
"eval_loss": 1.3276044130325317, |
|
"eval_runtime": 51.4978, |
|
"eval_samples_per_second": 99.344, |
|
"eval_steps_per_second": 3.107, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 15.62, |
|
"grad_norm": 6.1127119064331055, |
|
"learning_rate": 2.2047244094488187e-07, |
|
"loss": 0.9054, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.5545347928068803, |
|
"eval_loss": 1.3215641975402832, |
|
"eval_runtime": 51.1737, |
|
"eval_samples_per_second": 99.973, |
|
"eval_steps_per_second": 3.127, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.5568803752931978, |
|
"eval_loss": 1.324193000793457, |
|
"eval_runtime": 51.5617, |
|
"eval_samples_per_second": 99.221, |
|
"eval_steps_per_second": 3.103, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 17.19, |
|
"grad_norm": 7.823403358459473, |
|
"learning_rate": 1.4173228346456693e-07, |
|
"loss": 0.9092, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.5551211884284597, |
|
"eval_loss": 1.3241610527038574, |
|
"eval_runtime": 52.1852, |
|
"eval_samples_per_second": 98.035, |
|
"eval_steps_per_second": 3.066, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 18.75, |
|
"grad_norm": 8.171327590942383, |
|
"learning_rate": 6.299212598425197e-08, |
|
"loss": 0.9017, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.5576622361219703, |
|
"eval_loss": 1.3216114044189453, |
|
"eval_runtime": 51.9014, |
|
"eval_samples_per_second": 98.572, |
|
"eval_steps_per_second": 3.083, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.5586395621579359, |
|
"eval_loss": 1.3211654424667358, |
|
"eval_runtime": 51.4519, |
|
"eval_samples_per_second": 99.433, |
|
"eval_steps_per_second": 3.11, |
|
"step": 6400 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 6400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 20, |
|
"save_steps": 500, |
|
"total_flos": 3.171569043869614e+19, |
|
"train_batch_size": 64, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|