{ "best_metric": 1.3211654424667358, "best_model_checkpoint": "facial_age_image_detection/checkpoint-6400", "epoch": 20.0, "eval_steps": 500, "global_step": 6400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5492572322126661, "eval_loss": 1.3843398094177246, "eval_runtime": 53.0532, "eval_samples_per_second": 96.431, "eval_steps_per_second": 3.016, "step": 320 }, { "epoch": 1.56, "grad_norm": 9.20909595489502, "learning_rate": 9.291338582677165e-07, "loss": 1.0144, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5471071149335418, "eval_loss": 1.3768093585968018, "eval_runtime": 52.7424, "eval_samples_per_second": 97.0, "eval_steps_per_second": 3.034, "step": 640 }, { "epoch": 3.0, "eval_accuracy": 0.5433932759968726, "eval_loss": 1.3764420747756958, "eval_runtime": 51.5402, "eval_samples_per_second": 99.262, "eval_steps_per_second": 3.104, "step": 960 }, { "epoch": 3.12, "grad_norm": 8.877883911132812, "learning_rate": 8.503937007874016e-07, "loss": 1.0053, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.547302580140735, "eval_loss": 1.366408348083496, "eval_runtime": 52.1841, "eval_samples_per_second": 98.038, "eval_steps_per_second": 3.066, "step": 1280 }, { "epoch": 4.69, "grad_norm": 12.386143684387207, "learning_rate": 7.716535433070865e-07, "loss": 0.9744, "step": 1500 }, { "epoch": 5.0, "eval_accuracy": 0.5517982799061767, "eval_loss": 1.3610535860061646, "eval_runtime": 52.1993, "eval_samples_per_second": 98.009, "eval_steps_per_second": 3.065, "step": 1600 }, { "epoch": 6.0, "eval_accuracy": 0.5521892103205629, "eval_loss": 1.3569880723953247, "eval_runtime": 51.8422, "eval_samples_per_second": 98.684, "eval_steps_per_second": 3.086, "step": 1920 }, { "epoch": 6.25, "grad_norm": 12.370375633239746, "learning_rate": 6.929133858267716e-07, "loss": 0.9667, "step": 2000 }, { "epoch": 7.0, "eval_accuracy": 0.5494526974198592, "eval_loss": 1.3512985706329346, "eval_runtime": 52.2761, "eval_samples_per_second": 97.865, "eval_steps_per_second": 3.061, "step": 2240 }, { "epoch": 7.81, "grad_norm": 11.062934875488281, "learning_rate": 6.141732283464567e-07, "loss": 0.9536, "step": 2500 }, { "epoch": 8.0, "eval_accuracy": 0.5492572322126661, "eval_loss": 1.3439780473709106, "eval_runtime": 51.9554, "eval_samples_per_second": 98.469, "eval_steps_per_second": 3.08, "step": 2560 }, { "epoch": 9.0, "eval_accuracy": 0.5484753713838937, "eval_loss": 1.3428295850753784, "eval_runtime": 52.082, "eval_samples_per_second": 98.23, "eval_steps_per_second": 3.072, "step": 2880 }, { "epoch": 9.38, "grad_norm": 12.75069808959961, "learning_rate": 5.354330708661418e-07, "loss": 0.9437, "step": 3000 }, { "epoch": 10.0, "eval_accuracy": 0.5484753713838937, "eval_loss": 1.3394215106964111, "eval_runtime": 51.1719, "eval_samples_per_second": 99.977, "eval_steps_per_second": 3.127, "step": 3200 }, { "epoch": 10.94, "grad_norm": 11.664592742919922, "learning_rate": 4.5669291338582676e-07, "loss": 0.9279, "step": 3500 }, { "epoch": 11.0, "eval_accuracy": 0.5471071149335418, "eval_loss": 1.3347781896591187, "eval_runtime": 51.4976, "eval_samples_per_second": 99.344, "eval_steps_per_second": 3.107, "step": 3520 }, { "epoch": 12.0, "eval_accuracy": 0.5541438623924941, "eval_loss": 1.3312886953353882, "eval_runtime": 51.2447, "eval_samples_per_second": 99.835, "eval_steps_per_second": 3.122, "step": 3840 }, { "epoch": 12.5, "grad_norm": 8.319609642028809, "learning_rate": 3.779527559055118e-07, "loss": 0.921, "step": 4000 }, { "epoch": 13.0, "eval_accuracy": 0.5547302580140735, "eval_loss": 1.3314929008483887, "eval_runtime": 51.5959, "eval_samples_per_second": 99.155, "eval_steps_per_second": 3.101, "step": 4160 }, { "epoch": 14.0, "eval_accuracy": 0.5508209538702111, "eval_loss": 1.3308985233306885, "eval_runtime": 51.327, "eval_samples_per_second": 99.675, "eval_steps_per_second": 3.117, "step": 4480 }, { "epoch": 14.06, "grad_norm": 6.9851226806640625, "learning_rate": 2.9921259842519687e-07, "loss": 0.9223, "step": 4500 }, { "epoch": 15.0, "eval_accuracy": 0.5564894448788116, "eval_loss": 1.3276044130325317, "eval_runtime": 51.4978, "eval_samples_per_second": 99.344, "eval_steps_per_second": 3.107, "step": 4800 }, { "epoch": 15.62, "grad_norm": 6.1127119064331055, "learning_rate": 2.2047244094488187e-07, "loss": 0.9054, "step": 5000 }, { "epoch": 16.0, "eval_accuracy": 0.5545347928068803, "eval_loss": 1.3215641975402832, "eval_runtime": 51.1737, "eval_samples_per_second": 99.973, "eval_steps_per_second": 3.127, "step": 5120 }, { "epoch": 17.0, "eval_accuracy": 0.5568803752931978, "eval_loss": 1.324193000793457, "eval_runtime": 51.5617, "eval_samples_per_second": 99.221, "eval_steps_per_second": 3.103, "step": 5440 }, { "epoch": 17.19, "grad_norm": 7.823403358459473, "learning_rate": 1.4173228346456693e-07, "loss": 0.9092, "step": 5500 }, { "epoch": 18.0, "eval_accuracy": 0.5551211884284597, "eval_loss": 1.3241610527038574, "eval_runtime": 52.1852, "eval_samples_per_second": 98.035, "eval_steps_per_second": 3.066, "step": 5760 }, { "epoch": 18.75, "grad_norm": 8.171327590942383, "learning_rate": 6.299212598425197e-08, "loss": 0.9017, "step": 6000 }, { "epoch": 19.0, "eval_accuracy": 0.5576622361219703, "eval_loss": 1.3216114044189453, "eval_runtime": 51.9014, "eval_samples_per_second": 98.572, "eval_steps_per_second": 3.083, "step": 6080 }, { "epoch": 20.0, "eval_accuracy": 0.5586395621579359, "eval_loss": 1.3211654424667358, "eval_runtime": 51.4519, "eval_samples_per_second": 99.433, "eval_steps_per_second": 3.11, "step": 6400 } ], "logging_steps": 500, "max_steps": 6400, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.171569043869614e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }