{ "best_metric": 1.1146559715270996, "best_model_checkpoint": "facial_age_image_detection/checkpoint-4560", "epoch": 19.0, "eval_steps": 500, "global_step": 4560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.6217378555370932, "eval_loss": 1.1227059364318848, "eval_runtime": 116.8455, "eval_samples_per_second": 87.56, "eval_steps_per_second": 2.739, "step": 240 }, { "epoch": 2.0, "eval_accuracy": 0.6248656045352361, "eval_loss": 1.1205345392227173, "eval_runtime": 101.698, "eval_samples_per_second": 100.602, "eval_steps_per_second": 3.147, "step": 480 }, { "epoch": 2.08, "grad_norm": 14.197029113769531, "learning_rate": 2.7157894736842105e-07, "loss": 0.7372, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.6255497996285798, "eval_loss": 1.1209293603897095, "eval_runtime": 102.6145, "eval_samples_per_second": 99.703, "eval_steps_per_second": 3.118, "step": 720 }, { "epoch": 4.0, "eval_accuracy": 0.6272114162838432, "eval_loss": 1.119669795036316, "eval_runtime": 101.8458, "eval_samples_per_second": 100.456, "eval_steps_per_second": 3.142, "step": 960 }, { "epoch": 4.17, "grad_norm": 13.897222518920898, "learning_rate": 2.4e-07, "loss": 0.7296, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.6242791515980842, "eval_loss": 1.1204010248184204, "eval_runtime": 103.9165, "eval_samples_per_second": 98.454, "eval_steps_per_second": 3.079, "step": 1200 }, { "epoch": 6.0, "eval_accuracy": 0.6246701202228521, "eval_loss": 1.1215792894363403, "eval_runtime": 106.6746, "eval_samples_per_second": 95.909, "eval_steps_per_second": 3.0, "step": 1440 }, { "epoch": 6.25, "grad_norm": 11.151677131652832, "learning_rate": 2.0842105263157896e-07, "loss": 0.72, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.6236926986609325, "eval_loss": 1.1201025247573853, "eval_runtime": 102.583, "eval_samples_per_second": 99.734, "eval_steps_per_second": 3.119, "step": 1680 }, { "epoch": 8.0, "eval_accuracy": 0.6247678623790441, "eval_loss": 1.1184368133544922, "eval_runtime": 102.2621, "eval_samples_per_second": 100.047, "eval_steps_per_second": 3.129, "step": 1920 }, { "epoch": 8.33, "grad_norm": 13.550016403198242, "learning_rate": 1.7684210526315786e-07, "loss": 0.7137, "step": 2000 }, { "epoch": 9.0, "eval_accuracy": 0.625158831003812, "eval_loss": 1.1169142723083496, "eval_runtime": 101.4179, "eval_samples_per_second": 100.88, "eval_steps_per_second": 3.155, "step": 2160 }, { "epoch": 10.0, "eval_accuracy": 0.6285798064705307, "eval_loss": 1.1156556606292725, "eval_runtime": 109.0217, "eval_samples_per_second": 93.844, "eval_steps_per_second": 2.935, "step": 2400 }, { "epoch": 10.42, "grad_norm": 17.68604850769043, "learning_rate": 1.4526315789473685e-07, "loss": 0.715, "step": 2500 }, { "epoch": 11.0, "eval_accuracy": 0.6254520574723879, "eval_loss": 1.1168755292892456, "eval_runtime": 112.108, "eval_samples_per_second": 91.26, "eval_steps_per_second": 2.854, "step": 2640 }, { "epoch": 12.0, "eval_accuracy": 0.6256475417847718, "eval_loss": 1.1173280477523804, "eval_runtime": 111.2518, "eval_samples_per_second": 91.963, "eval_steps_per_second": 2.876, "step": 2880 }, { "epoch": 12.5, "grad_norm": 13.063650131225586, "learning_rate": 1.1368421052631579e-07, "loss": 0.7087, "step": 3000 }, { "epoch": 13.0, "eval_accuracy": 0.6273091584400352, "eval_loss": 1.116219162940979, "eval_runtime": 102.5044, "eval_samples_per_second": 99.81, "eval_steps_per_second": 3.122, "step": 3120 }, { "epoch": 14.0, "eval_accuracy": 0.6236926986609325, "eval_loss": 1.1165720224380493, "eval_runtime": 117.8614, "eval_samples_per_second": 86.805, "eval_steps_per_second": 2.715, "step": 3360 }, { "epoch": 14.58, "grad_norm": 22.076231002807617, "learning_rate": 8.210526315789474e-08, "loss": 0.705, "step": 3500 }, { "epoch": 15.0, "eval_accuracy": 0.6240836672857003, "eval_loss": 1.1172147989273071, "eval_runtime": 107.0013, "eval_samples_per_second": 95.616, "eval_steps_per_second": 2.991, "step": 3600 }, { "epoch": 16.0, "eval_accuracy": 0.6259407682533477, "eval_loss": 1.1154069900512695, "eval_runtime": 107.8292, "eval_samples_per_second": 94.882, "eval_steps_per_second": 2.968, "step": 3840 }, { "epoch": 16.67, "grad_norm": 10.493719100952148, "learning_rate": 5.052631578947368e-08, "loss": 0.6994, "step": 4000 }, { "epoch": 17.0, "eval_accuracy": 0.6261362525657316, "eval_loss": 1.1150434017181396, "eval_runtime": 103.249, "eval_samples_per_second": 99.091, "eval_steps_per_second": 3.099, "step": 4080 }, { "epoch": 18.0, "eval_accuracy": 0.6241814094418923, "eval_loss": 1.1151177883148193, "eval_runtime": 102.8465, "eval_samples_per_second": 99.478, "eval_steps_per_second": 3.111, "step": 4320 }, { "epoch": 18.75, "grad_norm": 15.010687828063965, "learning_rate": 1.8947368421052632e-08, "loss": 0.7018, "step": 4500 }, { "epoch": 19.0, "eval_accuracy": 0.6252565731600039, "eval_loss": 1.1146559715270996, "eval_runtime": 104.246, "eval_samples_per_second": 98.143, "eval_steps_per_second": 3.07, "step": 4560 } ], "logging_steps": 500, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.2597429437571006e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }