{ "best_metric": 1.1997283697128296, "best_model_checkpoint": "facial_age_image_detection/checkpoint-4560", "epoch": 19.0, "eval_steps": 500, "global_step": 4560, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5973023164891017, "eval_loss": 1.2148407697677612, "eval_runtime": 107.0191, "eval_samples_per_second": 95.6, "eval_steps_per_second": 2.99, "step": 240 }, { "epoch": 2.0, "eval_accuracy": 0.6003323233310527, "eval_loss": 1.2124874591827393, "eval_runtime": 108.7931, "eval_samples_per_second": 94.041, "eval_steps_per_second": 2.941, "step": 480 }, { "epoch": 2.08, "grad_norm": 10.869218826293945, "learning_rate": 4.526315789473684e-07, "loss": 0.7792, "step": 500 }, { "epoch": 3.0, "eval_accuracy": 0.6001368390186688, "eval_loss": 1.2103016376495361, "eval_runtime": 108.2046, "eval_samples_per_second": 94.552, "eval_steps_per_second": 2.957, "step": 720 }, { "epoch": 4.0, "eval_accuracy": 0.5932948880852311, "eval_loss": 1.2177255153656006, "eval_runtime": 108.1913, "eval_samples_per_second": 94.564, "eval_steps_per_second": 2.958, "step": 960 }, { "epoch": 4.17, "grad_norm": 7.94642972946167, "learning_rate": 4e-07, "loss": 0.7693, "step": 1000 }, { "epoch": 5.0, "eval_accuracy": 0.5987684488319812, "eval_loss": 1.2087459564208984, "eval_runtime": 108.1422, "eval_samples_per_second": 94.607, "eval_steps_per_second": 2.959, "step": 1200 }, { "epoch": 6.0, "eval_accuracy": 0.5989639331443651, "eval_loss": 1.2106623649597168, "eval_runtime": 108.6434, "eval_samples_per_second": 94.17, "eval_steps_per_second": 2.945, "step": 1440 }, { "epoch": 6.25, "grad_norm": 12.206405639648438, "learning_rate": 3.4736842105263157e-07, "loss": 0.7579, "step": 1500 }, { "epoch": 7.0, "eval_accuracy": 0.5941745674909589, "eval_loss": 1.2116234302520752, "eval_runtime": 110.2341, "eval_samples_per_second": 92.812, "eval_steps_per_second": 2.903, "step": 1680 }, { "epoch": 8.0, "eval_accuracy": 0.596520379239566, "eval_loss": 1.209428071975708, "eval_runtime": 109.2117, "eval_samples_per_second": 93.68, "eval_steps_per_second": 2.93, "step": 1920 }, { "epoch": 8.33, "grad_norm": 13.978309631347656, "learning_rate": 2.947368421052631e-07, "loss": 0.7564, "step": 2000 }, { "epoch": 9.0, "eval_accuracy": 0.5945655361157267, "eval_loss": 1.207972764968872, "eval_runtime": 109.0595, "eval_samples_per_second": 93.811, "eval_steps_per_second": 2.934, "step": 2160 }, { "epoch": 10.0, "eval_accuracy": 0.5972045743329097, "eval_loss": 1.2053894996643066, "eval_runtime": 108.2001, "eval_samples_per_second": 94.556, "eval_steps_per_second": 2.957, "step": 2400 }, { "epoch": 10.42, "grad_norm": 13.184745788574219, "learning_rate": 2.4210526315789473e-07, "loss": 0.7462, "step": 2500 }, { "epoch": 11.0, "eval_accuracy": 0.596520379239566, "eval_loss": 1.2063862085342407, "eval_runtime": 109.0351, "eval_samples_per_second": 93.832, "eval_steps_per_second": 2.935, "step": 2640 }, { "epoch": 12.0, "eval_accuracy": 0.5967158635519499, "eval_loss": 1.205006718635559, "eval_runtime": 107.7737, "eval_samples_per_second": 94.93, "eval_steps_per_second": 2.969, "step": 2880 }, { "epoch": 12.5, "grad_norm": 13.500107765197754, "learning_rate": 1.8947368421052632e-07, "loss": 0.7444, "step": 3000 }, { "epoch": 13.0, "eval_accuracy": 0.5974000586452937, "eval_loss": 1.2029664516448975, "eval_runtime": 108.3584, "eval_samples_per_second": 94.418, "eval_steps_per_second": 2.953, "step": 3120 }, { "epoch": 14.0, "eval_accuracy": 0.5982797380510214, "eval_loss": 1.2019948959350586, "eval_runtime": 108.2177, "eval_samples_per_second": 94.541, "eval_steps_per_second": 2.957, "step": 3360 }, { "epoch": 14.58, "grad_norm": 12.818633079528809, "learning_rate": 1.368421052631579e-07, "loss": 0.7402, "step": 3500 }, { "epoch": 15.0, "eval_accuracy": 0.5969113478643339, "eval_loss": 1.2034169435501099, "eval_runtime": 107.9426, "eval_samples_per_second": 94.782, "eval_steps_per_second": 2.965, "step": 3600 }, { "epoch": 16.0, "eval_accuracy": 0.5980842537386375, "eval_loss": 1.2004482746124268, "eval_runtime": 108.7494, "eval_samples_per_second": 94.079, "eval_steps_per_second": 2.943, "step": 3840 }, { "epoch": 16.67, "grad_norm": 10.575230598449707, "learning_rate": 8.421052631578947e-08, "loss": 0.7357, "step": 4000 }, { "epoch": 17.0, "eval_accuracy": 0.5988661909881732, "eval_loss": 1.2010480165481567, "eval_runtime": 108.3609, "eval_samples_per_second": 94.416, "eval_steps_per_second": 2.953, "step": 4080 }, { "epoch": 18.0, "eval_accuracy": 0.5970090900205258, "eval_loss": 1.2006783485412598, "eval_runtime": 108.1842, "eval_samples_per_second": 94.57, "eval_steps_per_second": 2.958, "step": 4320 }, { "epoch": 18.75, "grad_norm": 10.900801658630371, "learning_rate": 3.157894736842105e-08, "loss": 0.7376, "step": 4500 }, { "epoch": 19.0, "eval_accuracy": 0.5974000586452937, "eval_loss": 1.1997283697128296, "eval_runtime": 108.4624, "eval_samples_per_second": 94.328, "eval_steps_per_second": 2.95, "step": 4560 } ], "logging_steps": 500, "max_steps": 4800, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.2597429437571006e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }