{ "best_metric": 1.257996678352356, "best_model_checkpoint": "facial_age_image_detection/checkpoint-5320", "epoch": 19.0, "eval_steps": 500, "global_step": 5320, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.5756548937833963, "eval_loss": 1.3048689365386963, "eval_runtime": 87.7602, "eval_samples_per_second": 87.431, "eval_steps_per_second": 2.735, "step": 280 }, { "epoch": 1.79, "grad_norm": 11.91701889038086, "learning_rate": 9.18918918918919e-07, "loss": 0.9042, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.5678352665189625, "eval_loss": 1.306204080581665, "eval_runtime": 81.8404, "eval_samples_per_second": 93.756, "eval_steps_per_second": 2.933, "step": 560 }, { "epoch": 3.0, "eval_accuracy": 0.576176202267692, "eval_loss": 1.300585150718689, "eval_runtime": 81.0324, "eval_samples_per_second": 94.691, "eval_steps_per_second": 2.962, "step": 840 }, { "epoch": 3.57, "grad_norm": 12.9658784866333, "learning_rate": 8.288288288288288e-07, "loss": 0.8924, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5730483513619185, "eval_loss": 1.2988545894622803, "eval_runtime": 82.0516, "eval_samples_per_second": 93.514, "eval_steps_per_second": 2.925, "step": 1120 }, { "epoch": 5.0, "eval_accuracy": 0.5746122768148052, "eval_loss": 1.2918657064437866, "eval_runtime": 85.7442, "eval_samples_per_second": 89.487, "eval_steps_per_second": 2.799, "step": 1400 }, { "epoch": 5.36, "grad_norm": 11.397974014282227, "learning_rate": 7.387387387387387e-07, "loss": 0.8708, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.5765671836309136, "eval_loss": 1.2833993434906006, "eval_runtime": 80.6066, "eval_samples_per_second": 95.191, "eval_steps_per_second": 2.977, "step": 1680 }, { "epoch": 7.0, "eval_accuracy": 0.5842564837742734, "eval_loss": 1.276853322982788, "eval_runtime": 82.4272, "eval_samples_per_second": 93.088, "eval_steps_per_second": 2.912, "step": 1960 }, { "epoch": 7.14, "grad_norm": 7.821016311645508, "learning_rate": 6.486486486486486e-07, "loss": 0.8563, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5781311090838004, "eval_loss": 1.2758774757385254, "eval_runtime": 81.3168, "eval_samples_per_second": 94.359, "eval_steps_per_second": 2.951, "step": 2240 }, { "epoch": 8.93, "grad_norm": 9.624176025390625, "learning_rate": 5.585585585585585e-07, "loss": 0.8478, "step": 2500 }, { "epoch": 9.0, "eval_accuracy": 0.5789130718102438, "eval_loss": 1.2749879360198975, "eval_runtime": 80.8906, "eval_samples_per_second": 94.856, "eval_steps_per_second": 2.967, "step": 2520 }, { "epoch": 10.0, "eval_accuracy": 0.582301576958165, "eval_loss": 1.270504355430603, "eval_runtime": 83.0139, "eval_samples_per_second": 92.43, "eval_steps_per_second": 2.891, "step": 2800 }, { "epoch": 10.71, "grad_norm": 7.522056579589844, "learning_rate": 4.684684684684684e-07, "loss": 0.8341, "step": 3000 }, { "epoch": 11.0, "eval_accuracy": 0.5815196142317216, "eval_loss": 1.2686316967010498, "eval_runtime": 81.0469, "eval_samples_per_second": 94.674, "eval_steps_per_second": 2.961, "step": 3080 }, { "epoch": 12.0, "eval_accuracy": 0.5804769972631305, "eval_loss": 1.2680425643920898, "eval_runtime": 80.5295, "eval_samples_per_second": 95.282, "eval_steps_per_second": 2.98, "step": 3360 }, { "epoch": 12.5, "grad_norm": 14.59212589263916, "learning_rate": 3.783783783783784e-07, "loss": 0.8298, "step": 3500 }, { "epoch": 13.0, "eval_accuracy": 0.5786524175680959, "eval_loss": 1.2673722505569458, "eval_runtime": 84.1895, "eval_samples_per_second": 91.14, "eval_steps_per_second": 2.851, "step": 3640 }, { "epoch": 14.0, "eval_accuracy": 0.5802163430209827, "eval_loss": 1.2637258768081665, "eval_runtime": 82.7707, "eval_samples_per_second": 92.702, "eval_steps_per_second": 2.9, "step": 3920 }, { "epoch": 14.29, "grad_norm": 11.842473030090332, "learning_rate": 2.882882882882883e-07, "loss": 0.8226, "step": 4000 }, { "epoch": 15.0, "eval_accuracy": 0.5813892871106477, "eval_loss": 1.2624099254608154, "eval_runtime": 82.4012, "eval_samples_per_second": 93.118, "eval_steps_per_second": 2.913, "step": 4200 }, { "epoch": 16.0, "eval_accuracy": 0.5829532125635345, "eval_loss": 1.2608647346496582, "eval_runtime": 82.9157, "eval_samples_per_second": 92.54, "eval_steps_per_second": 2.895, "step": 4480 }, { "epoch": 16.07, "grad_norm": 7.111355304718018, "learning_rate": 1.9819819819819818e-07, "loss": 0.8104, "step": 4500 }, { "epoch": 17.0, "eval_accuracy": 0.5825622312003128, "eval_loss": 1.2595696449279785, "eval_runtime": 87.9608, "eval_samples_per_second": 87.232, "eval_steps_per_second": 2.728, "step": 4760 }, { "epoch": 17.86, "grad_norm": 10.99641227722168, "learning_rate": 1.0810810810810811e-07, "loss": 0.8098, "step": 5000 }, { "epoch": 18.0, "eval_accuracy": 0.5826925583213867, "eval_loss": 1.2582371234893799, "eval_runtime": 83.3175, "eval_samples_per_second": 92.093, "eval_steps_per_second": 2.881, "step": 5040 }, { "epoch": 19.0, "eval_accuracy": 0.5821712498370911, "eval_loss": 1.257996678352356, "eval_runtime": 83.5381, "eval_samples_per_second": 91.85, "eval_steps_per_second": 2.873, "step": 5320 } ], "logging_steps": 500, "max_steps": 5600, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "total_flos": 2.636440398962747e+19, "train_batch_size": 64, "trial_name": null, "trial_params": null }