{ "best_metric": 0.3781418800354004, "best_model_checkpoint": "facial_emotions_image_detection/checkpoint-15740", "epoch": 10.0, "eval_steps": 500, "global_step": 15740, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.32, "grad_norm": 11.892455101013184, "learning_rate": 3.885277246653919e-06, "loss": 0.5534, "step": 500 }, { "epoch": 0.64, "grad_norm": 12.262578964233398, "learning_rate": 3.7578075207138302e-06, "loss": 0.5424, "step": 1000 }, { "epoch": 0.95, "grad_norm": 11.160516738891602, "learning_rate": 3.630337794773741e-06, "loss": 0.5402, "step": 1500 }, { "epoch": 1.0, "eval_accuracy": 0.8678585617798967, "eval_loss": 0.4141866862773895, "eval_runtime": 113.7657, "eval_samples_per_second": 110.622, "eval_steps_per_second": 13.835, "step": 1574 }, { "epoch": 1.27, "grad_norm": 12.081930160522461, "learning_rate": 3.5028680688336517e-06, "loss": 0.509, "step": 2000 }, { "epoch": 1.59, "grad_norm": 8.023213386535645, "learning_rate": 3.375398342893563e-06, "loss": 0.4984, "step": 2500 }, { "epoch": 1.91, "grad_norm": 11.671684265136719, "learning_rate": 3.2479286169534735e-06, "loss": 0.4817, "step": 3000 }, { "epoch": 2.0, "eval_accuracy": 0.8661899086213747, "eval_loss": 0.41461509466171265, "eval_runtime": 111.4496, "eval_samples_per_second": 112.921, "eval_steps_per_second": 14.123, "step": 3148 }, { "epoch": 2.22, "grad_norm": 11.465852737426758, "learning_rate": 3.120458891013384e-06, "loss": 0.4686, "step": 3500 }, { "epoch": 2.54, "grad_norm": 13.02270221710205, "learning_rate": 2.992989165073295e-06, "loss": 0.451, "step": 4000 }, { "epoch": 2.86, "grad_norm": 7.672973155975342, "learning_rate": 2.8655194391332055e-06, "loss": 0.4459, "step": 4500 }, { "epoch": 3.0, "eval_accuracy": 0.8698450536352801, "eval_loss": 0.40479913353919983, "eval_runtime": 111.5158, "eval_samples_per_second": 112.854, "eval_steps_per_second": 14.115, "step": 4722 }, { "epoch": 3.18, "grad_norm": 12.763694763183594, "learning_rate": 2.7380497131931164e-06, "loss": 0.4335, "step": 5000 }, { "epoch": 3.49, "grad_norm": 10.964895248413086, "learning_rate": 2.6105799872530273e-06, "loss": 0.4325, "step": 5500 }, { "epoch": 3.81, "grad_norm": 12.650814056396484, "learning_rate": 2.4831102613129383e-06, "loss": 0.4031, "step": 6000 }, { "epoch": 4.0, "eval_accuracy": 0.8707985697258641, "eval_loss": 0.3987765610218048, "eval_runtime": 110.9162, "eval_samples_per_second": 113.464, "eval_steps_per_second": 14.191, "step": 6296 }, { "epoch": 4.13, "grad_norm": 7.15053653717041, "learning_rate": 2.3556405353728488e-06, "loss": 0.4112, "step": 6500 }, { "epoch": 4.45, "grad_norm": 17.31258773803711, "learning_rate": 2.2281708094327597e-06, "loss": 0.3886, "step": 7000 }, { "epoch": 4.76, "grad_norm": 7.938920497894287, "learning_rate": 2.1007010834926702e-06, "loss": 0.3804, "step": 7500 }, { "epoch": 5.0, "eval_accuracy": 0.8742947953913389, "eval_loss": 0.39545899629592896, "eval_runtime": 110.9484, "eval_samples_per_second": 113.431, "eval_steps_per_second": 14.187, "step": 7870 }, { "epoch": 5.08, "grad_norm": 11.187897682189941, "learning_rate": 1.973231357552581e-06, "loss": 0.3856, "step": 8000 }, { "epoch": 5.4, "grad_norm": 12.73786735534668, "learning_rate": 1.845761631612492e-06, "loss": 0.3732, "step": 8500 }, { "epoch": 5.72, "grad_norm": 6.2277374267578125, "learning_rate": 1.7182919056724028e-06, "loss": 0.3733, "step": 9000 }, { "epoch": 6.0, "eval_accuracy": 0.8767580452920143, "eval_loss": 0.3847721815109253, "eval_runtime": 113.9748, "eval_samples_per_second": 110.419, "eval_steps_per_second": 13.81, "step": 9444 }, { "epoch": 6.04, "grad_norm": 9.49077320098877, "learning_rate": 1.5908221797323135e-06, "loss": 0.3552, "step": 9500 }, { "epoch": 6.35, "grad_norm": 11.691384315490723, "learning_rate": 1.4633524537922245e-06, "loss": 0.355, "step": 10000 }, { "epoch": 6.67, "grad_norm": 7.24980354309082, "learning_rate": 1.335882727852135e-06, "loss": 0.347, "step": 10500 }, { "epoch": 6.99, "grad_norm": 15.983756065368652, "learning_rate": 1.2084130019120457e-06, "loss": 0.3462, "step": 11000 }, { "epoch": 7.0, "eval_accuracy": 0.877870480731029, "eval_loss": 0.38435637950897217, "eval_runtime": 114.6747, "eval_samples_per_second": 109.745, "eval_steps_per_second": 13.726, "step": 11018 }, { "epoch": 7.31, "grad_norm": 6.091674327850342, "learning_rate": 1.0809432759719566e-06, "loss": 0.3481, "step": 11500 }, { "epoch": 7.62, "grad_norm": 14.106987953186035, "learning_rate": 9.534735500318673e-07, "loss": 0.3373, "step": 12000 }, { "epoch": 7.94, "grad_norm": 8.448904991149902, "learning_rate": 8.260038240917782e-07, "loss": 0.324, "step": 12500 }, { "epoch": 8.0, "eval_accuracy": 0.876201827572507, "eval_loss": 0.3851085305213928, "eval_runtime": 113.668, "eval_samples_per_second": 110.717, "eval_steps_per_second": 13.847, "step": 12592 }, { "epoch": 8.26, "grad_norm": 6.959225177764893, "learning_rate": 6.98534098151689e-07, "loss": 0.3254, "step": 13000 }, { "epoch": 8.58, "grad_norm": 10.736397743225098, "learning_rate": 5.710643722115998e-07, "loss": 0.3179, "step": 13500 }, { "epoch": 8.89, "grad_norm": 10.716438293457031, "learning_rate": 4.435946462715105e-07, "loss": 0.3217, "step": 14000 }, { "epoch": 9.0, "eval_accuracy": 0.8787445371473976, "eval_loss": 0.3801954388618469, "eval_runtime": 113.1501, "eval_samples_per_second": 111.224, "eval_steps_per_second": 13.911, "step": 14166 }, { "epoch": 9.21, "grad_norm": 12.26369571685791, "learning_rate": 3.161249203314213e-07, "loss": 0.309, "step": 14500 }, { "epoch": 9.53, "grad_norm": 6.435930252075195, "learning_rate": 1.8865519439133203e-07, "loss": 0.3158, "step": 15000 }, { "epoch": 9.85, "grad_norm": 15.131240844726562, "learning_rate": 6.118546845124282e-08, "loss": 0.3105, "step": 15500 }, { "epoch": 10.0, "eval_accuracy": 0.8789829161700436, "eval_loss": 0.3781418800354004, "eval_runtime": 112.3381, "eval_samples_per_second": 112.028, "eval_steps_per_second": 14.011, "step": 15740 } ], "logging_steps": 500, "max_steps": 15740, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 3.900966581033497e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }