{ "best_metric": 0.9053803339517625, "best_model_checkpoint": "resnet-50-resnet50_fashion/checkpoint-692", "epoch": 9.933993399339935, "global_step": 860, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.12, "learning_rate": 4.994124559341951e-05, "loss": 0.6959, "step": 10 }, { "epoch": 0.23, "learning_rate": 4.935370152761457e-05, "loss": 0.6903, "step": 20 }, { "epoch": 0.35, "learning_rate": 4.876615746180964e-05, "loss": 0.6815, "step": 30 }, { "epoch": 0.46, "learning_rate": 4.81786133960047e-05, "loss": 0.6753, "step": 40 }, { "epoch": 0.58, "learning_rate": 4.759106933019977e-05, "loss": 0.6688, "step": 50 }, { "epoch": 0.69, "learning_rate": 4.700352526439483e-05, "loss": 0.6613, "step": 60 }, { "epoch": 0.81, "learning_rate": 4.6415981198589895e-05, "loss": 0.6599, "step": 70 }, { "epoch": 0.92, "learning_rate": 4.582843713278496e-05, "loss": 0.6396, "step": 80 }, { "epoch": 0.99, "eval_accuracy": 0.7346938775510204, "eval_loss": 0.7625104188919067, "eval_runtime": 58.978, "eval_samples_per_second": 9.139, "eval_steps_per_second": 1.153, "step": 86 }, { "epoch": 1.04, "learning_rate": 4.524089306698003e-05, "loss": 0.6247, "step": 90 }, { "epoch": 1.16, "learning_rate": 4.465334900117509e-05, "loss": 0.628, "step": 100 }, { "epoch": 1.27, "learning_rate": 4.4065804935370154e-05, "loss": 0.632, "step": 110 }, { "epoch": 1.39, "learning_rate": 4.347826086956522e-05, "loss": 0.6004, "step": 120 }, { "epoch": 1.5, "learning_rate": 4.289071680376029e-05, "loss": 0.5901, "step": 130 }, { "epoch": 1.62, "learning_rate": 4.2303172737955346e-05, "loss": 0.5947, "step": 140 }, { "epoch": 1.73, "learning_rate": 4.171562867215041e-05, "loss": 0.5753, "step": 150 }, { "epoch": 1.85, "learning_rate": 4.112808460634548e-05, "loss": 0.5851, "step": 160 }, { "epoch": 1.96, "learning_rate": 4.0540540540540545e-05, "loss": 0.5646, "step": 170 }, { "epoch": 2.0, "eval_accuracy": 0.8348794063079777, "eval_loss": 0.5780991911888123, "eval_runtime": 9.3608, "eval_samples_per_second": 57.581, "eval_steps_per_second": 7.264, "step": 173 }, { "epoch": 2.08, "learning_rate": 3.9952996474735605e-05, "loss": 0.547, "step": 180 }, { "epoch": 2.19, "learning_rate": 3.936545240893067e-05, "loss": 0.5548, "step": 190 }, { "epoch": 2.31, "learning_rate": 3.877790834312574e-05, "loss": 0.5498, "step": 200 }, { "epoch": 2.43, "learning_rate": 3.81903642773208e-05, "loss": 0.5291, "step": 210 }, { "epoch": 2.54, "learning_rate": 3.760282021151586e-05, "loss": 0.5145, "step": 220 }, { "epoch": 2.66, "learning_rate": 3.701527614571093e-05, "loss": 0.4981, "step": 230 }, { "epoch": 2.77, "learning_rate": 3.6427732079905996e-05, "loss": 0.5022, "step": 240 }, { "epoch": 2.89, "learning_rate": 3.584018801410106e-05, "loss": 0.4768, "step": 250 }, { "epoch": 2.99, "eval_accuracy": 0.8571428571428571, "eval_loss": 0.47913119196891785, "eval_runtime": 8.4707, "eval_samples_per_second": 63.631, "eval_steps_per_second": 8.028, "step": 259 }, { "epoch": 3.0, "learning_rate": 3.525264394829612e-05, "loss": 0.4758, "step": 260 }, { "epoch": 3.12, "learning_rate": 3.466509988249119e-05, "loss": 0.4699, "step": 270 }, { "epoch": 3.23, "learning_rate": 3.4077555816686255e-05, "loss": 0.4638, "step": 280 }, { "epoch": 3.35, "learning_rate": 3.3490011750881314e-05, "loss": 0.4378, "step": 290 }, { "epoch": 3.47, "learning_rate": 3.290246768507638e-05, "loss": 0.4795, "step": 300 }, { "epoch": 3.58, "learning_rate": 3.231492361927145e-05, "loss": 0.4522, "step": 310 }, { "epoch": 3.7, "learning_rate": 3.172737955346651e-05, "loss": 0.4456, "step": 320 }, { "epoch": 3.81, "learning_rate": 3.113983548766158e-05, "loss": 0.4489, "step": 330 }, { "epoch": 3.93, "learning_rate": 3.055229142185664e-05, "loss": 0.4161, "step": 340 }, { "epoch": 4.0, "eval_accuracy": 0.8905380333951762, "eval_loss": 0.38660189509391785, "eval_runtime": 8.0364, "eval_samples_per_second": 67.07, "eval_steps_per_second": 8.461, "step": 346 }, { "epoch": 4.04, "learning_rate": 2.9964747356051702e-05, "loss": 0.415, "step": 350 }, { "epoch": 4.16, "learning_rate": 2.9377203290246768e-05, "loss": 0.4333, "step": 360 }, { "epoch": 4.27, "learning_rate": 2.8789659224441835e-05, "loss": 0.4304, "step": 370 }, { "epoch": 4.39, "learning_rate": 2.82021151586369e-05, "loss": 0.4151, "step": 380 }, { "epoch": 4.5, "learning_rate": 2.7614571092831964e-05, "loss": 0.3846, "step": 390 }, { "epoch": 4.62, "learning_rate": 2.702702702702703e-05, "loss": 0.4447, "step": 400 }, { "epoch": 4.74, "learning_rate": 2.6439482961222096e-05, "loss": 0.4253, "step": 410 }, { "epoch": 4.85, "learning_rate": 2.5851938895417156e-05, "loss": 0.3669, "step": 420 }, { "epoch": 4.97, "learning_rate": 2.526439482961222e-05, "loss": 0.402, "step": 430 }, { "epoch": 4.99, "eval_accuracy": 0.9035250463821892, "eval_loss": 0.3293728232383728, "eval_runtime": 7.9465, "eval_samples_per_second": 67.829, "eval_steps_per_second": 8.557, "step": 432 }, { "epoch": 5.08, "learning_rate": 2.4676850763807285e-05, "loss": 0.4157, "step": 440 }, { "epoch": 5.2, "learning_rate": 2.408930669800235e-05, "loss": 0.372, "step": 450 }, { "epoch": 5.31, "learning_rate": 2.3501762632197415e-05, "loss": 0.4115, "step": 460 }, { "epoch": 5.43, "learning_rate": 2.291421856639248e-05, "loss": 0.3954, "step": 470 }, { "epoch": 5.54, "learning_rate": 2.2326674500587544e-05, "loss": 0.395, "step": 480 }, { "epoch": 5.66, "learning_rate": 2.173913043478261e-05, "loss": 0.378, "step": 490 }, { "epoch": 5.78, "learning_rate": 2.1151586368977673e-05, "loss": 0.354, "step": 500 }, { "epoch": 5.89, "learning_rate": 2.056404230317274e-05, "loss": 0.369, "step": 510 }, { "epoch": 6.0, "eval_accuracy": 0.8923933209647495, "eval_loss": 1.0405044555664062, "eval_runtime": 7.8895, "eval_samples_per_second": 68.318, "eval_steps_per_second": 8.619, "step": 519 }, { "epoch": 6.01, "learning_rate": 1.9976498237367802e-05, "loss": 0.3544, "step": 520 }, { "epoch": 6.12, "learning_rate": 1.938895417156287e-05, "loss": 0.3565, "step": 530 }, { "epoch": 6.24, "learning_rate": 1.880141010575793e-05, "loss": 0.3868, "step": 540 }, { "epoch": 6.35, "learning_rate": 1.8213866039952998e-05, "loss": 0.4005, "step": 550 }, { "epoch": 6.47, "learning_rate": 1.762632197414806e-05, "loss": 0.3524, "step": 560 }, { "epoch": 6.58, "learning_rate": 1.7038777908343127e-05, "loss": 0.4008, "step": 570 }, { "epoch": 6.7, "learning_rate": 1.645123384253819e-05, "loss": 0.3571, "step": 580 }, { "epoch": 6.82, "learning_rate": 1.5863689776733257e-05, "loss": 0.3398, "step": 590 }, { "epoch": 6.93, "learning_rate": 1.527614571092832e-05, "loss": 0.3512, "step": 600 }, { "epoch": 7.0, "eval_accuracy": 0.8905380333951762, "eval_loss": 1.4846545457839966, "eval_runtime": 8.0342, "eval_samples_per_second": 67.089, "eval_steps_per_second": 8.464, "step": 606 }, { "epoch": 7.05, "learning_rate": 1.4688601645123384e-05, "loss": 0.3451, "step": 610 }, { "epoch": 7.16, "learning_rate": 1.410105757931845e-05, "loss": 0.3489, "step": 620 }, { "epoch": 7.28, "learning_rate": 1.3513513513513515e-05, "loss": 0.3414, "step": 630 }, { "epoch": 7.39, "learning_rate": 1.2925969447708578e-05, "loss": 0.3867, "step": 640 }, { "epoch": 7.51, "learning_rate": 1.2338425381903643e-05, "loss": 0.3509, "step": 650 }, { "epoch": 7.62, "learning_rate": 1.1750881316098707e-05, "loss": 0.33, "step": 660 }, { "epoch": 7.74, "learning_rate": 1.1163337250293772e-05, "loss": 0.3678, "step": 670 }, { "epoch": 7.85, "learning_rate": 1.0575793184488837e-05, "loss": 0.3481, "step": 680 }, { "epoch": 7.97, "learning_rate": 9.988249118683901e-06, "loss": 0.3439, "step": 690 }, { "epoch": 7.99, "eval_accuracy": 0.9053803339517625, "eval_loss": 0.28196877241134644, "eval_runtime": 8.0187, "eval_samples_per_second": 67.218, "eval_steps_per_second": 8.48, "step": 692 }, { "epoch": 8.09, "learning_rate": 9.400705052878966e-06, "loss": 0.3763, "step": 700 }, { "epoch": 8.2, "learning_rate": 8.81316098707403e-06, "loss": 0.3435, "step": 710 }, { "epoch": 8.32, "learning_rate": 8.225616921269095e-06, "loss": 0.3455, "step": 720 }, { "epoch": 8.43, "learning_rate": 7.63807285546416e-06, "loss": 0.3422, "step": 730 }, { "epoch": 8.55, "learning_rate": 7.050528789659225e-06, "loss": 0.3484, "step": 740 }, { "epoch": 8.66, "learning_rate": 6.462984723854289e-06, "loss": 0.3342, "step": 750 }, { "epoch": 8.78, "learning_rate": 5.875440658049354e-06, "loss": 0.3657, "step": 760 }, { "epoch": 8.89, "learning_rate": 5.287896592244418e-06, "loss": 0.3306, "step": 770 }, { "epoch": 9.0, "eval_accuracy": 0.8849721706864564, "eval_loss": 0.3021787703037262, "eval_runtime": 8.1523, "eval_samples_per_second": 66.116, "eval_steps_per_second": 8.341, "step": 779 }, { "epoch": 9.01, "learning_rate": 4.700352526439483e-06, "loss": 0.3541, "step": 780 }, { "epoch": 9.13, "learning_rate": 4.1128084606345476e-06, "loss": 0.3233, "step": 790 }, { "epoch": 9.24, "learning_rate": 3.5252643948296126e-06, "loss": 0.3345, "step": 800 }, { "epoch": 9.36, "learning_rate": 2.937720329024677e-06, "loss": 0.3387, "step": 810 }, { "epoch": 9.47, "learning_rate": 2.3501762632197415e-06, "loss": 0.3477, "step": 820 }, { "epoch": 9.59, "learning_rate": 1.7626321974148063e-06, "loss": 0.3148, "step": 830 }, { "epoch": 9.7, "learning_rate": 1.1750881316098707e-06, "loss": 0.337, "step": 840 }, { "epoch": 9.82, "learning_rate": 5.875440658049354e-07, "loss": 0.3339, "step": 850 }, { "epoch": 9.93, "learning_rate": 0.0, "loss": 0.3691, "step": 860 }, { "epoch": 9.93, "eval_accuracy": 0.8163265306122449, "eval_loss": 0.7981617450714111, "eval_runtime": 9.506, "eval_samples_per_second": 56.701, "eval_steps_per_second": 7.153, "step": 860 }, { "epoch": 9.93, "step": 860, "total_flos": 1.0221236731922227e+18, "train_loss": 0.4434790275817694, "train_runtime": 1771.1031, "train_samples_per_second": 27.356, "train_steps_per_second": 0.486 } ], "max_steps": 860, "num_train_epochs": 10, "total_flos": 1.0221236731922227e+18, "trial_name": null, "trial_params": null }