{ "best_metric": 1.0, "best_model_checkpoint": "ViTFineTuned/checkpoint-603", "epoch": 14.988929889298893, "global_step": 1005, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.15, "learning_rate": 4.950495049504951e-05, "loss": 2.3113, "step": 10 }, { "epoch": 0.3, "learning_rate": 9.900990099009902e-05, "loss": 1.8782, "step": 20 }, { "epoch": 0.44, "learning_rate": 0.0001485148514851485, "loss": 1.2023, "step": 30 }, { "epoch": 0.59, "learning_rate": 0.00019801980198019803, "loss": 0.6597, "step": 40 }, { "epoch": 0.74, "learning_rate": 0.00024752475247524753, "loss": 0.4134, "step": 50 }, { "epoch": 0.89, "learning_rate": 0.000297029702970297, "loss": 0.2859, "step": 60 }, { "epoch": 0.99, "eval_accuracy": 0.9784172661870504, "eval_loss": 0.21802514791488647, "eval_runtime": 6.2997, "eval_samples_per_second": 66.193, "eval_steps_per_second": 4.286, "step": 67 }, { "epoch": 1.04, "learning_rate": 0.0003465346534653465, "loss": 0.285, "step": 70 }, { "epoch": 1.19, "learning_rate": 0.00039603960396039607, "loss": 0.261, "step": 80 }, { "epoch": 1.34, "learning_rate": 0.00044554455445544556, "loss": 0.2659, "step": 90 }, { "epoch": 1.49, "learning_rate": 0.0004950495049504951, "loss": 0.2812, "step": 100 }, { "epoch": 1.63, "learning_rate": 0.0004950221238938053, "loss": 0.2512, "step": 110 }, { "epoch": 1.78, "learning_rate": 0.0004894911504424779, "loss": 0.2033, "step": 120 }, { "epoch": 1.93, "learning_rate": 0.0004839601769911505, "loss": 0.293, "step": 130 }, { "epoch": 1.99, "eval_accuracy": 0.9184652278177458, "eval_loss": 0.3308344781398773, "eval_runtime": 6.9556, "eval_samples_per_second": 59.952, "eval_steps_per_second": 3.882, "step": 134 }, { "epoch": 2.09, "learning_rate": 0.000478429203539823, "loss": 0.2754, "step": 140 }, { "epoch": 2.24, "learning_rate": 0.0004728982300884956, "loss": 0.2746, "step": 150 }, { "epoch": 2.38, "learning_rate": 0.00046736725663716817, "loss": 0.1477, "step": 160 }, { "epoch": 2.53, "learning_rate": 0.0004618362831858407, "loss": 0.1984, "step": 170 }, { "epoch": 2.68, "learning_rate": 0.00045630530973451327, "loss": 0.279, "step": 180 }, { "epoch": 2.83, "learning_rate": 0.00045077433628318585, "loss": 0.1846, "step": 190 }, { "epoch": 2.97, "learning_rate": 0.0004452433628318584, "loss": 0.1444, "step": 200 }, { "epoch": 2.99, "eval_accuracy": 0.9568345323741008, "eval_loss": 0.15321111679077148, "eval_runtime": 6.3227, "eval_samples_per_second": 65.953, "eval_steps_per_second": 4.27, "step": 201 }, { "epoch": 3.13, "learning_rate": 0.000439712389380531, "loss": 0.1136, "step": 210 }, { "epoch": 3.28, "learning_rate": 0.0004341814159292036, "loss": 0.1482, "step": 220 }, { "epoch": 3.43, "learning_rate": 0.0004286504424778761, "loss": 0.1124, "step": 230 }, { "epoch": 3.58, "learning_rate": 0.0004231194690265487, "loss": 0.2128, "step": 240 }, { "epoch": 3.72, "learning_rate": 0.00041758849557522125, "loss": 0.0859, "step": 250 }, { "epoch": 3.87, "learning_rate": 0.0004120575221238938, "loss": 0.0833, "step": 260 }, { "epoch": 3.99, "eval_accuracy": 0.9856115107913669, "eval_loss": 0.051548197865486145, "eval_runtime": 6.2776, "eval_samples_per_second": 66.426, "eval_steps_per_second": 4.301, "step": 268 }, { "epoch": 4.03, "learning_rate": 0.0004065265486725664, "loss": 0.1025, "step": 270 }, { "epoch": 4.18, "learning_rate": 0.000400995575221239, "loss": 0.0917, "step": 280 }, { "epoch": 4.32, "learning_rate": 0.0003954646017699115, "loss": 0.0812, "step": 290 }, { "epoch": 4.47, "learning_rate": 0.0003899336283185841, "loss": 0.0879, "step": 300 }, { "epoch": 4.62, "learning_rate": 0.0003844026548672566, "loss": 0.0763, "step": 310 }, { "epoch": 4.77, "learning_rate": 0.0003788716814159292, "loss": 0.0582, "step": 320 }, { "epoch": 4.92, "learning_rate": 0.00037334070796460176, "loss": 0.1007, "step": 330 }, { "epoch": 4.99, "eval_accuracy": 0.9904076738609112, "eval_loss": 0.029486695304512978, "eval_runtime": 6.2974, "eval_samples_per_second": 66.218, "eval_steps_per_second": 4.288, "step": 335 }, { "epoch": 5.07, "learning_rate": 0.00036780973451327434, "loss": 0.0564, "step": 340 }, { "epoch": 5.22, "learning_rate": 0.0003622787610619469, "loss": 0.0506, "step": 350 }, { "epoch": 5.37, "learning_rate": 0.0003567477876106195, "loss": 0.0611, "step": 360 }, { "epoch": 5.52, "learning_rate": 0.000351216814159292, "loss": 0.0576, "step": 370 }, { "epoch": 5.66, "learning_rate": 0.0003456858407079646, "loss": 0.0832, "step": 380 }, { "epoch": 5.81, "learning_rate": 0.0003401548672566372, "loss": 0.0561, "step": 390 }, { "epoch": 5.96, "learning_rate": 0.0003346238938053097, "loss": 0.0372, "step": 400 }, { "epoch": 5.99, "eval_accuracy": 0.9808153477218226, "eval_loss": 0.05741920694708824, "eval_runtime": 6.2799, "eval_samples_per_second": 66.402, "eval_steps_per_second": 4.299, "step": 402 }, { "epoch": 6.12, "learning_rate": 0.00032909292035398233, "loss": 0.0749, "step": 410 }, { "epoch": 6.27, "learning_rate": 0.0003235619469026549, "loss": 0.0464, "step": 420 }, { "epoch": 6.41, "learning_rate": 0.00031803097345132743, "loss": 0.0538, "step": 430 }, { "epoch": 6.56, "learning_rate": 0.0003125, "loss": 0.073, "step": 440 }, { "epoch": 6.71, "learning_rate": 0.0003069690265486726, "loss": 0.0533, "step": 450 }, { "epoch": 6.86, "learning_rate": 0.0003014380530973451, "loss": 0.0919, "step": 460 }, { "epoch": 6.99, "eval_accuracy": 0.988009592326139, "eval_loss": 0.05372486636042595, "eval_runtime": 6.3165, "eval_samples_per_second": 66.017, "eval_steps_per_second": 4.275, "step": 469 }, { "epoch": 7.01, "learning_rate": 0.0002959070796460177, "loss": 0.0518, "step": 470 }, { "epoch": 7.16, "learning_rate": 0.0002903761061946903, "loss": 0.0233, "step": 480 }, { "epoch": 7.31, "learning_rate": 0.00028484513274336284, "loss": 0.0501, "step": 490 }, { "epoch": 7.46, "learning_rate": 0.0002793141592920354, "loss": 0.0685, "step": 500 }, { "epoch": 7.61, "learning_rate": 0.000273783185840708, "loss": 0.0116, "step": 510 }, { "epoch": 7.75, "learning_rate": 0.0002682522123893805, "loss": 0.0228, "step": 520 }, { "epoch": 7.9, "learning_rate": 0.0002627212389380531, "loss": 0.0135, "step": 530 }, { "epoch": 7.99, "eval_accuracy": 0.9952038369304557, "eval_loss": 0.011652274057269096, "eval_runtime": 6.3118, "eval_samples_per_second": 66.067, "eval_steps_per_second": 4.278, "step": 536 }, { "epoch": 8.06, "learning_rate": 0.00025719026548672567, "loss": 0.0091, "step": 540 }, { "epoch": 8.21, "learning_rate": 0.00025165929203539825, "loss": 0.0469, "step": 550 }, { "epoch": 8.35, "learning_rate": 0.0002461283185840708, "loss": 0.0157, "step": 560 }, { "epoch": 8.5, "learning_rate": 0.00024059734513274338, "loss": 0.0172, "step": 570 }, { "epoch": 8.65, "learning_rate": 0.00023506637168141593, "loss": 0.0202, "step": 580 }, { "epoch": 8.8, "learning_rate": 0.0002295353982300885, "loss": 0.0303, "step": 590 }, { "epoch": 8.94, "learning_rate": 0.00022400442477876108, "loss": 0.0472, "step": 600 }, { "epoch": 8.99, "eval_accuracy": 1.0, "eval_loss": 0.007474538870155811, "eval_runtime": 6.2502, "eval_samples_per_second": 66.718, "eval_steps_per_second": 4.32, "step": 603 }, { "epoch": 9.1, "learning_rate": 0.00021847345132743363, "loss": 0.0147, "step": 610 }, { "epoch": 9.25, "learning_rate": 0.0002129424778761062, "loss": 0.0133, "step": 620 }, { "epoch": 9.4, "learning_rate": 0.00020741150442477876, "loss": 0.0264, "step": 630 }, { "epoch": 9.55, "learning_rate": 0.00020188053097345134, "loss": 0.053, "step": 640 }, { "epoch": 9.69, "learning_rate": 0.0001963495575221239, "loss": 0.0056, "step": 650 }, { "epoch": 9.84, "learning_rate": 0.00019081858407079646, "loss": 0.0128, "step": 660 }, { "epoch": 9.99, "learning_rate": 0.00018528761061946904, "loss": 0.0151, "step": 670 }, { "epoch": 9.99, "eval_accuracy": 1.0, "eval_loss": 0.004758651368319988, "eval_runtime": 6.2595, "eval_samples_per_second": 66.619, "eval_steps_per_second": 4.313, "step": 670 }, { "epoch": 10.15, "learning_rate": 0.0001797566371681416, "loss": 0.0135, "step": 680 }, { "epoch": 10.3, "learning_rate": 0.00017422566371681417, "loss": 0.0394, "step": 690 }, { "epoch": 10.44, "learning_rate": 0.00016869469026548672, "loss": 0.0122, "step": 700 }, { "epoch": 10.59, "learning_rate": 0.0001631637168141593, "loss": 0.0136, "step": 710 }, { "epoch": 10.74, "learning_rate": 0.00015763274336283187, "loss": 0.01, "step": 720 }, { "epoch": 10.89, "learning_rate": 0.00015210176991150442, "loss": 0.0052, "step": 730 }, { "epoch": 10.99, "eval_accuracy": 0.9976019184652278, "eval_loss": 0.007309095934033394, "eval_runtime": 6.2533, "eval_samples_per_second": 66.685, "eval_steps_per_second": 4.318, "step": 737 }, { "epoch": 11.04, "learning_rate": 0.00014657079646017697, "loss": 0.0113, "step": 740 }, { "epoch": 11.19, "learning_rate": 0.00014103982300884958, "loss": 0.0325, "step": 750 }, { "epoch": 11.34, "learning_rate": 0.00013550884955752213, "loss": 0.012, "step": 760 }, { "epoch": 11.49, "learning_rate": 0.00012997787610619468, "loss": 0.0185, "step": 770 }, { "epoch": 11.63, "learning_rate": 0.00012444690265486725, "loss": 0.0049, "step": 780 }, { "epoch": 11.78, "learning_rate": 0.00011891592920353983, "loss": 0.0038, "step": 790 }, { "epoch": 11.93, "learning_rate": 0.00011338495575221238, "loss": 0.0109, "step": 800 }, { "epoch": 11.99, "eval_accuracy": 0.9952038369304557, "eval_loss": 0.01980188488960266, "eval_runtime": 6.279, "eval_samples_per_second": 66.411, "eval_steps_per_second": 4.3, "step": 804 }, { "epoch": 12.09, "learning_rate": 0.00010785398230088496, "loss": 0.0096, "step": 810 }, { "epoch": 12.24, "learning_rate": 0.00010232300884955754, "loss": 0.0115, "step": 820 }, { "epoch": 12.38, "learning_rate": 9.679203539823009e-05, "loss": 0.0171, "step": 830 }, { "epoch": 12.53, "learning_rate": 9.126106194690266e-05, "loss": 0.004, "step": 840 }, { "epoch": 12.68, "learning_rate": 8.573008849557521e-05, "loss": 0.0073, "step": 850 }, { "epoch": 12.83, "learning_rate": 8.019911504424779e-05, "loss": 0.0045, "step": 860 }, { "epoch": 12.97, "learning_rate": 7.466814159292036e-05, "loss": 0.0033, "step": 870 }, { "epoch": 12.99, "eval_accuracy": 0.9976019184652278, "eval_loss": 0.006568592973053455, "eval_runtime": 6.2861, "eval_samples_per_second": 66.336, "eval_steps_per_second": 4.295, "step": 871 }, { "epoch": 13.13, "learning_rate": 6.913716814159292e-05, "loss": 0.0077, "step": 880 }, { "epoch": 13.28, "learning_rate": 6.360619469026548e-05, "loss": 0.0027, "step": 890 }, { "epoch": 13.43, "learning_rate": 5.8075221238938054e-05, "loss": 0.0028, "step": 900 }, { "epoch": 13.58, "learning_rate": 5.254424778761062e-05, "loss": 0.0025, "step": 910 }, { "epoch": 13.72, "learning_rate": 4.701327433628318e-05, "loss": 0.0024, "step": 920 }, { "epoch": 13.87, "learning_rate": 4.148230088495576e-05, "loss": 0.011, "step": 930 }, { "epoch": 13.99, "eval_accuracy": 0.9976019184652278, "eval_loss": 0.006684896536171436, "eval_runtime": 6.258, "eval_samples_per_second": 66.635, "eval_steps_per_second": 4.314, "step": 938 }, { "epoch": 14.03, "learning_rate": 3.595132743362832e-05, "loss": 0.0027, "step": 940 }, { "epoch": 14.18, "learning_rate": 3.0420353982300886e-05, "loss": 0.0092, "step": 950 }, { "epoch": 14.32, "learning_rate": 2.4889380530973453e-05, "loss": 0.0025, "step": 960 }, { "epoch": 14.47, "learning_rate": 1.9358407079646017e-05, "loss": 0.0024, "step": 970 }, { "epoch": 14.62, "learning_rate": 1.3827433628318586e-05, "loss": 0.0061, "step": 980 }, { "epoch": 14.77, "learning_rate": 8.296460176991151e-06, "loss": 0.0028, "step": 990 }, { "epoch": 14.92, "learning_rate": 2.765486725663717e-06, "loss": 0.0032, "step": 1000 }, { "epoch": 14.99, "eval_accuracy": 0.9976019184652278, "eval_loss": 0.00603157514706254, "eval_runtime": 6.2764, "eval_samples_per_second": 66.44, "eval_steps_per_second": 4.302, "step": 1005 }, { "epoch": 14.99, "step": 1005, "total_flos": 5.035680667331113e+18, "train_loss": 0.13053628388996147, "train_runtime": 2520.4596, "train_samples_per_second": 25.799, "train_steps_per_second": 0.399 } ], "max_steps": 1005, "num_train_epochs": 15, "total_flos": 5.035680667331113e+18, "trial_name": null, "trial_params": null }