{ "best_metric": 0.08029154688119888, "best_model_checkpoint": "./beit-tiny-mask-finetuned/checkpoint-200", "epoch": 0.5543237250554324, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00019955654101995565, "loss": 0.1318, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.00019900221729490022, "loss": 0.0546, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00019850332594235036, "loss": 0.2395, "step": 30 }, { "epoch": 0.04, "learning_rate": 0.00019794900221729492, "loss": 0.0966, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.00019739467849223948, "loss": 0.1544, "step": 50 }, { "epoch": 0.07, "learning_rate": 0.00019684035476718405, "loss": 0.1734, "step": 60 }, { "epoch": 0.08, "learning_rate": 0.00019634146341463416, "loss": 0.1556, "step": 70 }, { "epoch": 0.09, "learning_rate": 0.00019578713968957873, "loss": 0.2154, "step": 80 }, { "epoch": 0.1, "learning_rate": 0.0001952328159645233, "loss": 0.1845, "step": 90 }, { "epoch": 0.11, "learning_rate": 0.0001947339246119734, "loss": 0.4978, "step": 100 }, { "epoch": 0.11, "eval_accuracy": 0.9402113459399333, "eval_loss": 0.17701464891433716, "eval_runtime": 192.8644, "eval_samples_per_second": 18.645, "eval_steps_per_second": 2.333, "step": 100 }, { "epoch": 0.12, "learning_rate": 0.00019417960088691797, "loss": 0.1897, "step": 110 }, { "epoch": 0.13, "learning_rate": 0.00019362527716186253, "loss": 0.1148, "step": 120 }, { "epoch": 0.14, "learning_rate": 0.0001930709534368071, "loss": 0.208, "step": 130 }, { "epoch": 0.16, "learning_rate": 0.00019251662971175165, "loss": 0.1142, "step": 140 }, { "epoch": 0.17, "learning_rate": 0.00019196230598669624, "loss": 0.1285, "step": 150 }, { "epoch": 0.18, "learning_rate": 0.0001914079822616408, "loss": 0.1383, "step": 160 }, { "epoch": 0.19, "learning_rate": 0.00019085365853658537, "loss": 0.1786, "step": 170 }, { "epoch": 0.2, "learning_rate": 0.00019029933481152996, "loss": 0.0892, "step": 180 }, { "epoch": 0.21, "learning_rate": 0.00018974501108647452, "loss": 0.2118, "step": 190 }, { "epoch": 0.22, "learning_rate": 0.00018919068736141908, "loss": 0.1863, "step": 200 }, { "epoch": 0.22, "eval_accuracy": 0.974972191323693, "eval_loss": 0.08029154688119888, "eval_runtime": 193.2131, "eval_samples_per_second": 18.612, "eval_steps_per_second": 2.329, "step": 200 }, { "epoch": 0.23, "learning_rate": 0.00018863636363636364, "loss": 0.0495, "step": 210 }, { "epoch": 0.24, "learning_rate": 0.0001880820399113082, "loss": 0.1476, "step": 220 }, { "epoch": 0.25, "learning_rate": 0.00018752771618625277, "loss": 0.1339, "step": 230 }, { "epoch": 0.27, "learning_rate": 0.00018697339246119733, "loss": 0.1734, "step": 240 }, { "epoch": 0.28, "learning_rate": 0.00018641906873614192, "loss": 0.2068, "step": 250 }, { "epoch": 0.29, "learning_rate": 0.00018586474501108648, "loss": 0.1427, "step": 260 }, { "epoch": 0.3, "learning_rate": 0.00018531042128603104, "loss": 0.182, "step": 270 }, { "epoch": 0.31, "learning_rate": 0.00018475609756097563, "loss": 0.1436, "step": 280 }, { "epoch": 0.32, "learning_rate": 0.0001842017738359202, "loss": 0.1319, "step": 290 }, { "epoch": 0.33, "learning_rate": 0.00018364745011086476, "loss": 0.0818, "step": 300 }, { "epoch": 0.33, "eval_accuracy": 0.9424360400444939, "eval_loss": 0.20646639168262482, "eval_runtime": 193.0709, "eval_samples_per_second": 18.625, "eval_steps_per_second": 2.331, "step": 300 }, { "epoch": 0.34, "learning_rate": 0.00018309312638580932, "loss": 0.2115, "step": 310 }, { "epoch": 0.35, "learning_rate": 0.00018253880266075388, "loss": 0.0725, "step": 320 }, { "epoch": 0.37, "learning_rate": 0.00018198447893569844, "loss": 0.0589, "step": 330 }, { "epoch": 0.38, "learning_rate": 0.000181430155210643, "loss": 0.2017, "step": 340 }, { "epoch": 0.39, "learning_rate": 0.0001808758314855876, "loss": 0.0678, "step": 350 }, { "epoch": 0.4, "learning_rate": 0.00018032150776053216, "loss": 0.2075, "step": 360 }, { "epoch": 0.41, "learning_rate": 0.00017976718403547672, "loss": 0.3123, "step": 370 }, { "epoch": 0.42, "learning_rate": 0.0001792128603104213, "loss": 0.0857, "step": 380 }, { "epoch": 0.43, "learning_rate": 0.00017865853658536587, "loss": 0.0958, "step": 390 }, { "epoch": 0.44, "learning_rate": 0.00017810421286031043, "loss": 0.1982, "step": 400 }, { "epoch": 0.44, "eval_accuracy": 0.9438264738598443, "eval_loss": 0.16823017597198486, "eval_runtime": 193.1602, "eval_samples_per_second": 18.617, "eval_steps_per_second": 2.33, "step": 400 }, { "epoch": 0.45, "learning_rate": 0.000177549889135255, "loss": 0.2038, "step": 410 }, { "epoch": 0.47, "learning_rate": 0.00017699556541019956, "loss": 0.1603, "step": 420 }, { "epoch": 0.48, "learning_rate": 0.00017644124168514412, "loss": 0.1035, "step": 430 }, { "epoch": 0.49, "learning_rate": 0.00017588691796008868, "loss": 0.0329, "step": 440 }, { "epoch": 0.5, "learning_rate": 0.00017533259423503327, "loss": 0.0392, "step": 450 }, { "epoch": 0.51, "learning_rate": 0.00017477827050997783, "loss": 0.1546, "step": 460 }, { "epoch": 0.52, "learning_rate": 0.0001742239467849224, "loss": 0.155, "step": 470 }, { "epoch": 0.53, "learning_rate": 0.00017366962305986699, "loss": 0.1521, "step": 480 }, { "epoch": 0.54, "learning_rate": 0.00017311529933481155, "loss": 0.1506, "step": 490 }, { "epoch": 0.55, "learning_rate": 0.0001725609756097561, "loss": 0.0293, "step": 500 }, { "epoch": 0.55, "eval_accuracy": 0.9824805339265851, "eval_loss": 0.08882919698953629, "eval_runtime": 193.0128, "eval_samples_per_second": 18.631, "eval_steps_per_second": 2.331, "step": 500 }, { "epoch": 0.55, "step": 500, "total_flos": 6.18978356527104e+17, "train_loss": 0.1509909844994545, "train_runtime": 3496.5643, "train_samples_per_second": 16.505, "train_steps_per_second": 1.032 } ], "max_steps": 3608, "num_train_epochs": 4, "total_flos": 6.18978356527104e+17, "trial_name": null, "trial_params": null }