{ "best_metric": 0.9161991584852734, "best_model_checkpoint": "swin-tiny-patch4-window7-224-classification/checkpoint-802", "epoch": 9.975062344139651, "eval_steps": 500, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "grad_norm": 4.872883319854736, "learning_rate": 1e-05, "loss": 0.2252, "step": 10 }, { "epoch": 0.2, "grad_norm": 4.769281387329102, "learning_rate": 2e-05, "loss": 0.2281, "step": 20 }, { "epoch": 0.3, "grad_norm": 4.990223407745361, "learning_rate": 3e-05, "loss": 0.2281, "step": 30 }, { "epoch": 0.4, "grad_norm": 4.2269463539123535, "learning_rate": 4e-05, "loss": 0.2359, "step": 40 }, { "epoch": 0.5, "grad_norm": 5.273340702056885, "learning_rate": 5e-05, "loss": 0.2345, "step": 50 }, { "epoch": 0.6, "grad_norm": 6.166986465454102, "learning_rate": 6e-05, "loss": 0.2625, "step": 60 }, { "epoch": 0.7, "grad_norm": 5.066626071929932, "learning_rate": 7e-05, "loss": 0.2738, "step": 70 }, { "epoch": 0.8, "grad_norm": 4.902966499328613, "learning_rate": 8e-05, "loss": 0.2793, "step": 80 }, { "epoch": 0.9, "grad_norm": 7.076936721801758, "learning_rate": 9e-05, "loss": 0.3018, "step": 90 }, { "epoch": 1.0, "grad_norm": 7.968544960021973, "learning_rate": 0.0001, "loss": 0.3193, "step": 100 }, { "epoch": 1.0, "eval_accuracy": 0.8716690042075736, "eval_loss": 0.3754436671733856, "eval_runtime": 39.3279, "eval_samples_per_second": 72.518, "eval_steps_per_second": 1.144, "step": 100 }, { "epoch": 1.1, "grad_norm": 6.099853992462158, "learning_rate": 9.888888888888889e-05, "loss": 0.2997, "step": 110 }, { "epoch": 1.2, "grad_norm": 8.707666397094727, "learning_rate": 9.777777777777778e-05, "loss": 0.3066, "step": 120 }, { "epoch": 1.3, "grad_norm": 6.50078010559082, "learning_rate": 9.666666666666667e-05, "loss": 0.2916, "step": 130 }, { "epoch": 1.4, "grad_norm": 6.648171901702881, "learning_rate": 9.555555555555557e-05, "loss": 0.318, "step": 140 }, { "epoch": 1.5, "grad_norm": 5.3696746826171875, "learning_rate": 9.444444444444444e-05, "loss": 0.2995, "step": 150 }, { "epoch": 1.6, "grad_norm": 4.172982692718506, "learning_rate": 9.333333333333334e-05, "loss": 0.2971, "step": 160 }, { "epoch": 1.7, "grad_norm": 5.951164245605469, "learning_rate": 9.222222222222223e-05, "loss": 0.3165, "step": 170 }, { "epoch": 1.8, "grad_norm": 5.611052513122559, "learning_rate": 9.111111111111112e-05, "loss": 0.2885, "step": 180 }, { "epoch": 1.9, "grad_norm": 6.186773777008057, "learning_rate": 9e-05, "loss": 0.2863, "step": 190 }, { "epoch": 2.0, "grad_norm": 8.462760925292969, "learning_rate": 8.888888888888889e-05, "loss": 0.292, "step": 200 }, { "epoch": 2.0, "eval_accuracy": 0.864656381486676, "eval_loss": 0.4085911512374878, "eval_runtime": 38.7867, "eval_samples_per_second": 73.53, "eval_steps_per_second": 1.16, "step": 200 }, { "epoch": 2.09, "grad_norm": 6.041274547576904, "learning_rate": 8.777777777777778e-05, "loss": 0.2882, "step": 210 }, { "epoch": 2.19, "grad_norm": 5.336978435516357, "learning_rate": 8.666666666666667e-05, "loss": 0.269, "step": 220 }, { "epoch": 2.29, "grad_norm": 5.773736953735352, "learning_rate": 8.555555555555556e-05, "loss": 0.2675, "step": 230 }, { "epoch": 2.39, "grad_norm": 5.7196784019470215, "learning_rate": 8.444444444444444e-05, "loss": 0.256, "step": 240 }, { "epoch": 2.49, "grad_norm": 4.928184986114502, "learning_rate": 8.333333333333334e-05, "loss": 0.2463, "step": 250 }, { "epoch": 2.59, "grad_norm": 7.451691627502441, "learning_rate": 8.222222222222222e-05, "loss": 0.2677, "step": 260 }, { "epoch": 2.69, "grad_norm": 5.755824089050293, "learning_rate": 8.111111111111112e-05, "loss": 0.2507, "step": 270 }, { "epoch": 2.79, "grad_norm": 5.09450101852417, "learning_rate": 8e-05, "loss": 0.2766, "step": 280 }, { "epoch": 2.89, "grad_norm": 5.240322589874268, "learning_rate": 7.88888888888889e-05, "loss": 0.2674, "step": 290 }, { "epoch": 2.99, "grad_norm": 4.865029811859131, "learning_rate": 7.777777777777778e-05, "loss": 0.2531, "step": 300 }, { "epoch": 2.99, "eval_accuracy": 0.894109396914446, "eval_loss": 0.34070128202438354, "eval_runtime": 39.4778, "eval_samples_per_second": 72.243, "eval_steps_per_second": 1.14, "step": 300 }, { "epoch": 3.09, "grad_norm": 4.403306007385254, "learning_rate": 7.666666666666667e-05, "loss": 0.2141, "step": 310 }, { "epoch": 3.19, "grad_norm": 4.91873836517334, "learning_rate": 7.555555555555556e-05, "loss": 0.2257, "step": 320 }, { "epoch": 3.29, "grad_norm": 5.522466659545898, "learning_rate": 7.444444444444444e-05, "loss": 0.2419, "step": 330 }, { "epoch": 3.39, "grad_norm": 4.829797267913818, "learning_rate": 7.333333333333333e-05, "loss": 0.2522, "step": 340 }, { "epoch": 3.49, "grad_norm": 6.162938117980957, "learning_rate": 7.222222222222222e-05, "loss": 0.2533, "step": 350 }, { "epoch": 3.59, "grad_norm": 5.2231340408325195, "learning_rate": 7.111111111111112e-05, "loss": 0.2284, "step": 360 }, { "epoch": 3.69, "grad_norm": 4.9288716316223145, "learning_rate": 7e-05, "loss": 0.244, "step": 370 }, { "epoch": 3.79, "grad_norm": 4.194519996643066, "learning_rate": 6.88888888888889e-05, "loss": 0.2433, "step": 380 }, { "epoch": 3.89, "grad_norm": 5.996556282043457, "learning_rate": 6.777777777777778e-05, "loss": 0.2428, "step": 390 }, { "epoch": 3.99, "grad_norm": 5.954503536224365, "learning_rate": 6.666666666666667e-05, "loss": 0.2242, "step": 400 }, { "epoch": 4.0, "eval_accuracy": 0.8927068723702665, "eval_loss": 0.33171677589416504, "eval_runtime": 39.2032, "eval_samples_per_second": 72.749, "eval_steps_per_second": 1.148, "step": 401 }, { "epoch": 4.09, "grad_norm": 5.060131072998047, "learning_rate": 6.555555555555556e-05, "loss": 0.209, "step": 410 }, { "epoch": 4.19, "grad_norm": 5.29101037979126, "learning_rate": 6.444444444444446e-05, "loss": 0.2045, "step": 420 }, { "epoch": 4.29, "grad_norm": 7.005785942077637, "learning_rate": 6.333333333333333e-05, "loss": 0.2063, "step": 430 }, { "epoch": 4.39, "grad_norm": 3.808072328567505, "learning_rate": 6.222222222222222e-05, "loss": 0.1935, "step": 440 }, { "epoch": 4.49, "grad_norm": 4.201041221618652, "learning_rate": 6.111111111111112e-05, "loss": 0.2062, "step": 450 }, { "epoch": 4.59, "grad_norm": 5.4039835929870605, "learning_rate": 6e-05, "loss": 0.1949, "step": 460 }, { "epoch": 4.69, "grad_norm": 6.116047382354736, "learning_rate": 5.8888888888888896e-05, "loss": 0.2042, "step": 470 }, { "epoch": 4.79, "grad_norm": 4.799409866333008, "learning_rate": 5.7777777777777776e-05, "loss": 0.2135, "step": 480 }, { "epoch": 4.89, "grad_norm": 5.615736484527588, "learning_rate": 5.666666666666667e-05, "loss": 0.2098, "step": 490 }, { "epoch": 4.99, "grad_norm": 4.442112922668457, "learning_rate": 5.555555555555556e-05, "loss": 0.2181, "step": 500 }, { "epoch": 5.0, "eval_accuracy": 0.8983169705469846, "eval_loss": 0.3073332607746124, "eval_runtime": 39.2249, "eval_samples_per_second": 72.709, "eval_steps_per_second": 1.147, "step": 501 }, { "epoch": 5.09, "grad_norm": 3.2966055870056152, "learning_rate": 5.4444444444444446e-05, "loss": 0.1809, "step": 510 }, { "epoch": 5.19, "grad_norm": 3.9496381282806396, "learning_rate": 5.333333333333333e-05, "loss": 0.1904, "step": 520 }, { "epoch": 5.29, "grad_norm": 4.369997978210449, "learning_rate": 5.222222222222223e-05, "loss": 0.1817, "step": 530 }, { "epoch": 5.39, "grad_norm": 4.793045520782471, "learning_rate": 5.111111111111111e-05, "loss": 0.1889, "step": 540 }, { "epoch": 5.49, "grad_norm": 3.8780226707458496, "learning_rate": 5e-05, "loss": 0.1713, "step": 550 }, { "epoch": 5.59, "grad_norm": 5.775850296020508, "learning_rate": 4.888888888888889e-05, "loss": 0.1916, "step": 560 }, { "epoch": 5.69, "grad_norm": 4.524603843688965, "learning_rate": 4.7777777777777784e-05, "loss": 0.1771, "step": 570 }, { "epoch": 5.79, "grad_norm": 4.741029262542725, "learning_rate": 4.666666666666667e-05, "loss": 0.1722, "step": 580 }, { "epoch": 5.89, "grad_norm": 4.951327323913574, "learning_rate": 4.555555555555556e-05, "loss": 0.156, "step": 590 }, { "epoch": 5.99, "grad_norm": 3.7907869815826416, "learning_rate": 4.4444444444444447e-05, "loss": 0.1829, "step": 600 }, { "epoch": 6.0, "eval_accuracy": 0.9032258064516129, "eval_loss": 0.31035441160202026, "eval_runtime": 38.6346, "eval_samples_per_second": 73.82, "eval_steps_per_second": 1.165, "step": 601 }, { "epoch": 6.08, "grad_norm": 4.384369850158691, "learning_rate": 4.3333333333333334e-05, "loss": 0.1656, "step": 610 }, { "epoch": 6.18, "grad_norm": 5.536717891693115, "learning_rate": 4.222222222222222e-05, "loss": 0.1573, "step": 620 }, { "epoch": 6.28, "grad_norm": 5.531068801879883, "learning_rate": 4.111111111111111e-05, "loss": 0.1585, "step": 630 }, { "epoch": 6.38, "grad_norm": 4.487759590148926, "learning_rate": 4e-05, "loss": 0.1618, "step": 640 }, { "epoch": 6.48, "grad_norm": 4.500083923339844, "learning_rate": 3.888888888888889e-05, "loss": 0.1567, "step": 650 }, { "epoch": 6.58, "grad_norm": 4.1614298820495605, "learning_rate": 3.777777777777778e-05, "loss": 0.1548, "step": 660 }, { "epoch": 6.68, "grad_norm": 4.733586311340332, "learning_rate": 3.6666666666666666e-05, "loss": 0.1607, "step": 670 }, { "epoch": 6.78, "grad_norm": 5.126717567443848, "learning_rate": 3.555555555555556e-05, "loss": 0.1542, "step": 680 }, { "epoch": 6.88, "grad_norm": 3.771245002746582, "learning_rate": 3.444444444444445e-05, "loss": 0.1455, "step": 690 }, { "epoch": 6.98, "grad_norm": 5.217066287994385, "learning_rate": 3.3333333333333335e-05, "loss": 0.1426, "step": 700 }, { "epoch": 6.99, "eval_accuracy": 0.9049789621318373, "eval_loss": 0.29764774441719055, "eval_runtime": 39.4594, "eval_samples_per_second": 72.277, "eval_steps_per_second": 1.14, "step": 701 }, { "epoch": 7.08, "grad_norm": 6.066614627838135, "learning_rate": 3.222222222222223e-05, "loss": 0.1429, "step": 710 }, { "epoch": 7.18, "grad_norm": 3.363190174102783, "learning_rate": 3.111111111111111e-05, "loss": 0.1385, "step": 720 }, { "epoch": 7.28, "grad_norm": 5.055250644683838, "learning_rate": 3e-05, "loss": 0.1373, "step": 730 }, { "epoch": 7.38, "grad_norm": 5.0901384353637695, "learning_rate": 2.8888888888888888e-05, "loss": 0.1392, "step": 740 }, { "epoch": 7.48, "grad_norm": 3.7394046783447266, "learning_rate": 2.777777777777778e-05, "loss": 0.1235, "step": 750 }, { "epoch": 7.58, "grad_norm": 4.67645263671875, "learning_rate": 2.6666666666666667e-05, "loss": 0.1466, "step": 760 }, { "epoch": 7.68, "grad_norm": 4.7584381103515625, "learning_rate": 2.5555555555555554e-05, "loss": 0.1228, "step": 770 }, { "epoch": 7.78, "grad_norm": 5.486098289489746, "learning_rate": 2.4444444444444445e-05, "loss": 0.1355, "step": 780 }, { "epoch": 7.88, "grad_norm": 5.71726131439209, "learning_rate": 2.3333333333333336e-05, "loss": 0.1299, "step": 790 }, { "epoch": 7.98, "grad_norm": 4.222195625305176, "learning_rate": 2.2222222222222223e-05, "loss": 0.1472, "step": 800 }, { "epoch": 8.0, "eval_accuracy": 0.9161991584852734, "eval_loss": 0.2736373543739319, "eval_runtime": 39.0364, "eval_samples_per_second": 73.06, "eval_steps_per_second": 1.153, "step": 802 }, { "epoch": 8.08, "grad_norm": 3.898097038269043, "learning_rate": 2.111111111111111e-05, "loss": 0.1214, "step": 810 }, { "epoch": 8.18, "grad_norm": 4.837826728820801, "learning_rate": 2e-05, "loss": 0.1176, "step": 820 }, { "epoch": 8.28, "grad_norm": 4.239289283752441, "learning_rate": 1.888888888888889e-05, "loss": 0.1191, "step": 830 }, { "epoch": 8.38, "grad_norm": 4.8459320068359375, "learning_rate": 1.777777777777778e-05, "loss": 0.1233, "step": 840 }, { "epoch": 8.48, "grad_norm": 4.943118095397949, "learning_rate": 1.6666666666666667e-05, "loss": 0.1274, "step": 850 }, { "epoch": 8.58, "grad_norm": 3.3787436485290527, "learning_rate": 1.5555555555555555e-05, "loss": 0.1277, "step": 860 }, { "epoch": 8.68, "grad_norm": 4.009765148162842, "learning_rate": 1.4444444444444444e-05, "loss": 0.1044, "step": 870 }, { "epoch": 8.78, "grad_norm": 3.8422799110412598, "learning_rate": 1.3333333333333333e-05, "loss": 0.1195, "step": 880 }, { "epoch": 8.88, "grad_norm": 2.927946090698242, "learning_rate": 1.2222222222222222e-05, "loss": 0.1077, "step": 890 }, { "epoch": 8.98, "grad_norm": 5.300693988800049, "learning_rate": 1.1111111111111112e-05, "loss": 0.1314, "step": 900 }, { "epoch": 9.0, "eval_accuracy": 0.9133941093969145, "eval_loss": 0.27998465299606323, "eval_runtime": 39.0226, "eval_samples_per_second": 73.086, "eval_steps_per_second": 1.153, "step": 902 }, { "epoch": 9.08, "grad_norm": 4.2179107666015625, "learning_rate": 1e-05, "loss": 0.1172, "step": 910 }, { "epoch": 9.18, "grad_norm": 4.46065092086792, "learning_rate": 8.88888888888889e-06, "loss": 0.1076, "step": 920 }, { "epoch": 9.28, "grad_norm": 3.4499449729919434, "learning_rate": 7.777777777777777e-06, "loss": 0.1103, "step": 930 }, { "epoch": 9.38, "grad_norm": 4.1871418952941895, "learning_rate": 6.666666666666667e-06, "loss": 0.1041, "step": 940 }, { "epoch": 9.48, "grad_norm": 4.666812896728516, "learning_rate": 5.555555555555556e-06, "loss": 0.1175, "step": 950 }, { "epoch": 9.58, "grad_norm": 3.4452712535858154, "learning_rate": 4.444444444444445e-06, "loss": 0.1143, "step": 960 }, { "epoch": 9.68, "grad_norm": 4.585890293121338, "learning_rate": 3.3333333333333333e-06, "loss": 0.1156, "step": 970 }, { "epoch": 9.78, "grad_norm": 3.672333240509033, "learning_rate": 2.2222222222222225e-06, "loss": 0.1157, "step": 980 }, { "epoch": 9.88, "grad_norm": 3.946244478225708, "learning_rate": 1.1111111111111112e-06, "loss": 0.1019, "step": 990 }, { "epoch": 9.98, "grad_norm": 2.6940207481384277, "learning_rate": 0.0, "loss": 0.1139, "step": 1000 }, { "epoch": 9.98, "eval_accuracy": 0.914796633941094, "eval_loss": 0.2748475670814514, "eval_runtime": 39.0773, "eval_samples_per_second": 72.984, "eval_steps_per_second": 1.152, "step": 1000 }, { "epoch": 9.98, "step": 1000, "total_flos": 6.364199987970048e+18, "train_loss": 0.19688368451595306, "train_runtime": 4893.4366, "train_samples_per_second": 52.446, "train_steps_per_second": 0.204 } ], "logging_steps": 10, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "total_flos": 6.364199987970048e+18, "train_batch_size": 64, "trial_name": null, "trial_params": null }