{ "best_metric": 0.7499688084840923, "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-eurosat\\checkpoint-1090", "epoch": 4.982857142857143, "eval_steps": 500, "global_step": 1090, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.045714285714285714, "grad_norm": 6.963473320007324, "learning_rate": 4.587155963302753e-06, "loss": 2.8055, "step": 10 }, { "epoch": 0.09142857142857143, "grad_norm": 11.695841789245605, "learning_rate": 9.174311926605506e-06, "loss": 2.7626, "step": 20 }, { "epoch": 0.13714285714285715, "grad_norm": 10.859987258911133, "learning_rate": 1.3761467889908258e-05, "loss": 2.6863, "step": 30 }, { "epoch": 0.18285714285714286, "grad_norm": 14.530256271362305, "learning_rate": 1.834862385321101e-05, "loss": 2.5932, "step": 40 }, { "epoch": 0.22857142857142856, "grad_norm": 22.234731674194336, "learning_rate": 2.2935779816513765e-05, "loss": 2.4006, "step": 50 }, { "epoch": 0.2742857142857143, "grad_norm": 21.946706771850586, "learning_rate": 2.7522935779816515e-05, "loss": 2.1545, "step": 60 }, { "epoch": 0.32, "grad_norm": 31.88918685913086, "learning_rate": 3.211009174311927e-05, "loss": 1.9868, "step": 70 }, { "epoch": 0.3657142857142857, "grad_norm": 17.8656063079834, "learning_rate": 3.669724770642202e-05, "loss": 1.8264, "step": 80 }, { "epoch": 0.4114285714285714, "grad_norm": 15.480560302734375, "learning_rate": 4.1284403669724776e-05, "loss": 1.8301, "step": 90 }, { "epoch": 0.45714285714285713, "grad_norm": 13.637527465820312, "learning_rate": 4.587155963302753e-05, "loss": 1.7708, "step": 100 }, { "epoch": 0.5028571428571429, "grad_norm": 16.25617218017578, "learning_rate": 4.994903160040775e-05, "loss": 1.7133, "step": 110 }, { "epoch": 0.5485714285714286, "grad_norm": 10.56847095489502, "learning_rate": 4.943934760448522e-05, "loss": 1.5611, "step": 120 }, { "epoch": 0.5942857142857143, "grad_norm": 16.05897331237793, "learning_rate": 4.892966360856269e-05, "loss": 1.6387, "step": 130 }, { "epoch": 0.64, "grad_norm": 17.286190032958984, "learning_rate": 4.8419979612640164e-05, "loss": 1.519, "step": 140 }, { "epoch": 0.6857142857142857, "grad_norm": 13.946508407592773, "learning_rate": 4.7910295616717635e-05, "loss": 1.4708, "step": 150 }, { "epoch": 0.7314285714285714, "grad_norm": 10.632002830505371, "learning_rate": 4.740061162079511e-05, "loss": 1.5119, "step": 160 }, { "epoch": 0.7771428571428571, "grad_norm": 11.511331558227539, "learning_rate": 4.6890927624872586e-05, "loss": 1.4476, "step": 170 }, { "epoch": 0.8228571428571428, "grad_norm": 9.265079498291016, "learning_rate": 4.638124362895006e-05, "loss": 1.4864, "step": 180 }, { "epoch": 0.8685714285714285, "grad_norm": 12.077649116516113, "learning_rate": 4.587155963302753e-05, "loss": 1.4542, "step": 190 }, { "epoch": 0.9142857142857143, "grad_norm": 8.974185943603516, "learning_rate": 4.5361875637104995e-05, "loss": 1.4505, "step": 200 }, { "epoch": 0.96, "grad_norm": 7.196322441101074, "learning_rate": 4.4852191641182466e-05, "loss": 1.4736, "step": 210 }, { "epoch": 0.9965714285714286, "eval_accuracy": 0.6177167810355584, "eval_loss": 1.2365009784698486, "eval_runtime": 74.6739, "eval_samples_per_second": 107.333, "eval_steps_per_second": 3.361, "step": 218 }, { "epoch": 1.0057142857142858, "grad_norm": 8.575163841247559, "learning_rate": 4.434250764525994e-05, "loss": 1.2932, "step": 220 }, { "epoch": 1.0514285714285714, "grad_norm": 9.491937637329102, "learning_rate": 4.383282364933741e-05, "loss": 1.3322, "step": 230 }, { "epoch": 1.0971428571428572, "grad_norm": 7.442806720733643, "learning_rate": 4.332313965341488e-05, "loss": 1.3634, "step": 240 }, { "epoch": 1.1428571428571428, "grad_norm": 11.594585418701172, "learning_rate": 4.281345565749236e-05, "loss": 1.3838, "step": 250 }, { "epoch": 1.1885714285714286, "grad_norm": 9.72411823272705, "learning_rate": 4.230377166156983e-05, "loss": 1.3695, "step": 260 }, { "epoch": 1.2342857142857142, "grad_norm": 8.765079498291016, "learning_rate": 4.1794087665647304e-05, "loss": 1.241, "step": 270 }, { "epoch": 1.28, "grad_norm": 7.949846267700195, "learning_rate": 4.1284403669724776e-05, "loss": 1.3099, "step": 280 }, { "epoch": 1.3257142857142856, "grad_norm": 7.360624313354492, "learning_rate": 4.077471967380224e-05, "loss": 1.3464, "step": 290 }, { "epoch": 1.3714285714285714, "grad_norm": 9.947721481323242, "learning_rate": 4.026503567787971e-05, "loss": 1.269, "step": 300 }, { "epoch": 1.4171428571428573, "grad_norm": 7.578132629394531, "learning_rate": 3.9755351681957185e-05, "loss": 1.2697, "step": 310 }, { "epoch": 1.4628571428571429, "grad_norm": 7.614137172698975, "learning_rate": 3.9245667686034656e-05, "loss": 1.2017, "step": 320 }, { "epoch": 1.5085714285714285, "grad_norm": 6.3952107429504395, "learning_rate": 3.8735983690112135e-05, "loss": 1.3131, "step": 330 }, { "epoch": 1.5542857142857143, "grad_norm": 11.049560546875, "learning_rate": 3.822629969418961e-05, "loss": 1.2547, "step": 340 }, { "epoch": 1.6, "grad_norm": 7.0264129638671875, "learning_rate": 3.771661569826708e-05, "loss": 1.2384, "step": 350 }, { "epoch": 1.6457142857142857, "grad_norm": 8.316610336303711, "learning_rate": 3.720693170234455e-05, "loss": 1.2679, "step": 360 }, { "epoch": 1.6914285714285713, "grad_norm": 7.154911041259766, "learning_rate": 3.669724770642202e-05, "loss": 1.3185, "step": 370 }, { "epoch": 1.737142857142857, "grad_norm": 7.319551944732666, "learning_rate": 3.6187563710499494e-05, "loss": 1.1742, "step": 380 }, { "epoch": 1.782857142857143, "grad_norm": 7.423411846160889, "learning_rate": 3.567787971457696e-05, "loss": 1.1981, "step": 390 }, { "epoch": 1.8285714285714287, "grad_norm": 9.163586616516113, "learning_rate": 3.516819571865443e-05, "loss": 1.2371, "step": 400 }, { "epoch": 1.8742857142857143, "grad_norm": 7.521251678466797, "learning_rate": 3.465851172273191e-05, "loss": 1.2717, "step": 410 }, { "epoch": 1.92, "grad_norm": 10.229044914245605, "learning_rate": 3.414882772680938e-05, "loss": 1.206, "step": 420 }, { "epoch": 1.9657142857142857, "grad_norm": 7.633674621582031, "learning_rate": 3.363914373088685e-05, "loss": 1.2161, "step": 430 }, { "epoch": 1.9977142857142858, "eval_accuracy": 0.6915782907049283, "eval_loss": 1.0157994031906128, "eval_runtime": 74.9081, "eval_samples_per_second": 106.998, "eval_steps_per_second": 3.351, "step": 437 }, { "epoch": 2.0114285714285716, "grad_norm": 8.755953788757324, "learning_rate": 3.3129459734964325e-05, "loss": 1.1613, "step": 440 }, { "epoch": 2.057142857142857, "grad_norm": 7.305307865142822, "learning_rate": 3.26197757390418e-05, "loss": 1.0986, "step": 450 }, { "epoch": 2.1028571428571428, "grad_norm": 8.31972885131836, "learning_rate": 3.211009174311927e-05, "loss": 1.1722, "step": 460 }, { "epoch": 2.1485714285714286, "grad_norm": 8.749483108520508, "learning_rate": 3.160040774719674e-05, "loss": 1.1203, "step": 470 }, { "epoch": 2.1942857142857144, "grad_norm": 6.05934476852417, "learning_rate": 3.1090723751274206e-05, "loss": 1.1225, "step": 480 }, { "epoch": 2.24, "grad_norm": 10.040249824523926, "learning_rate": 3.0581039755351684e-05, "loss": 1.2017, "step": 490 }, { "epoch": 2.2857142857142856, "grad_norm": 8.599287986755371, "learning_rate": 3.0071355759429153e-05, "loss": 1.1669, "step": 500 }, { "epoch": 2.3314285714285714, "grad_norm": 7.941746711730957, "learning_rate": 2.9561671763506628e-05, "loss": 1.1189, "step": 510 }, { "epoch": 2.3771428571428572, "grad_norm": 13.228888511657715, "learning_rate": 2.90519877675841e-05, "loss": 1.1882, "step": 520 }, { "epoch": 2.422857142857143, "grad_norm": 7.503291130065918, "learning_rate": 2.854230377166157e-05, "loss": 1.185, "step": 530 }, { "epoch": 2.4685714285714284, "grad_norm": 7.043280124664307, "learning_rate": 2.8032619775739043e-05, "loss": 1.1577, "step": 540 }, { "epoch": 2.5142857142857142, "grad_norm": 8.937540054321289, "learning_rate": 2.7522935779816515e-05, "loss": 1.1539, "step": 550 }, { "epoch": 2.56, "grad_norm": 8.076375961303711, "learning_rate": 2.701325178389399e-05, "loss": 1.1124, "step": 560 }, { "epoch": 2.605714285714286, "grad_norm": 7.4971442222595215, "learning_rate": 2.6503567787971462e-05, "loss": 1.0799, "step": 570 }, { "epoch": 2.6514285714285712, "grad_norm": 9.443109512329102, "learning_rate": 2.5993883792048927e-05, "loss": 1.0234, "step": 580 }, { "epoch": 2.697142857142857, "grad_norm": 6.811278820037842, "learning_rate": 2.5484199796126402e-05, "loss": 1.0423, "step": 590 }, { "epoch": 2.742857142857143, "grad_norm": 8.011152267456055, "learning_rate": 2.4974515800203874e-05, "loss": 1.1336, "step": 600 }, { "epoch": 2.7885714285714287, "grad_norm": 6.383072853088379, "learning_rate": 2.4464831804281346e-05, "loss": 1.1203, "step": 610 }, { "epoch": 2.8342857142857145, "grad_norm": 9.429741859436035, "learning_rate": 2.3955147808358818e-05, "loss": 1.091, "step": 620 }, { "epoch": 2.88, "grad_norm": 6.606307506561279, "learning_rate": 2.3445463812436293e-05, "loss": 1.058, "step": 630 }, { "epoch": 2.9257142857142857, "grad_norm": 8.834084510803223, "learning_rate": 2.2935779816513765e-05, "loss": 1.1266, "step": 640 }, { "epoch": 2.9714285714285715, "grad_norm": 7.674890518188477, "learning_rate": 2.2426095820591233e-05, "loss": 1.0807, "step": 650 }, { "epoch": 2.998857142857143, "eval_accuracy": 0.727386150966937, "eval_loss": 0.9072983264923096, "eval_runtime": 74.6334, "eval_samples_per_second": 107.392, "eval_steps_per_second": 3.363, "step": 656 }, { "epoch": 3.0171428571428573, "grad_norm": 7.3469624519348145, "learning_rate": 2.1916411824668705e-05, "loss": 1.0863, "step": 660 }, { "epoch": 3.0628571428571427, "grad_norm": 7.052463531494141, "learning_rate": 2.140672782874618e-05, "loss": 1.0188, "step": 670 }, { "epoch": 3.1085714285714285, "grad_norm": 7.651565074920654, "learning_rate": 2.0897043832823652e-05, "loss": 0.9874, "step": 680 }, { "epoch": 3.1542857142857144, "grad_norm": 11.283343315124512, "learning_rate": 2.038735983690112e-05, "loss": 1.0876, "step": 690 }, { "epoch": 3.2, "grad_norm": 7.787779331207275, "learning_rate": 1.9877675840978592e-05, "loss": 1.0479, "step": 700 }, { "epoch": 3.2457142857142856, "grad_norm": 8.140477180480957, "learning_rate": 1.9367991845056068e-05, "loss": 1.009, "step": 710 }, { "epoch": 3.2914285714285714, "grad_norm": 8.705915451049805, "learning_rate": 1.885830784913354e-05, "loss": 1.056, "step": 720 }, { "epoch": 3.337142857142857, "grad_norm": 7.134729385375977, "learning_rate": 1.834862385321101e-05, "loss": 1.0378, "step": 730 }, { "epoch": 3.382857142857143, "grad_norm": 7.588448524475098, "learning_rate": 1.783893985728848e-05, "loss": 1.0508, "step": 740 }, { "epoch": 3.4285714285714284, "grad_norm": 7.663401126861572, "learning_rate": 1.7329255861365955e-05, "loss": 1.0207, "step": 750 }, { "epoch": 3.474285714285714, "grad_norm": 6.939538955688477, "learning_rate": 1.6819571865443427e-05, "loss": 1.1108, "step": 760 }, { "epoch": 3.52, "grad_norm": 7.630512714385986, "learning_rate": 1.63098878695209e-05, "loss": 0.9975, "step": 770 }, { "epoch": 3.565714285714286, "grad_norm": 9.263322830200195, "learning_rate": 1.580020387359837e-05, "loss": 1.0372, "step": 780 }, { "epoch": 3.611428571428571, "grad_norm": 8.593291282653809, "learning_rate": 1.5290519877675842e-05, "loss": 1.0863, "step": 790 }, { "epoch": 3.657142857142857, "grad_norm": 7.475892543792725, "learning_rate": 1.4780835881753314e-05, "loss": 1.0021, "step": 800 }, { "epoch": 3.702857142857143, "grad_norm": 9.492719650268555, "learning_rate": 1.4271151885830786e-05, "loss": 1.0572, "step": 810 }, { "epoch": 3.7485714285714287, "grad_norm": 11.467538833618164, "learning_rate": 1.3761467889908258e-05, "loss": 1.0216, "step": 820 }, { "epoch": 3.7942857142857145, "grad_norm": 7.794005870819092, "learning_rate": 1.3251783893985731e-05, "loss": 1.0205, "step": 830 }, { "epoch": 3.84, "grad_norm": 6.822214603424072, "learning_rate": 1.2742099898063201e-05, "loss": 1.0281, "step": 840 }, { "epoch": 3.8857142857142857, "grad_norm": 6.747819423675537, "learning_rate": 1.2232415902140673e-05, "loss": 1.0237, "step": 850 }, { "epoch": 3.9314285714285715, "grad_norm": 7.018404006958008, "learning_rate": 1.1722731906218146e-05, "loss": 1.0446, "step": 860 }, { "epoch": 3.977142857142857, "grad_norm": 7.553677082061768, "learning_rate": 1.1213047910295617e-05, "loss": 0.9977, "step": 870 }, { "epoch": 4.0, "eval_accuracy": 0.7456019962570181, "eval_loss": 0.8551267385482788, "eval_runtime": 74.4356, "eval_samples_per_second": 107.677, "eval_steps_per_second": 3.372, "step": 875 }, { "epoch": 4.022857142857143, "grad_norm": 8.310098648071289, "learning_rate": 1.070336391437309e-05, "loss": 1.0626, "step": 880 }, { "epoch": 4.0685714285714285, "grad_norm": 7.971031665802002, "learning_rate": 1.019367991845056e-05, "loss": 1.0294, "step": 890 }, { "epoch": 4.114285714285714, "grad_norm": 8.742574691772461, "learning_rate": 9.683995922528034e-06, "loss": 1.0401, "step": 900 }, { "epoch": 4.16, "grad_norm": 7.5797553062438965, "learning_rate": 9.174311926605506e-06, "loss": 1.0194, "step": 910 }, { "epoch": 4.2057142857142855, "grad_norm": 10.30631160736084, "learning_rate": 8.664627930682977e-06, "loss": 1.0334, "step": 920 }, { "epoch": 4.251428571428572, "grad_norm": 8.128324508666992, "learning_rate": 8.15494393476045e-06, "loss": 1.0027, "step": 930 }, { "epoch": 4.297142857142857, "grad_norm": 7.413013935089111, "learning_rate": 7.645259938837921e-06, "loss": 0.9981, "step": 940 }, { "epoch": 4.3428571428571425, "grad_norm": 7.2544426918029785, "learning_rate": 7.135575942915393e-06, "loss": 0.9954, "step": 950 }, { "epoch": 4.388571428571429, "grad_norm": 6.764856815338135, "learning_rate": 6.6258919469928655e-06, "loss": 0.9712, "step": 960 }, { "epoch": 4.434285714285714, "grad_norm": 7.096611022949219, "learning_rate": 6.1162079510703365e-06, "loss": 0.9646, "step": 970 }, { "epoch": 4.48, "grad_norm": 7.685801982879639, "learning_rate": 5.606523955147808e-06, "loss": 1.0362, "step": 980 }, { "epoch": 4.525714285714286, "grad_norm": 7.416136741638184, "learning_rate": 5.09683995922528e-06, "loss": 1.0596, "step": 990 }, { "epoch": 4.571428571428571, "grad_norm": 7.128796100616455, "learning_rate": 4.587155963302753e-06, "loss": 0.9479, "step": 1000 }, { "epoch": 4.617142857142857, "grad_norm": 6.152943134307861, "learning_rate": 4.077471967380225e-06, "loss": 0.9503, "step": 1010 }, { "epoch": 4.662857142857143, "grad_norm": 8.12887191772461, "learning_rate": 3.5677879714576964e-06, "loss": 1.0071, "step": 1020 }, { "epoch": 4.708571428571428, "grad_norm": 5.702174186706543, "learning_rate": 3.0581039755351682e-06, "loss": 1.0149, "step": 1030 }, { "epoch": 4.7542857142857144, "grad_norm": 6.921694278717041, "learning_rate": 2.54841997961264e-06, "loss": 0.981, "step": 1040 }, { "epoch": 4.8, "grad_norm": 9.122147560119629, "learning_rate": 2.0387359836901123e-06, "loss": 0.9939, "step": 1050 }, { "epoch": 4.845714285714286, "grad_norm": 8.022086143493652, "learning_rate": 1.5290519877675841e-06, "loss": 0.8968, "step": 1060 }, { "epoch": 4.8914285714285715, "grad_norm": 7.508928298950195, "learning_rate": 1.0193679918450562e-06, "loss": 0.9698, "step": 1070 }, { "epoch": 4.937142857142857, "grad_norm": 9.233248710632324, "learning_rate": 5.096839959225281e-07, "loss": 0.9584, "step": 1080 }, { "epoch": 4.982857142857143, "grad_norm": 7.491491794586182, "learning_rate": 0.0, "loss": 0.9737, "step": 1090 }, { "epoch": 4.982857142857143, "eval_accuracy": 0.7499688084840923, "eval_loss": 0.8480741381645203, "eval_runtime": 71.0957, "eval_samples_per_second": 112.735, "eval_steps_per_second": 3.53, "step": 1090 }, { "epoch": 4.982857142857143, "step": 1090, "total_flos": 3.468170115610067e+18, "train_loss": 1.2575330436776537, "train_runtime": 2631.6311, "train_samples_per_second": 53.178, "train_steps_per_second": 0.414 } ], "logging_steps": 10, "max_steps": 1090, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 3.468170115610067e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }