{ "best_metric": 0.51348477602005, "best_model_checkpoint": "./training_output/swin-base-patch4-window7-224_11092024\\checkpoint-8000", "epoch": 20.0, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.25, "grad_norm": 4.37050724029541, "learning_rate": 0.00029624999999999996, "loss": 1.6781, "step": 100 }, { "epoch": 0.5, "grad_norm": 3.8442654609680176, "learning_rate": 0.00029249999999999995, "loss": 1.2684, "step": 200 }, { "epoch": 0.75, "grad_norm": 3.500859498977661, "learning_rate": 0.00028875, "loss": 1.0998, "step": 300 }, { "epoch": 1.0, "grad_norm": 5.394394874572754, "learning_rate": 0.000285, "loss": 1.0366, "step": 400 }, { "epoch": 1.0, "eval_accuracy": 0.72, "eval_loss": 0.9471067190170288, "eval_runtime": 191.6064, "eval_samples_per_second": 4.175, "eval_steps_per_second": 0.522, "step": 400 }, { "epoch": 1.25, "grad_norm": 5.127144813537598, "learning_rate": 0.00028125, "loss": 0.9272, "step": 500 }, { "epoch": 1.5, "grad_norm": 3.240920305252075, "learning_rate": 0.00027749999999999997, "loss": 0.8831, "step": 600 }, { "epoch": 1.75, "grad_norm": 2.9190714359283447, "learning_rate": 0.00027374999999999996, "loss": 0.8501, "step": 700 }, { "epoch": 2.0, "grad_norm": 4.049932956695557, "learning_rate": 0.00027, "loss": 0.8257, "step": 800 }, { "epoch": 2.0, "eval_accuracy": 0.75375, "eval_loss": 0.7888995409011841, "eval_runtime": 190.7471, "eval_samples_per_second": 4.194, "eval_steps_per_second": 0.524, "step": 800 }, { "epoch": 2.25, "grad_norm": 2.773448944091797, "learning_rate": 0.00026624999999999994, "loss": 0.804, "step": 900 }, { "epoch": 2.5, "grad_norm": 4.934525489807129, "learning_rate": 0.0002625, "loss": 0.7622, "step": 1000 }, { "epoch": 2.75, "grad_norm": 2.755113363265991, "learning_rate": 0.00025875, "loss": 0.7747, "step": 1100 }, { "epoch": 3.0, "grad_norm": 2.5829150676727295, "learning_rate": 0.00025499999999999996, "loss": 0.7119, "step": 1200 }, { "epoch": 3.0, "eval_accuracy": 0.7775, "eval_loss": 0.723233699798584, "eval_runtime": 192.2538, "eval_samples_per_second": 4.161, "eval_steps_per_second": 0.52, "step": 1200 }, { "epoch": 3.25, "grad_norm": 3.0522494316101074, "learning_rate": 0.00025125, "loss": 0.7059, "step": 1300 }, { "epoch": 3.5, "grad_norm": 2.336416482925415, "learning_rate": 0.00024749999999999994, "loss": 0.6803, "step": 1400 }, { "epoch": 3.75, "grad_norm": 2.500382900238037, "learning_rate": 0.00024375, "loss": 0.6944, "step": 1500 }, { "epoch": 4.0, "grad_norm": 2.7047812938690186, "learning_rate": 0.00023999999999999998, "loss": 0.6969, "step": 1600 }, { "epoch": 4.0, "eval_accuracy": 0.78375, "eval_loss": 0.673926830291748, "eval_runtime": 188.8251, "eval_samples_per_second": 4.237, "eval_steps_per_second": 0.53, "step": 1600 }, { "epoch": 4.25, "grad_norm": 1.99785315990448, "learning_rate": 0.00023624999999999997, "loss": 0.6529, "step": 1700 }, { "epoch": 4.5, "grad_norm": 2.6651065349578857, "learning_rate": 0.00023249999999999999, "loss": 0.6514, "step": 1800 }, { "epoch": 4.75, "grad_norm": 2.7539191246032715, "learning_rate": 0.00022874999999999998, "loss": 0.6553, "step": 1900 }, { "epoch": 5.0, "grad_norm": 2.1046321392059326, "learning_rate": 0.000225, "loss": 0.6487, "step": 2000 }, { "epoch": 5.0, "eval_accuracy": 0.78625, "eval_loss": 0.6371492743492126, "eval_runtime": 187.904, "eval_samples_per_second": 4.257, "eval_steps_per_second": 0.532, "step": 2000 }, { "epoch": 5.25, "grad_norm": 3.1880531311035156, "learning_rate": 0.00022124999999999998, "loss": 0.6217, "step": 2100 }, { "epoch": 5.5, "grad_norm": 2.7244873046875, "learning_rate": 0.00021749999999999997, "loss": 0.6168, "step": 2200 }, { "epoch": 5.75, "grad_norm": 3.3323047161102295, "learning_rate": 0.00021375, "loss": 0.6135, "step": 2300 }, { "epoch": 6.0, "grad_norm": 2.5545437335968018, "learning_rate": 0.00020999999999999998, "loss": 0.5956, "step": 2400 }, { "epoch": 6.0, "eval_accuracy": 0.78875, "eval_loss": 0.6197744607925415, "eval_runtime": 190.0271, "eval_samples_per_second": 4.21, "eval_steps_per_second": 0.526, "step": 2400 }, { "epoch": 6.25, "grad_norm": 2.105663299560547, "learning_rate": 0.00020624999999999997, "loss": 0.5865, "step": 2500 }, { "epoch": 6.5, "grad_norm": 3.24285626411438, "learning_rate": 0.0002025, "loss": 0.5933, "step": 2600 }, { "epoch": 6.75, "grad_norm": 2.4483067989349365, "learning_rate": 0.00019874999999999998, "loss": 0.6034, "step": 2700 }, { "epoch": 7.0, "grad_norm": 3.4036154747009277, "learning_rate": 0.000195, "loss": 0.5604, "step": 2800 }, { "epoch": 7.0, "eval_accuracy": 0.8025, "eval_loss": 0.5941271781921387, "eval_runtime": 188.5123, "eval_samples_per_second": 4.244, "eval_steps_per_second": 0.53, "step": 2800 }, { "epoch": 7.25, "grad_norm": 2.846735954284668, "learning_rate": 0.00019124999999999996, "loss": 0.5863, "step": 2900 }, { "epoch": 7.5, "grad_norm": 2.242002248764038, "learning_rate": 0.00018749999999999998, "loss": 0.5762, "step": 3000 }, { "epoch": 7.75, "grad_norm": 3.0385332107543945, "learning_rate": 0.00018375, "loss": 0.5552, "step": 3100 }, { "epoch": 8.0, "grad_norm": 4.019121170043945, "learning_rate": 0.00017999999999999998, "loss": 0.5732, "step": 3200 }, { "epoch": 8.0, "eval_accuracy": 0.795, "eval_loss": 0.5866623520851135, "eval_runtime": 189.5806, "eval_samples_per_second": 4.22, "eval_steps_per_second": 0.527, "step": 3200 }, { "epoch": 8.25, "grad_norm": 2.7213943004608154, "learning_rate": 0.00017625, "loss": 0.5913, "step": 3300 }, { "epoch": 8.5, "grad_norm": 2.490701675415039, "learning_rate": 0.00017249999999999996, "loss": 0.5469, "step": 3400 }, { "epoch": 8.75, "grad_norm": 2.9301528930664062, "learning_rate": 0.00016874999999999998, "loss": 0.5188, "step": 3500 }, { "epoch": 9.0, "grad_norm": 3.5669283866882324, "learning_rate": 0.000165, "loss": 0.5578, "step": 3600 }, { "epoch": 9.0, "eval_accuracy": 0.8025, "eval_loss": 0.570527195930481, "eval_runtime": 187.5953, "eval_samples_per_second": 4.265, "eval_steps_per_second": 0.533, "step": 3600 }, { "epoch": 9.25, "grad_norm": 2.9957895278930664, "learning_rate": 0.00016125, "loss": 0.532, "step": 3700 }, { "epoch": 9.5, "grad_norm": 3.6296515464782715, "learning_rate": 0.00015749999999999998, "loss": 0.5359, "step": 3800 }, { "epoch": 9.75, "grad_norm": 2.170017957687378, "learning_rate": 0.00015374999999999997, "loss": 0.5334, "step": 3900 }, { "epoch": 10.0, "grad_norm": 2.3260116577148438, "learning_rate": 0.00015, "loss": 0.5449, "step": 4000 }, { "epoch": 10.0, "eval_accuracy": 0.81125, "eval_loss": 0.5574603080749512, "eval_runtime": 186.6415, "eval_samples_per_second": 4.286, "eval_steps_per_second": 0.536, "step": 4000 }, { "epoch": 10.25, "grad_norm": 3.2211897373199463, "learning_rate": 0.00014624999999999998, "loss": 0.5333, "step": 4100 }, { "epoch": 10.5, "grad_norm": 1.6050548553466797, "learning_rate": 0.0001425, "loss": 0.5206, "step": 4200 }, { "epoch": 10.75, "grad_norm": 3.3255255222320557, "learning_rate": 0.00013874999999999998, "loss": 0.4932, "step": 4300 }, { "epoch": 11.0, "grad_norm": 2.705442190170288, "learning_rate": 0.000135, "loss": 0.5419, "step": 4400 }, { "epoch": 11.0, "eval_accuracy": 0.82125, "eval_loss": 0.5505005121231079, "eval_runtime": 186.7804, "eval_samples_per_second": 4.283, "eval_steps_per_second": 0.535, "step": 4400 }, { "epoch": 11.25, "grad_norm": 2.7391810417175293, "learning_rate": 0.00013125, "loss": 0.4945, "step": 4500 }, { "epoch": 11.5, "grad_norm": 2.746691942214966, "learning_rate": 0.00012749999999999998, "loss": 0.5196, "step": 4600 }, { "epoch": 11.75, "grad_norm": 3.134870767593384, "learning_rate": 0.00012374999999999997, "loss": 0.548, "step": 4700 }, { "epoch": 12.0, "grad_norm": 3.3967835903167725, "learning_rate": 0.00011999999999999999, "loss": 0.5086, "step": 4800 }, { "epoch": 12.0, "eval_accuracy": 0.82125, "eval_loss": 0.5385071039199829, "eval_runtime": 187.7731, "eval_samples_per_second": 4.26, "eval_steps_per_second": 0.533, "step": 4800 }, { "epoch": 12.25, "grad_norm": 2.0934133529663086, "learning_rate": 0.00011624999999999999, "loss": 0.5059, "step": 4900 }, { "epoch": 12.5, "grad_norm": 3.039590835571289, "learning_rate": 0.0001125, "loss": 0.5043, "step": 5000 }, { "epoch": 12.75, "grad_norm": 2.2998437881469727, "learning_rate": 0.00010874999999999999, "loss": 0.5084, "step": 5100 }, { "epoch": 13.0, "grad_norm": 1.5604509115219116, "learning_rate": 0.00010499999999999999, "loss": 0.4929, "step": 5200 }, { "epoch": 13.0, "eval_accuracy": 0.82125, "eval_loss": 0.5340179204940796, "eval_runtime": 189.2687, "eval_samples_per_second": 4.227, "eval_steps_per_second": 0.528, "step": 5200 }, { "epoch": 13.25, "grad_norm": 4.638809680938721, "learning_rate": 0.00010125, "loss": 0.5198, "step": 5300 }, { "epoch": 13.5, "grad_norm": 2.361114263534546, "learning_rate": 9.75e-05, "loss": 0.5016, "step": 5400 }, { "epoch": 13.75, "grad_norm": 1.9207624197006226, "learning_rate": 9.374999999999999e-05, "loss": 0.5064, "step": 5500 }, { "epoch": 14.0, "grad_norm": 2.6521153450012207, "learning_rate": 8.999999999999999e-05, "loss": 0.4701, "step": 5600 }, { "epoch": 14.0, "eval_accuracy": 0.81875, "eval_loss": 0.5296584963798523, "eval_runtime": 188.8651, "eval_samples_per_second": 4.236, "eval_steps_per_second": 0.529, "step": 5600 }, { "epoch": 14.25, "grad_norm": 1.9743889570236206, "learning_rate": 8.624999999999998e-05, "loss": 0.5078, "step": 5700 }, { "epoch": 14.5, "grad_norm": 2.232062578201294, "learning_rate": 8.25e-05, "loss": 0.4723, "step": 5800 }, { "epoch": 14.75, "grad_norm": 3.261901617050171, "learning_rate": 7.874999999999999e-05, "loss": 0.5011, "step": 5900 }, { "epoch": 15.0, "grad_norm": 2.4302258491516113, "learning_rate": 7.5e-05, "loss": 0.4803, "step": 6000 }, { "epoch": 15.0, "eval_accuracy": 0.8225, "eval_loss": 0.5240346789360046, "eval_runtime": 189.3391, "eval_samples_per_second": 4.225, "eval_steps_per_second": 0.528, "step": 6000 }, { "epoch": 15.25, "grad_norm": 2.341607093811035, "learning_rate": 7.125e-05, "loss": 0.4791, "step": 6100 }, { "epoch": 15.5, "grad_norm": 2.3899717330932617, "learning_rate": 6.75e-05, "loss": 0.4872, "step": 6200 }, { "epoch": 15.75, "grad_norm": 2.253009557723999, "learning_rate": 6.374999999999999e-05, "loss": 0.5044, "step": 6300 }, { "epoch": 16.0, "grad_norm": 3.2920048236846924, "learning_rate": 5.9999999999999995e-05, "loss": 0.4988, "step": 6400 }, { "epoch": 16.0, "eval_accuracy": 0.83, "eval_loss": 0.5196738243103027, "eval_runtime": 189.4702, "eval_samples_per_second": 4.222, "eval_steps_per_second": 0.528, "step": 6400 }, { "epoch": 16.25, "grad_norm": 4.486973762512207, "learning_rate": 5.625e-05, "loss": 0.4831, "step": 6500 }, { "epoch": 16.5, "grad_norm": 1.689079999923706, "learning_rate": 5.2499999999999995e-05, "loss": 0.4961, "step": 6600 }, { "epoch": 16.75, "grad_norm": 2.2517282962799072, "learning_rate": 4.875e-05, "loss": 0.489, "step": 6700 }, { "epoch": 17.0, "grad_norm": 2.318455219268799, "learning_rate": 4.4999999999999996e-05, "loss": 0.4842, "step": 6800 }, { "epoch": 17.0, "eval_accuracy": 0.83125, "eval_loss": 0.5164589881896973, "eval_runtime": 188.3077, "eval_samples_per_second": 4.248, "eval_steps_per_second": 0.531, "step": 6800 }, { "epoch": 17.25, "grad_norm": 3.113399028778076, "learning_rate": 4.125e-05, "loss": 0.4676, "step": 6900 }, { "epoch": 17.5, "grad_norm": 2.0549120903015137, "learning_rate": 3.75e-05, "loss": 0.4679, "step": 7000 }, { "epoch": 17.75, "grad_norm": 2.0566205978393555, "learning_rate": 3.375e-05, "loss": 0.4988, "step": 7100 }, { "epoch": 18.0, "grad_norm": 2.392482280731201, "learning_rate": 2.9999999999999997e-05, "loss": 0.4917, "step": 7200 }, { "epoch": 18.0, "eval_accuracy": 0.83125, "eval_loss": 0.5148141384124756, "eval_runtime": 188.4138, "eval_samples_per_second": 4.246, "eval_steps_per_second": 0.531, "step": 7200 }, { "epoch": 18.25, "grad_norm": 1.7607569694519043, "learning_rate": 2.6249999999999998e-05, "loss": 0.475, "step": 7300 }, { "epoch": 18.5, "grad_norm": 2.870453357696533, "learning_rate": 2.2499999999999998e-05, "loss": 0.469, "step": 7400 }, { "epoch": 18.75, "grad_norm": 2.928583860397339, "learning_rate": 1.875e-05, "loss": 0.4691, "step": 7500 }, { "epoch": 19.0, "grad_norm": 3.0488061904907227, "learning_rate": 1.4999999999999999e-05, "loss": 0.4734, "step": 7600 }, { "epoch": 19.0, "eval_accuracy": 0.8325, "eval_loss": 0.5140394568443298, "eval_runtime": 187.2755, "eval_samples_per_second": 4.272, "eval_steps_per_second": 0.534, "step": 7600 }, { "epoch": 19.25, "grad_norm": 3.021597146987915, "learning_rate": 1.1249999999999999e-05, "loss": 0.5122, "step": 7700 }, { "epoch": 19.5, "grad_norm": 1.8236939907073975, "learning_rate": 7.499999999999999e-06, "loss": 0.4582, "step": 7800 }, { "epoch": 19.75, "grad_norm": 1.9407800436019897, "learning_rate": 3.7499999999999997e-06, "loss": 0.4578, "step": 7900 }, { "epoch": 20.0, "grad_norm": 2.3664169311523438, "learning_rate": 0.0, "loss": 0.4714, "step": 8000 }, { "epoch": 20.0, "eval_accuracy": 0.83375, "eval_loss": 0.51348477602005, "eval_runtime": 192.831, "eval_samples_per_second": 4.149, "eval_steps_per_second": 0.519, "step": 8000 }, { "epoch": 20.0, "step": 8000, "total_flos": 1.0028942094237696e+19, "train_loss": 0.6026930379867553, "train_runtime": 35487.7868, "train_samples_per_second": 3.607, "train_steps_per_second": 0.225 } ], "logging_steps": 100, "max_steps": 8000, "num_input_tokens_seen": 0, "num_train_epochs": 20, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0028942094237696e+19, "train_batch_size": 16, "trial_name": null, "trial_params": null }