{ "best_metric": 0.22444555163383484, "best_model_checkpoint": "vit-base-tarsh/checkpoint-500", "epoch": 3.937007874015748, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 0.00019606299212598428, "loss": 1.623, "step": 10 }, { "epoch": 0.16, "learning_rate": 0.0001921259842519685, "loss": 1.2641, "step": 20 }, { "epoch": 0.24, "learning_rate": 0.00018818897637795277, "loss": 0.9268, "step": 30 }, { "epoch": 0.31, "learning_rate": 0.000184251968503937, "loss": 0.7366, "step": 40 }, { "epoch": 0.39, "learning_rate": 0.00018031496062992125, "loss": 0.7007, "step": 50 }, { "epoch": 0.47, "learning_rate": 0.00017637795275590552, "loss": 0.557, "step": 60 }, { "epoch": 0.55, "learning_rate": 0.00017244094488188977, "loss": 0.6111, "step": 70 }, { "epoch": 0.63, "learning_rate": 0.000168503937007874, "loss": 0.6436, "step": 80 }, { "epoch": 0.71, "learning_rate": 0.00016456692913385828, "loss": 0.4778, "step": 90 }, { "epoch": 0.79, "learning_rate": 0.00016062992125984252, "loss": 0.4692, "step": 100 }, { "epoch": 0.79, "eval_accuracy": 0.9018567639257294, "eval_loss": 0.4172811806201935, "eval_runtime": 347.1671, "eval_samples_per_second": 1.086, "eval_steps_per_second": 0.138, "step": 100 }, { "epoch": 0.87, "learning_rate": 0.0001566929133858268, "loss": 0.2606, "step": 110 }, { "epoch": 0.94, "learning_rate": 0.00015275590551181104, "loss": 0.2834, "step": 120 }, { "epoch": 1.02, "learning_rate": 0.00014881889763779528, "loss": 0.2804, "step": 130 }, { "epoch": 1.1, "learning_rate": 0.00014488188976377955, "loss": 0.2504, "step": 140 }, { "epoch": 1.18, "learning_rate": 0.00014094488188976377, "loss": 0.184, "step": 150 }, { "epoch": 1.26, "learning_rate": 0.00013700787401574804, "loss": 0.213, "step": 160 }, { "epoch": 1.34, "learning_rate": 0.0001330708661417323, "loss": 0.2614, "step": 170 }, { "epoch": 1.42, "learning_rate": 0.00012913385826771653, "loss": 0.2567, "step": 180 }, { "epoch": 1.5, "learning_rate": 0.0001251968503937008, "loss": 0.1094, "step": 190 }, { "epoch": 1.57, "learning_rate": 0.00012125984251968505, "loss": 0.2064, "step": 200 }, { "epoch": 1.57, "eval_accuracy": 0.9257294429708223, "eval_loss": 0.29030969738960266, "eval_runtime": 349.8071, "eval_samples_per_second": 1.078, "eval_steps_per_second": 0.137, "step": 200 }, { "epoch": 1.65, "learning_rate": 0.00011732283464566928, "loss": 0.1707, "step": 210 }, { "epoch": 1.73, "learning_rate": 0.00011338582677165355, "loss": 0.2581, "step": 220 }, { "epoch": 1.81, "learning_rate": 0.00010944881889763781, "loss": 0.0885, "step": 230 }, { "epoch": 1.89, "learning_rate": 0.00010551181102362204, "loss": 0.1996, "step": 240 }, { "epoch": 1.97, "learning_rate": 0.0001015748031496063, "loss": 0.1614, "step": 250 }, { "epoch": 2.05, "learning_rate": 9.763779527559055e-05, "loss": 0.1308, "step": 260 }, { "epoch": 2.13, "learning_rate": 9.370078740157481e-05, "loss": 0.0973, "step": 270 }, { "epoch": 2.2, "learning_rate": 8.976377952755905e-05, "loss": 0.0764, "step": 280 }, { "epoch": 2.28, "learning_rate": 8.582677165354331e-05, "loss": 0.039, "step": 290 }, { "epoch": 2.36, "learning_rate": 8.188976377952757e-05, "loss": 0.0748, "step": 300 }, { "epoch": 2.36, "eval_accuracy": 0.9363395225464191, "eval_loss": 0.2725047767162323, "eval_runtime": 349.7967, "eval_samples_per_second": 1.078, "eval_steps_per_second": 0.137, "step": 300 }, { "epoch": 2.44, "learning_rate": 7.795275590551181e-05, "loss": 0.0508, "step": 310 }, { "epoch": 2.52, "learning_rate": 7.401574803149607e-05, "loss": 0.0781, "step": 320 }, { "epoch": 2.6, "learning_rate": 7.007874015748031e-05, "loss": 0.1238, "step": 330 }, { "epoch": 2.68, "learning_rate": 6.614173228346457e-05, "loss": 0.0506, "step": 340 }, { "epoch": 2.76, "learning_rate": 6.220472440944882e-05, "loss": 0.05, "step": 350 }, { "epoch": 2.83, "learning_rate": 5.826771653543307e-05, "loss": 0.0748, "step": 360 }, { "epoch": 2.91, "learning_rate": 5.433070866141733e-05, "loss": 0.0257, "step": 370 }, { "epoch": 2.99, "learning_rate": 5.0393700787401575e-05, "loss": 0.0861, "step": 380 }, { "epoch": 3.07, "learning_rate": 4.645669291338583e-05, "loss": 0.029, "step": 390 }, { "epoch": 3.15, "learning_rate": 4.251968503937008e-05, "loss": 0.0343, "step": 400 }, { "epoch": 3.15, "eval_accuracy": 0.9283819628647215, "eval_loss": 0.2931118607521057, "eval_runtime": 343.908, "eval_samples_per_second": 1.096, "eval_steps_per_second": 0.14, "step": 400 }, { "epoch": 3.23, "learning_rate": 3.858267716535433e-05, "loss": 0.0234, "step": 410 }, { "epoch": 3.31, "learning_rate": 3.464566929133858e-05, "loss": 0.0208, "step": 420 }, { "epoch": 3.39, "learning_rate": 3.070866141732284e-05, "loss": 0.0186, "step": 430 }, { "epoch": 3.46, "learning_rate": 2.677165354330709e-05, "loss": 0.0398, "step": 440 }, { "epoch": 3.54, "learning_rate": 2.283464566929134e-05, "loss": 0.0262, "step": 450 }, { "epoch": 3.62, "learning_rate": 1.889763779527559e-05, "loss": 0.0482, "step": 460 }, { "epoch": 3.7, "learning_rate": 1.4960629921259845e-05, "loss": 0.0191, "step": 470 }, { "epoch": 3.78, "learning_rate": 1.1023622047244095e-05, "loss": 0.02, "step": 480 }, { "epoch": 3.86, "learning_rate": 7.086614173228347e-06, "loss": 0.0501, "step": 490 }, { "epoch": 3.94, "learning_rate": 3.1496062992125985e-06, "loss": 0.0176, "step": 500 }, { "epoch": 3.94, "eval_accuracy": 0.9442970822281167, "eval_loss": 0.22444555163383484, "eval_runtime": 342.5831, "eval_samples_per_second": 1.1, "eval_steps_per_second": 0.14, "step": 500 } ], "max_steps": 508, "num_train_epochs": 4, "total_flos": 6.169413753348895e+17, "trial_name": null, "trial_params": null }