{ "best_metric": 0.9985872380503885, "best_model_checkpoint": "vit_base_aihub_model_py/checkpoint-745", "epoch": 4.983277591973244, "global_step": 745, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 6.666666666666667e-06, "loss": 1.3773, "step": 10 }, { "epoch": 0.13, "learning_rate": 1.3333333333333333e-05, "loss": 1.2997, "step": 20 }, { "epoch": 0.2, "learning_rate": 2e-05, "loss": 1.134, "step": 30 }, { "epoch": 0.27, "learning_rate": 2.6666666666666667e-05, "loss": 0.9478, "step": 40 }, { "epoch": 0.33, "learning_rate": 3.3333333333333335e-05, "loss": 0.7246, "step": 50 }, { "epoch": 0.4, "learning_rate": 4e-05, "loss": 0.5165, "step": 60 }, { "epoch": 0.47, "learning_rate": 4.666666666666667e-05, "loss": 0.3778, "step": 70 }, { "epoch": 0.54, "learning_rate": 4.9626865671641794e-05, "loss": 0.2721, "step": 80 }, { "epoch": 0.6, "learning_rate": 4.888059701492538e-05, "loss": 0.217, "step": 90 }, { "epoch": 0.67, "learning_rate": 4.813432835820896e-05, "loss": 0.186, "step": 100 }, { "epoch": 0.74, "learning_rate": 4.738805970149254e-05, "loss": 0.1654, "step": 110 }, { "epoch": 0.8, "learning_rate": 4.664179104477612e-05, "loss": 0.1533, "step": 120 }, { "epoch": 0.87, "learning_rate": 4.58955223880597e-05, "loss": 0.1389, "step": 130 }, { "epoch": 0.94, "learning_rate": 4.5149253731343286e-05, "loss": 0.1235, "step": 140 }, { "epoch": 1.0, "eval_accuracy": 0.9857546503414175, "eval_f1": 0.9829586496374041, "eval_loss": 0.09358736127614975, "eval_precision": 0.9845287058827537, "eval_recall": 0.9814155430620309, "eval_runtime": 111.262, "eval_samples_per_second": 76.342, "eval_steps_per_second": 0.602, "step": 149 }, { "epoch": 1.0, "learning_rate": 4.440298507462687e-05, "loss": 0.1106, "step": 150 }, { "epoch": 1.07, "learning_rate": 4.3656716417910446e-05, "loss": 0.1162, "step": 160 }, { "epoch": 1.14, "learning_rate": 4.2910447761194036e-05, "loss": 0.1059, "step": 170 }, { "epoch": 1.2, "learning_rate": 4.216417910447761e-05, "loss": 0.1115, "step": 180 }, { "epoch": 1.27, "learning_rate": 4.1417910447761195e-05, "loss": 0.0924, "step": 190 }, { "epoch": 1.34, "learning_rate": 4.067164179104478e-05, "loss": 0.0972, "step": 200 }, { "epoch": 1.4, "learning_rate": 3.992537313432836e-05, "loss": 0.0909, "step": 210 }, { "epoch": 1.47, "learning_rate": 3.9179104477611945e-05, "loss": 0.0977, "step": 220 }, { "epoch": 1.54, "learning_rate": 3.843283582089552e-05, "loss": 0.0931, "step": 230 }, { "epoch": 1.61, "learning_rate": 3.7686567164179104e-05, "loss": 0.0756, "step": 240 }, { "epoch": 1.67, "learning_rate": 3.694029850746269e-05, "loss": 0.083, "step": 250 }, { "epoch": 1.74, "learning_rate": 3.619402985074627e-05, "loss": 0.0751, "step": 260 }, { "epoch": 1.81, "learning_rate": 3.5447761194029854e-05, "loss": 0.0666, "step": 270 }, { "epoch": 1.87, "learning_rate": 3.470149253731344e-05, "loss": 0.0673, "step": 280 }, { "epoch": 1.94, "learning_rate": 3.395522388059701e-05, "loss": 0.067, "step": 290 }, { "epoch": 2.0, "eval_accuracy": 0.987756063103367, "eval_f1": 0.9859294798786484, "eval_loss": 0.06216855347156525, "eval_precision": 0.990943040361106, "eval_recall": 0.9812800326186175, "eval_runtime": 108.9574, "eval_samples_per_second": 77.957, "eval_steps_per_second": 0.615, "step": 299 }, { "epoch": 2.01, "learning_rate": 3.32089552238806e-05, "loss": 0.0754, "step": 300 }, { "epoch": 2.07, "learning_rate": 3.246268656716418e-05, "loss": 0.0657, "step": 310 }, { "epoch": 2.14, "learning_rate": 3.171641791044776e-05, "loss": 0.0683, "step": 320 }, { "epoch": 2.21, "learning_rate": 3.0970149253731346e-05, "loss": 0.0569, "step": 330 }, { "epoch": 2.27, "learning_rate": 3.0223880597014926e-05, "loss": 0.0515, "step": 340 }, { "epoch": 2.34, "learning_rate": 2.9477611940298512e-05, "loss": 0.0557, "step": 350 }, { "epoch": 2.41, "learning_rate": 2.8731343283582092e-05, "loss": 0.0558, "step": 360 }, { "epoch": 2.47, "learning_rate": 2.7985074626865672e-05, "loss": 0.0592, "step": 370 }, { "epoch": 2.54, "learning_rate": 2.7238805970149255e-05, "loss": 0.0502, "step": 380 }, { "epoch": 2.61, "learning_rate": 2.6492537313432835e-05, "loss": 0.0567, "step": 390 }, { "epoch": 2.68, "learning_rate": 2.574626865671642e-05, "loss": 0.0539, "step": 400 }, { "epoch": 2.74, "learning_rate": 2.5e-05, "loss": 0.0512, "step": 410 }, { "epoch": 2.81, "learning_rate": 2.4253731343283584e-05, "loss": 0.0544, "step": 420 }, { "epoch": 2.88, "learning_rate": 2.3507462686567168e-05, "loss": 0.0561, "step": 430 }, { "epoch": 2.94, "learning_rate": 2.2761194029850747e-05, "loss": 0.049, "step": 440 }, { "epoch": 3.0, "eval_accuracy": 0.9968212856133741, "eval_f1": 0.9964206807723841, "eval_loss": 0.03217490762472153, "eval_precision": 0.9969469418365854, "eval_recall": 0.99589824183686, "eval_runtime": 106.3062, "eval_samples_per_second": 79.901, "eval_steps_per_second": 0.63, "step": 448 }, { "epoch": 3.01, "learning_rate": 2.201492537313433e-05, "loss": 0.0493, "step": 450 }, { "epoch": 3.08, "learning_rate": 2.126865671641791e-05, "loss": 0.0544, "step": 460 }, { "epoch": 3.14, "learning_rate": 2.0522388059701493e-05, "loss": 0.0495, "step": 470 }, { "epoch": 3.21, "learning_rate": 1.9776119402985073e-05, "loss": 0.044, "step": 480 }, { "epoch": 3.28, "learning_rate": 1.9029850746268656e-05, "loss": 0.0452, "step": 490 }, { "epoch": 3.34, "learning_rate": 1.828358208955224e-05, "loss": 0.0437, "step": 500 }, { "epoch": 3.41, "learning_rate": 1.7537313432835823e-05, "loss": 0.0374, "step": 510 }, { "epoch": 3.48, "learning_rate": 1.6791044776119406e-05, "loss": 0.0389, "step": 520 }, { "epoch": 3.55, "learning_rate": 1.6044776119402986e-05, "loss": 0.0321, "step": 530 }, { "epoch": 3.61, "learning_rate": 1.529850746268657e-05, "loss": 0.0347, "step": 540 }, { "epoch": 3.68, "learning_rate": 1.455223880597015e-05, "loss": 0.0359, "step": 550 }, { "epoch": 3.75, "learning_rate": 1.3805970149253733e-05, "loss": 0.0369, "step": 560 }, { "epoch": 3.81, "learning_rate": 1.3059701492537313e-05, "loss": 0.0367, "step": 570 }, { "epoch": 3.88, "learning_rate": 1.2313432835820896e-05, "loss": 0.0353, "step": 580 }, { "epoch": 3.95, "learning_rate": 1.1567164179104478e-05, "loss": 0.0477, "step": 590 }, { "epoch": 4.0, "eval_accuracy": 0.9977631269131152, "eval_f1": 0.997511338692231, "eval_loss": 0.024851497262716293, "eval_precision": 0.9985286326587551, "eval_recall": 0.9965020764725261, "eval_runtime": 103.7864, "eval_samples_per_second": 81.841, "eval_steps_per_second": 0.646, "step": 598 }, { "epoch": 4.01, "learning_rate": 1.082089552238806e-05, "loss": 0.0382, "step": 600 }, { "epoch": 4.08, "learning_rate": 1.0074626865671643e-05, "loss": 0.0347, "step": 610 }, { "epoch": 4.15, "learning_rate": 9.328358208955226e-06, "loss": 0.0304, "step": 620 }, { "epoch": 4.21, "learning_rate": 8.582089552238805e-06, "loss": 0.0373, "step": 630 }, { "epoch": 4.28, "learning_rate": 7.835820895522389e-06, "loss": 0.0311, "step": 640 }, { "epoch": 4.35, "learning_rate": 7.08955223880597e-06, "loss": 0.0332, "step": 650 }, { "epoch": 4.41, "learning_rate": 6.343283582089552e-06, "loss": 0.0334, "step": 660 }, { "epoch": 4.48, "learning_rate": 5.597014925373135e-06, "loss": 0.0329, "step": 670 }, { "epoch": 4.55, "learning_rate": 4.850746268656717e-06, "loss": 0.0332, "step": 680 }, { "epoch": 4.62, "learning_rate": 4.1044776119402985e-06, "loss": 0.0303, "step": 690 }, { "epoch": 4.68, "learning_rate": 3.358208955223881e-06, "loss": 0.0276, "step": 700 }, { "epoch": 4.75, "learning_rate": 2.6119402985074627e-06, "loss": 0.0332, "step": 710 }, { "epoch": 4.82, "learning_rate": 1.8656716417910446e-06, "loss": 0.0317, "step": 720 }, { "epoch": 4.88, "learning_rate": 1.119402985074627e-06, "loss": 0.0315, "step": 730 }, { "epoch": 4.95, "learning_rate": 3.7313432835820895e-07, "loss": 0.0336, "step": 740 }, { "epoch": 4.98, "eval_accuracy": 0.9985872380503885, "eval_f1": 0.9985770990024514, "eval_loss": 0.021681277081370354, "eval_precision": 0.9989954885489135, "eval_recall": 0.998161142953993, "eval_runtime": 106.6587, "eval_samples_per_second": 79.637, "eval_steps_per_second": 0.628, "step": 745 }, { "epoch": 4.98, "step": 745, "total_flos": 2.9526217173796848e+19, "train_loss": 0.14726725356690837, "train_runtime": 5241.8792, "train_samples_per_second": 72.911, "train_steps_per_second": 0.142 } ], "max_steps": 745, "num_train_epochs": 5, "total_flos": 2.9526217173796848e+19, "trial_name": null, "trial_params": null }