{ "best_metric": 0.8431372549019608, "best_model_checkpoint": "vit-base-patch16-224-U8-40d\\checkpoint-200", "epoch": 40.0, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 1.5e-05, "loss": 1.3819, "step": 10 }, { "epoch": 1.0, "learning_rate": 3e-05, "loss": 1.3419, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.47058823529411764, "eval_loss": 1.299819827079773, "eval_runtime": 0.7952, "eval_samples_per_second": 64.133, "eval_steps_per_second": 2.515, "step": 20 }, { "epoch": 1.5, "learning_rate": 4.5e-05, "loss": 1.263, "step": 30 }, { "epoch": 2.0, "learning_rate": 6e-05, "loss": 1.1313, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.5686274509803921, "eval_loss": 1.0832325220108032, "eval_runtime": 0.8195, "eval_samples_per_second": 62.236, "eval_steps_per_second": 2.441, "step": 40 }, { "epoch": 2.5, "learning_rate": 5.921052631578947e-05, "loss": 0.978, "step": 50 }, { "epoch": 3.0, "learning_rate": 5.842105263157895e-05, "loss": 0.7969, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.809360682964325, "eval_runtime": 0.8396, "eval_samples_per_second": 60.742, "eval_steps_per_second": 2.382, "step": 60 }, { "epoch": 3.5, "learning_rate": 5.7631578947368423e-05, "loss": 0.6378, "step": 70 }, { "epoch": 4.0, "learning_rate": 5.684210526315789e-05, "loss": 0.5063, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.6573488116264343, "eval_runtime": 0.7944, "eval_samples_per_second": 64.199, "eval_steps_per_second": 2.518, "step": 80 }, { "epoch": 4.5, "learning_rate": 5.605263157894737e-05, "loss": 0.3989, "step": 90 }, { "epoch": 5.0, "learning_rate": 5.5263157894736845e-05, "loss": 0.3367, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.6389498114585876, "eval_runtime": 0.7895, "eval_samples_per_second": 64.594, "eval_steps_per_second": 2.533, "step": 100 }, { "epoch": 5.5, "learning_rate": 5.447368421052632e-05, "loss": 0.2707, "step": 110 }, { "epoch": 6.0, "learning_rate": 5.368421052631579e-05, "loss": 0.242, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 0.6878873705863953, "eval_runtime": 0.8009, "eval_samples_per_second": 63.676, "eval_steps_per_second": 2.497, "step": 120 }, { "epoch": 6.5, "learning_rate": 5.289473684210526e-05, "loss": 0.2263, "step": 130 }, { "epoch": 7.0, "learning_rate": 5.210526315789474e-05, "loss": 0.1881, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.7058823529411765, "eval_loss": 0.7939884066581726, "eval_runtime": 0.8019, "eval_samples_per_second": 63.596, "eval_steps_per_second": 2.494, "step": 140 }, { "epoch": 7.5, "learning_rate": 5.131578947368421e-05, "loss": 0.2096, "step": 150 }, { "epoch": 8.0, "learning_rate": 5.052631578947368e-05, "loss": 0.1561, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.8029699325561523, "eval_runtime": 0.7855, "eval_samples_per_second": 64.927, "eval_steps_per_second": 2.546, "step": 160 }, { "epoch": 8.5, "learning_rate": 4.973684210526316e-05, "loss": 0.1468, "step": 170 }, { "epoch": 9.0, "learning_rate": 4.8947368421052635e-05, "loss": 0.1557, "step": 180 }, { "epoch": 9.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.7004449367523193, "eval_runtime": 0.797, "eval_samples_per_second": 63.992, "eval_steps_per_second": 2.509, "step": 180 }, { "epoch": 9.5, "learning_rate": 4.815789473684211e-05, "loss": 0.1385, "step": 190 }, { "epoch": 10.0, "learning_rate": 4.736842105263158e-05, "loss": 0.1154, "step": 200 }, { "epoch": 10.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 0.649506688117981, "eval_runtime": 0.7901, "eval_samples_per_second": 64.552, "eval_steps_per_second": 2.531, "step": 200 }, { "epoch": 10.5, "learning_rate": 4.657894736842105e-05, "loss": 0.1235, "step": 210 }, { "epoch": 11.0, "learning_rate": 4.5789473684210527e-05, "loss": 0.1469, "step": 220 }, { "epoch": 11.0, "eval_accuracy": 0.7058823529411765, "eval_loss": 1.1387523412704468, "eval_runtime": 0.7986, "eval_samples_per_second": 63.858, "eval_steps_per_second": 2.504, "step": 220 }, { "epoch": 11.5, "learning_rate": 4.5e-05, "loss": 0.1223, "step": 230 }, { "epoch": 12.0, "learning_rate": 4.421052631578947e-05, "loss": 0.0898, "step": 240 }, { "epoch": 12.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.7966563105583191, "eval_runtime": 0.8161, "eval_samples_per_second": 62.495, "eval_steps_per_second": 2.451, "step": 240 }, { "epoch": 12.5, "learning_rate": 4.342105263157895e-05, "loss": 0.1042, "step": 250 }, { "epoch": 13.0, "learning_rate": 4.2631578947368425e-05, "loss": 0.0719, "step": 260 }, { "epoch": 13.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.8934146165847778, "eval_runtime": 0.7865, "eval_samples_per_second": 64.845, "eval_steps_per_second": 2.543, "step": 260 }, { "epoch": 13.5, "learning_rate": 4.1842105263157894e-05, "loss": 0.0927, "step": 270 }, { "epoch": 14.0, "learning_rate": 4.105263157894737e-05, "loss": 0.0739, "step": 280 }, { "epoch": 14.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.8476159572601318, "eval_runtime": 0.8141, "eval_samples_per_second": 62.643, "eval_steps_per_second": 2.457, "step": 280 }, { "epoch": 14.5, "learning_rate": 4.026315789473684e-05, "loss": 0.088, "step": 290 }, { "epoch": 15.0, "learning_rate": 3.9473684210526316e-05, "loss": 0.0823, "step": 300 }, { "epoch": 15.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.9692044854164124, "eval_runtime": 0.82, "eval_samples_per_second": 62.196, "eval_steps_per_second": 2.439, "step": 300 }, { "epoch": 15.5, "learning_rate": 3.868421052631579e-05, "loss": 0.0714, "step": 310 }, { "epoch": 16.0, "learning_rate": 3.789473684210526e-05, "loss": 0.0828, "step": 320 }, { "epoch": 16.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9384645819664001, "eval_runtime": 0.8043, "eval_samples_per_second": 63.407, "eval_steps_per_second": 2.487, "step": 320 }, { "epoch": 16.5, "learning_rate": 3.710526315789474e-05, "loss": 0.0762, "step": 330 }, { "epoch": 17.0, "learning_rate": 3.6315789473684214e-05, "loss": 0.0761, "step": 340 }, { "epoch": 17.0, "eval_accuracy": 0.7254901960784313, "eval_loss": 1.1684223413467407, "eval_runtime": 0.8011, "eval_samples_per_second": 63.661, "eval_steps_per_second": 2.496, "step": 340 }, { "epoch": 17.5, "learning_rate": 3.5526315789473684e-05, "loss": 0.0925, "step": 350 }, { "epoch": 18.0, "learning_rate": 3.473684210526316e-05, "loss": 0.0597, "step": 360 }, { "epoch": 18.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.9413917660713196, "eval_runtime": 0.8568, "eval_samples_per_second": 59.521, "eval_steps_per_second": 2.334, "step": 360 }, { "epoch": 18.5, "learning_rate": 3.394736842105263e-05, "loss": 0.0806, "step": 370 }, { "epoch": 19.0, "learning_rate": 3.3157894736842106e-05, "loss": 0.0727, "step": 380 }, { "epoch": 19.0, "eval_accuracy": 0.7058823529411765, "eval_loss": 1.020107388496399, "eval_runtime": 0.8388, "eval_samples_per_second": 60.801, "eval_steps_per_second": 2.384, "step": 380 }, { "epoch": 19.5, "learning_rate": 3.236842105263158e-05, "loss": 0.0789, "step": 390 }, { "epoch": 20.0, "learning_rate": 3.157894736842105e-05, "loss": 0.0507, "step": 400 }, { "epoch": 20.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.8562985062599182, "eval_runtime": 0.7915, "eval_samples_per_second": 64.435, "eval_steps_per_second": 2.527, "step": 400 }, { "epoch": 20.5, "learning_rate": 3.078947368421053e-05, "loss": 0.0557, "step": 410 }, { "epoch": 21.0, "learning_rate": 3e-05, "loss": 0.0587, "step": 420 }, { "epoch": 21.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.8476255536079407, "eval_runtime": 0.7773, "eval_samples_per_second": 65.611, "eval_steps_per_second": 2.573, "step": 420 }, { "epoch": 21.5, "learning_rate": 2.9210526315789474e-05, "loss": 0.0615, "step": 430 }, { "epoch": 22.0, "learning_rate": 2.8421052631578946e-05, "loss": 0.0608, "step": 440 }, { "epoch": 22.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9399316310882568, "eval_runtime": 0.8324, "eval_samples_per_second": 61.27, "eval_steps_per_second": 2.403, "step": 440 }, { "epoch": 22.5, "learning_rate": 2.7631578947368423e-05, "loss": 0.0505, "step": 450 }, { "epoch": 23.0, "learning_rate": 2.6842105263157896e-05, "loss": 0.055, "step": 460 }, { "epoch": 23.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 0.8819794654846191, "eval_runtime": 0.7867, "eval_samples_per_second": 64.824, "eval_steps_per_second": 2.542, "step": 460 }, { "epoch": 23.5, "learning_rate": 2.605263157894737e-05, "loss": 0.0682, "step": 470 }, { "epoch": 24.0, "learning_rate": 2.526315789473684e-05, "loss": 0.0619, "step": 480 }, { "epoch": 24.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.0459517240524292, "eval_runtime": 0.7938, "eval_samples_per_second": 64.25, "eval_steps_per_second": 2.52, "step": 480 }, { "epoch": 24.5, "learning_rate": 2.4473684210526318e-05, "loss": 0.0448, "step": 490 }, { "epoch": 25.0, "learning_rate": 2.368421052631579e-05, "loss": 0.0615, "step": 500 }, { "epoch": 25.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.9392306804656982, "eval_runtime": 0.7883, "eval_samples_per_second": 64.697, "eval_steps_per_second": 2.537, "step": 500 }, { "epoch": 25.5, "learning_rate": 2.2894736842105263e-05, "loss": 0.0488, "step": 510 }, { "epoch": 26.0, "learning_rate": 2.2105263157894736e-05, "loss": 0.0455, "step": 520 }, { "epoch": 26.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.9267483353614807, "eval_runtime": 0.7948, "eval_samples_per_second": 64.17, "eval_steps_per_second": 2.516, "step": 520 }, { "epoch": 26.5, "learning_rate": 2.1315789473684212e-05, "loss": 0.0493, "step": 530 }, { "epoch": 27.0, "learning_rate": 2.0526315789473685e-05, "loss": 0.0567, "step": 540 }, { "epoch": 27.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9784489870071411, "eval_runtime": 0.8082, "eval_samples_per_second": 63.101, "eval_steps_per_second": 2.475, "step": 540 }, { "epoch": 27.5, "learning_rate": 1.9736842105263158e-05, "loss": 0.0467, "step": 550 }, { "epoch": 28.0, "learning_rate": 1.894736842105263e-05, "loss": 0.032, "step": 560 }, { "epoch": 28.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.1540778875350952, "eval_runtime": 0.7989, "eval_samples_per_second": 63.834, "eval_steps_per_second": 2.503, "step": 560 }, { "epoch": 28.5, "learning_rate": 1.8157894736842107e-05, "loss": 0.0242, "step": 570 }, { "epoch": 29.0, "learning_rate": 1.736842105263158e-05, "loss": 0.0276, "step": 580 }, { "epoch": 29.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.8864995837211609, "eval_runtime": 0.8109, "eval_samples_per_second": 62.894, "eval_steps_per_second": 2.466, "step": 580 }, { "epoch": 29.5, "learning_rate": 1.6578947368421053e-05, "loss": 0.058, "step": 590 }, { "epoch": 30.0, "learning_rate": 1.5789473684210526e-05, "loss": 0.0368, "step": 600 }, { "epoch": 30.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.0847781896591187, "eval_runtime": 0.7819, "eval_samples_per_second": 65.229, "eval_steps_per_second": 2.558, "step": 600 }, { "epoch": 30.5, "learning_rate": 1.5e-05, "loss": 0.0479, "step": 610 }, { "epoch": 31.0, "learning_rate": 1.4210526315789473e-05, "loss": 0.0342, "step": 620 }, { "epoch": 31.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9638255834579468, "eval_runtime": 0.7841, "eval_samples_per_second": 65.044, "eval_steps_per_second": 2.551, "step": 620 }, { "epoch": 31.5, "learning_rate": 1.3421052631578948e-05, "loss": 0.0352, "step": 630 }, { "epoch": 32.0, "learning_rate": 1.263157894736842e-05, "loss": 0.037, "step": 640 }, { "epoch": 32.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.961588978767395, "eval_runtime": 0.7905, "eval_samples_per_second": 64.516, "eval_steps_per_second": 2.53, "step": 640 }, { "epoch": 32.5, "learning_rate": 1.1842105263157895e-05, "loss": 0.0295, "step": 650 }, { "epoch": 33.0, "learning_rate": 1.1052631578947368e-05, "loss": 0.0371, "step": 660 }, { "epoch": 33.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.0072985887527466, "eval_runtime": 0.7709, "eval_samples_per_second": 66.152, "eval_steps_per_second": 2.594, "step": 660 }, { "epoch": 33.5, "learning_rate": 1.0263157894736843e-05, "loss": 0.0299, "step": 670 }, { "epoch": 34.0, "learning_rate": 9.473684210526315e-06, "loss": 0.0371, "step": 680 }, { "epoch": 34.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.0493559837341309, "eval_runtime": 0.7834, "eval_samples_per_second": 65.102, "eval_steps_per_second": 2.553, "step": 680 }, { "epoch": 34.5, "learning_rate": 8.68421052631579e-06, "loss": 0.0217, "step": 690 }, { "epoch": 35.0, "learning_rate": 7.894736842105263e-06, "loss": 0.0359, "step": 700 }, { "epoch": 35.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.1287018060684204, "eval_runtime": 0.7799, "eval_samples_per_second": 65.39, "eval_steps_per_second": 2.564, "step": 700 }, { "epoch": 35.5, "learning_rate": 7.105263157894737e-06, "loss": 0.0483, "step": 710 }, { "epoch": 36.0, "learning_rate": 6.31578947368421e-06, "loss": 0.0255, "step": 720 }, { "epoch": 36.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.1830930709838867, "eval_runtime": 0.7736, "eval_samples_per_second": 65.928, "eval_steps_per_second": 2.585, "step": 720 }, { "epoch": 36.5, "learning_rate": 5.526315789473684e-06, "loss": 0.026, "step": 730 }, { "epoch": 37.0, "learning_rate": 4.736842105263158e-06, "loss": 0.0269, "step": 740 }, { "epoch": 37.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.1609560251235962, "eval_runtime": 0.8022, "eval_samples_per_second": 63.578, "eval_steps_per_second": 2.493, "step": 740 }, { "epoch": 37.5, "learning_rate": 3.9473684210526315e-06, "loss": 0.023, "step": 750 }, { "epoch": 38.0, "learning_rate": 3.157894736842105e-06, "loss": 0.0292, "step": 760 }, { "epoch": 38.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.1842255592346191, "eval_runtime": 0.815, "eval_samples_per_second": 62.574, "eval_steps_per_second": 2.454, "step": 760 }, { "epoch": 38.5, "learning_rate": 2.368421052631579e-06, "loss": 0.0328, "step": 770 }, { "epoch": 39.0, "learning_rate": 1.5789473684210526e-06, "loss": 0.0161, "step": 780 }, { "epoch": 39.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.109218716621399, "eval_runtime": 0.7881, "eval_samples_per_second": 64.712, "eval_steps_per_second": 2.538, "step": 780 }, { "epoch": 39.5, "learning_rate": 7.894736842105263e-07, "loss": 0.0197, "step": 790 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.0333, "step": 800 }, { "epoch": 40.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.1185595989227295, "eval_runtime": 0.7848, "eval_samples_per_second": 64.984, "eval_steps_per_second": 2.548, "step": 800 }, { "epoch": 40.0, "step": 800, "total_flos": 7.873327274596762e+18, "train_loss": 0.17486795043572784, "train_runtime": 1567.8548, "train_samples_per_second": 64.802, "train_steps_per_second": 0.51 } ], "logging_steps": 10, "max_steps": 800, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 7.873327274596762e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }