{ "best_metric": 0.8235294117647058, "best_model_checkpoint": "vit-base-patch16-224-U8-40c\\checkpoint-120", "epoch": 40.0, "eval_steps": 500, "global_step": 800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.5, "learning_rate": 1.25e-05, "loss": 1.3827, "step": 10 }, { "epoch": 1.0, "learning_rate": 2.5e-05, "loss": 1.3495, "step": 20 }, { "epoch": 1.0, "eval_accuracy": 0.47058823529411764, "eval_loss": 1.3142259120941162, "eval_runtime": 0.9121, "eval_samples_per_second": 55.917, "eval_steps_per_second": 2.193, "step": 20 }, { "epoch": 1.5, "learning_rate": 3.7500000000000003e-05, "loss": 1.2825, "step": 30 }, { "epoch": 2.0, "learning_rate": 5e-05, "loss": 1.1689, "step": 40 }, { "epoch": 2.0, "eval_accuracy": 0.5686274509803921, "eval_loss": 1.115267038345337, "eval_runtime": 0.8177, "eval_samples_per_second": 62.373, "eval_steps_per_second": 2.446, "step": 40 }, { "epoch": 2.5, "learning_rate": 4.9342105263157894e-05, "loss": 1.0345, "step": 50 }, { "epoch": 3.0, "learning_rate": 4.868421052631579e-05, "loss": 0.8673, "step": 60 }, { "epoch": 3.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.8497752547264099, "eval_runtime": 0.8953, "eval_samples_per_second": 56.964, "eval_steps_per_second": 2.234, "step": 60 }, { "epoch": 3.5, "learning_rate": 4.802631578947368e-05, "loss": 0.72, "step": 70 }, { "epoch": 4.0, "learning_rate": 4.736842105263158e-05, "loss": 0.5847, "step": 80 }, { "epoch": 4.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.7220016121864319, "eval_runtime": 0.8116, "eval_samples_per_second": 62.841, "eval_steps_per_second": 2.464, "step": 80 }, { "epoch": 4.5, "learning_rate": 4.671052631578948e-05, "loss": 0.4682, "step": 90 }, { "epoch": 5.0, "learning_rate": 4.605263157894737e-05, "loss": 0.4029, "step": 100 }, { "epoch": 5.0, "eval_accuracy": 0.6274509803921569, "eval_loss": 0.8654420971870422, "eval_runtime": 0.817, "eval_samples_per_second": 62.426, "eval_steps_per_second": 2.448, "step": 100 }, { "epoch": 5.5, "learning_rate": 4.539473684210527e-05, "loss": 0.3146, "step": 110 }, { "epoch": 6.0, "learning_rate": 4.473684210526316e-05, "loss": 0.2562, "step": 120 }, { "epoch": 6.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.5609031319618225, "eval_runtime": 0.7989, "eval_samples_per_second": 63.835, "eval_steps_per_second": 2.503, "step": 120 }, { "epoch": 6.5, "learning_rate": 4.407894736842105e-05, "loss": 0.2305, "step": 130 }, { "epoch": 7.0, "learning_rate": 4.342105263157895e-05, "loss": 0.2352, "step": 140 }, { "epoch": 7.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.727162778377533, "eval_runtime": 0.8282, "eval_samples_per_second": 61.579, "eval_steps_per_second": 2.415, "step": 140 }, { "epoch": 7.5, "learning_rate": 4.2763157894736847e-05, "loss": 0.2511, "step": 150 }, { "epoch": 8.0, "learning_rate": 4.210526315789474e-05, "loss": 0.2131, "step": 160 }, { "epoch": 8.0, "eval_accuracy": 0.7254901960784313, "eval_loss": 0.758063793182373, "eval_runtime": 0.8058, "eval_samples_per_second": 63.289, "eval_steps_per_second": 2.482, "step": 160 }, { "epoch": 8.5, "learning_rate": 4.1447368421052636e-05, "loss": 0.164, "step": 170 }, { "epoch": 9.0, "learning_rate": 4.078947368421053e-05, "loss": 0.1616, "step": 180 }, { "epoch": 9.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.5436545610427856, "eval_runtime": 0.8354, "eval_samples_per_second": 61.045, "eval_steps_per_second": 2.394, "step": 180 }, { "epoch": 9.5, "learning_rate": 4.0131578947368425e-05, "loss": 0.138, "step": 190 }, { "epoch": 10.0, "learning_rate": 3.9473684210526316e-05, "loss": 0.1266, "step": 200 }, { "epoch": 10.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.634531557559967, "eval_runtime": 0.8867, "eval_samples_per_second": 57.514, "eval_steps_per_second": 2.255, "step": 200 }, { "epoch": 10.5, "learning_rate": 3.8815789473684214e-05, "loss": 0.116, "step": 210 }, { "epoch": 11.0, "learning_rate": 3.815789473684211e-05, "loss": 0.1557, "step": 220 }, { "epoch": 11.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.8280105590820312, "eval_runtime": 0.8226, "eval_samples_per_second": 61.997, "eval_steps_per_second": 2.431, "step": 220 }, { "epoch": 11.5, "learning_rate": 3.7500000000000003e-05, "loss": 0.1116, "step": 230 }, { "epoch": 12.0, "learning_rate": 3.6842105263157895e-05, "loss": 0.0871, "step": 240 }, { "epoch": 12.0, "eval_accuracy": 0.7058823529411765, "eval_loss": 0.9015989899635315, "eval_runtime": 0.7897, "eval_samples_per_second": 64.581, "eval_steps_per_second": 2.533, "step": 240 }, { "epoch": 12.5, "learning_rate": 3.618421052631579e-05, "loss": 0.134, "step": 250 }, { "epoch": 13.0, "learning_rate": 3.5526315789473684e-05, "loss": 0.0879, "step": 260 }, { "epoch": 13.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.8098950982093811, "eval_runtime": 0.815, "eval_samples_per_second": 62.574, "eval_steps_per_second": 2.454, "step": 260 }, { "epoch": 13.5, "learning_rate": 3.4868421052631575e-05, "loss": 0.1101, "step": 270 }, { "epoch": 14.0, "learning_rate": 3.421052631578947e-05, "loss": 0.0844, "step": 280 }, { "epoch": 14.0, "eval_accuracy": 0.7254901960784313, "eval_loss": 0.8790603280067444, "eval_runtime": 0.825, "eval_samples_per_second": 61.819, "eval_steps_per_second": 2.424, "step": 280 }, { "epoch": 14.5, "learning_rate": 3.355263157894737e-05, "loss": 0.0758, "step": 290 }, { "epoch": 15.0, "learning_rate": 3.289473684210527e-05, "loss": 0.0865, "step": 300 }, { "epoch": 15.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9712991118431091, "eval_runtime": 0.7949, "eval_samples_per_second": 64.158, "eval_steps_per_second": 2.516, "step": 300 }, { "epoch": 15.5, "learning_rate": 3.223684210526316e-05, "loss": 0.0572, "step": 310 }, { "epoch": 16.0, "learning_rate": 3.157894736842105e-05, "loss": 0.1005, "step": 320 }, { "epoch": 16.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9965818524360657, "eval_runtime": 0.8138, "eval_samples_per_second": 62.668, "eval_steps_per_second": 2.458, "step": 320 }, { "epoch": 16.5, "learning_rate": 3.092105263157895e-05, "loss": 0.0876, "step": 330 }, { "epoch": 17.0, "learning_rate": 3.0263157894736844e-05, "loss": 0.0718, "step": 340 }, { "epoch": 17.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.046757459640503, "eval_runtime": 0.7997, "eval_samples_per_second": 63.777, "eval_steps_per_second": 2.501, "step": 340 }, { "epoch": 17.5, "learning_rate": 2.9605263157894735e-05, "loss": 0.0955, "step": 350 }, { "epoch": 18.0, "learning_rate": 2.8947368421052634e-05, "loss": 0.0591, "step": 360 }, { "epoch": 18.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9471318125724792, "eval_runtime": 0.8393, "eval_samples_per_second": 60.762, "eval_steps_per_second": 2.383, "step": 360 }, { "epoch": 18.5, "learning_rate": 2.8289473684210528e-05, "loss": 0.0842, "step": 370 }, { "epoch": 19.0, "learning_rate": 2.7631578947368426e-05, "loss": 0.0641, "step": 380 }, { "epoch": 19.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 0.9904577136039734, "eval_runtime": 0.8249, "eval_samples_per_second": 61.825, "eval_steps_per_second": 2.425, "step": 380 }, { "epoch": 19.5, "learning_rate": 2.6973684210526317e-05, "loss": 0.0738, "step": 390 }, { "epoch": 20.0, "learning_rate": 2.6315789473684212e-05, "loss": 0.0542, "step": 400 }, { "epoch": 20.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 1.029980182647705, "eval_runtime": 0.8442, "eval_samples_per_second": 60.41, "eval_steps_per_second": 2.369, "step": 400 }, { "epoch": 20.5, "learning_rate": 2.565789473684211e-05, "loss": 0.0708, "step": 410 }, { "epoch": 21.0, "learning_rate": 2.5e-05, "loss": 0.0813, "step": 420 }, { "epoch": 21.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.0329537391662598, "eval_runtime": 0.819, "eval_samples_per_second": 62.27, "eval_steps_per_second": 2.442, "step": 420 }, { "epoch": 21.5, "learning_rate": 2.4342105263157896e-05, "loss": 0.0638, "step": 430 }, { "epoch": 22.0, "learning_rate": 2.368421052631579e-05, "loss": 0.059, "step": 440 }, { "epoch": 22.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.999519944190979, "eval_runtime": 0.8027, "eval_samples_per_second": 63.535, "eval_steps_per_second": 2.492, "step": 440 }, { "epoch": 22.5, "learning_rate": 2.3026315789473685e-05, "loss": 0.0518, "step": 450 }, { "epoch": 23.0, "learning_rate": 2.236842105263158e-05, "loss": 0.0679, "step": 460 }, { "epoch": 23.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 0.9327283501625061, "eval_runtime": 0.8141, "eval_samples_per_second": 62.642, "eval_steps_per_second": 2.457, "step": 460 }, { "epoch": 23.5, "learning_rate": 2.1710526315789474e-05, "loss": 0.087, "step": 470 }, { "epoch": 24.0, "learning_rate": 2.105263157894737e-05, "loss": 0.0611, "step": 480 }, { "epoch": 24.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.0073403120040894, "eval_runtime": 0.7891, "eval_samples_per_second": 64.627, "eval_steps_per_second": 2.534, "step": 480 }, { "epoch": 24.5, "learning_rate": 2.0394736842105264e-05, "loss": 0.0475, "step": 490 }, { "epoch": 25.0, "learning_rate": 1.9736842105263158e-05, "loss": 0.0694, "step": 500 }, { "epoch": 25.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.9348233342170715, "eval_runtime": 0.7727, "eval_samples_per_second": 66.0, "eval_steps_per_second": 2.588, "step": 500 }, { "epoch": 25.5, "learning_rate": 1.9078947368421056e-05, "loss": 0.0546, "step": 510 }, { "epoch": 26.0, "learning_rate": 1.8421052631578947e-05, "loss": 0.0454, "step": 520 }, { "epoch": 26.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.8550857901573181, "eval_runtime": 0.7842, "eval_samples_per_second": 65.031, "eval_steps_per_second": 2.55, "step": 520 }, { "epoch": 26.5, "learning_rate": 1.7763157894736842e-05, "loss": 0.0503, "step": 530 }, { "epoch": 27.0, "learning_rate": 1.7105263157894737e-05, "loss": 0.0536, "step": 540 }, { "epoch": 27.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.9782336354255676, "eval_runtime": 0.8246, "eval_samples_per_second": 61.852, "eval_steps_per_second": 2.426, "step": 540 }, { "epoch": 27.5, "learning_rate": 1.6447368421052635e-05, "loss": 0.0331, "step": 550 }, { "epoch": 28.0, "learning_rate": 1.5789473684210526e-05, "loss": 0.0429, "step": 560 }, { "epoch": 28.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.9203042984008789, "eval_runtime": 0.8373, "eval_samples_per_second": 60.907, "eval_steps_per_second": 2.389, "step": 560 }, { "epoch": 28.5, "learning_rate": 1.5131578947368422e-05, "loss": 0.0323, "step": 570 }, { "epoch": 29.0, "learning_rate": 1.4473684210526317e-05, "loss": 0.0386, "step": 580 }, { "epoch": 29.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.8732457160949707, "eval_runtime": 0.8127, "eval_samples_per_second": 62.756, "eval_steps_per_second": 2.461, "step": 580 }, { "epoch": 29.5, "learning_rate": 1.3815789473684213e-05, "loss": 0.0475, "step": 590 }, { "epoch": 30.0, "learning_rate": 1.3157894736842106e-05, "loss": 0.0433, "step": 600 }, { "epoch": 30.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 0.9375914931297302, "eval_runtime": 0.7855, "eval_samples_per_second": 64.925, "eval_steps_per_second": 2.546, "step": 600 }, { "epoch": 30.5, "learning_rate": 1.25e-05, "loss": 0.054, "step": 610 }, { "epoch": 31.0, "learning_rate": 1.1842105263157895e-05, "loss": 0.0353, "step": 620 }, { "epoch": 31.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 0.8531509041786194, "eval_runtime": 0.7884, "eval_samples_per_second": 64.687, "eval_steps_per_second": 2.537, "step": 620 }, { "epoch": 31.5, "learning_rate": 1.118421052631579e-05, "loss": 0.0412, "step": 630 }, { "epoch": 32.0, "learning_rate": 1.0526315789473684e-05, "loss": 0.0332, "step": 640 }, { "epoch": 32.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9123408794403076, "eval_runtime": 0.8508, "eval_samples_per_second": 59.946, "eval_steps_per_second": 2.351, "step": 640 }, { "epoch": 32.5, "learning_rate": 9.868421052631579e-06, "loss": 0.0255, "step": 650 }, { "epoch": 33.0, "learning_rate": 9.210526315789474e-06, "loss": 0.0405, "step": 660 }, { "epoch": 33.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9603415727615356, "eval_runtime": 0.772, "eval_samples_per_second": 66.066, "eval_steps_per_second": 2.591, "step": 660 }, { "epoch": 33.5, "learning_rate": 8.552631578947368e-06, "loss": 0.0445, "step": 670 }, { "epoch": 34.0, "learning_rate": 7.894736842105263e-06, "loss": 0.0423, "step": 680 }, { "epoch": 34.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9424175024032593, "eval_runtime": 0.8978, "eval_samples_per_second": 56.806, "eval_steps_per_second": 2.228, "step": 680 }, { "epoch": 34.5, "learning_rate": 7.236842105263158e-06, "loss": 0.0317, "step": 690 }, { "epoch": 35.0, "learning_rate": 6.578947368421053e-06, "loss": 0.0383, "step": 700 }, { "epoch": 35.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.9687171578407288, "eval_runtime": 0.7905, "eval_samples_per_second": 64.514, "eval_steps_per_second": 2.53, "step": 700 }, { "epoch": 35.5, "learning_rate": 5.921052631578948e-06, "loss": 0.0435, "step": 710 }, { "epoch": 36.0, "learning_rate": 5.263157894736842e-06, "loss": 0.0245, "step": 720 }, { "epoch": 36.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.9509010314941406, "eval_runtime": 0.8121, "eval_samples_per_second": 62.798, "eval_steps_per_second": 2.463, "step": 720 }, { "epoch": 36.5, "learning_rate": 4.605263157894737e-06, "loss": 0.0236, "step": 730 }, { "epoch": 37.0, "learning_rate": 3.9473684210526315e-06, "loss": 0.0309, "step": 740 }, { "epoch": 37.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.8950434923171997, "eval_runtime": 0.8148, "eval_samples_per_second": 62.589, "eval_steps_per_second": 2.454, "step": 740 }, { "epoch": 37.5, "learning_rate": 3.2894736842105265e-06, "loss": 0.0306, "step": 750 }, { "epoch": 38.0, "learning_rate": 2.631578947368421e-06, "loss": 0.026, "step": 760 }, { "epoch": 38.0, "eval_accuracy": 0.803921568627451, "eval_loss": 0.9081793427467346, "eval_runtime": 0.8266, "eval_samples_per_second": 61.7, "eval_steps_per_second": 2.42, "step": 760 }, { "epoch": 38.5, "learning_rate": 1.9736842105263157e-06, "loss": 0.0335, "step": 770 }, { "epoch": 39.0, "learning_rate": 1.3157894736842106e-06, "loss": 0.0192, "step": 780 }, { "epoch": 39.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.8859331607818604, "eval_runtime": 0.8155, "eval_samples_per_second": 62.535, "eval_steps_per_second": 2.452, "step": 780 }, { "epoch": 39.5, "learning_rate": 6.578947368421053e-07, "loss": 0.0235, "step": 790 }, { "epoch": 40.0, "learning_rate": 0.0, "loss": 0.0322, "step": 800 }, { "epoch": 40.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 0.8968344926834106, "eval_runtime": 0.8319, "eval_samples_per_second": 61.307, "eval_steps_per_second": 2.404, "step": 800 }, { "epoch": 40.0, "step": 800, "total_flos": 7.873327274596762e+18, "train_loss": 0.1865533402003348, "train_runtime": 1567.0918, "train_samples_per_second": 64.833, "train_steps_per_second": 0.51 } ], "logging_steps": 10, "max_steps": 800, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 7.873327274596762e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }