{ "best_metric": 1.0, "best_model_checkpoint": "vit-base-patch16-224-Trial007-YEL_STEM4/checkpoint-13", "epoch": 44.44444444444444, "global_step": 100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.44, "learning_rate": 5e-06, "loss": 0.8105, "step": 1 }, { "epoch": 0.89, "learning_rate": 1e-05, "loss": 0.8588, "step": 2 }, { "epoch": 0.89, "eval_accuracy": 0.48148148148148145, "eval_loss": 0.7925114035606384, "eval_runtime": 0.2695, "eval_samples_per_second": 200.403, "eval_steps_per_second": 3.711, "step": 2 }, { "epoch": 1.33, "learning_rate": 1.5e-05, "loss": 0.8, "step": 3 }, { "epoch": 1.78, "learning_rate": 2e-05, "loss": 0.7235, "step": 4 }, { "epoch": 1.78, "eval_accuracy": 0.6851851851851852, "eval_loss": 0.6471065282821655, "eval_runtime": 0.2654, "eval_samples_per_second": 203.498, "eval_steps_per_second": 3.768, "step": 4 }, { "epoch": 2.22, "learning_rate": 2.5e-05, "loss": 0.624, "step": 5 }, { "epoch": 2.67, "learning_rate": 3e-05, "loss": 0.6009, "step": 6 }, { "epoch": 2.67, "eval_accuracy": 0.7222222222222222, "eval_loss": 0.5245712399482727, "eval_runtime": 0.2719, "eval_samples_per_second": 198.634, "eval_steps_per_second": 3.678, "step": 6 }, { "epoch": 3.11, "learning_rate": 3.5e-05, "loss": 0.5091, "step": 7 }, { "epoch": 3.56, "learning_rate": 4e-05, "loss": 0.5349, "step": 8 }, { "epoch": 4.0, "learning_rate": 4.5e-05, "loss": 0.4196, "step": 9 }, { "epoch": 4.0, "eval_accuracy": 0.9074074074074074, "eval_loss": 0.3422330617904663, "eval_runtime": 0.2687, "eval_samples_per_second": 200.971, "eval_steps_per_second": 3.722, "step": 9 }, { "epoch": 4.44, "learning_rate": 5e-05, "loss": 0.4408, "step": 10 }, { "epoch": 4.89, "learning_rate": 4.9444444444444446e-05, "loss": 0.4022, "step": 11 }, { "epoch": 4.89, "eval_accuracy": 0.9259259259259259, "eval_loss": 0.3213180601596832, "eval_runtime": 0.2605, "eval_samples_per_second": 207.264, "eval_steps_per_second": 3.838, "step": 11 }, { "epoch": 5.33, "learning_rate": 4.888888888888889e-05, "loss": 0.3015, "step": 12 }, { "epoch": 5.78, "learning_rate": 4.8333333333333334e-05, "loss": 0.3531, "step": 13 }, { "epoch": 5.78, "eval_accuracy": 1.0, "eval_loss": 0.19483274221420288, "eval_runtime": 0.2633, "eval_samples_per_second": 205.079, "eval_steps_per_second": 3.798, "step": 13 }, { "epoch": 6.22, "learning_rate": 4.7777777777777784e-05, "loss": 0.2652, "step": 14 }, { "epoch": 6.67, "learning_rate": 4.722222222222222e-05, "loss": 0.3095, "step": 15 }, { "epoch": 6.67, "eval_accuracy": 1.0, "eval_loss": 0.11961700022220612, "eval_runtime": 0.2623, "eval_samples_per_second": 205.894, "eval_steps_per_second": 3.813, "step": 15 }, { "epoch": 7.11, "learning_rate": 4.666666666666667e-05, "loss": 0.2041, "step": 16 }, { "epoch": 7.56, "learning_rate": 4.6111111111111115e-05, "loss": 0.2782, "step": 17 }, { "epoch": 8.0, "learning_rate": 4.555555555555556e-05, "loss": 0.283, "step": 18 }, { "epoch": 8.0, "eval_accuracy": 1.0, "eval_loss": 0.06661991775035858, "eval_runtime": 0.2844, "eval_samples_per_second": 189.904, "eval_steps_per_second": 3.517, "step": 18 }, { "epoch": 8.44, "learning_rate": 4.5e-05, "loss": 0.1837, "step": 19 }, { "epoch": 8.89, "learning_rate": 4.4444444444444447e-05, "loss": 0.1607, "step": 20 }, { "epoch": 8.89, "eval_accuracy": 1.0, "eval_loss": 0.04014136642217636, "eval_runtime": 0.2662, "eval_samples_per_second": 202.872, "eval_steps_per_second": 3.757, "step": 20 }, { "epoch": 9.33, "learning_rate": 4.388888888888889e-05, "loss": 0.427, "step": 21 }, { "epoch": 9.78, "learning_rate": 4.3333333333333334e-05, "loss": 0.1459, "step": 22 }, { "epoch": 9.78, "eval_accuracy": 1.0, "eval_loss": 0.03019571676850319, "eval_runtime": 0.2627, "eval_samples_per_second": 205.591, "eval_steps_per_second": 3.807, "step": 22 }, { "epoch": 10.22, "learning_rate": 4.277777777777778e-05, "loss": 0.1214, "step": 23 }, { "epoch": 10.67, "learning_rate": 4.222222222222222e-05, "loss": 0.1325, "step": 24 }, { "epoch": 10.67, "eval_accuracy": 1.0, "eval_loss": 0.022280599921941757, "eval_runtime": 0.2634, "eval_samples_per_second": 205.012, "eval_steps_per_second": 3.797, "step": 24 }, { "epoch": 11.11, "learning_rate": 4.166666666666667e-05, "loss": 0.0976, "step": 25 }, { "epoch": 11.56, "learning_rate": 4.111111111111111e-05, "loss": 0.1322, "step": 26 }, { "epoch": 12.0, "learning_rate": 4.055555555555556e-05, "loss": 0.1362, "step": 27 }, { "epoch": 12.0, "eval_accuracy": 1.0, "eval_loss": 0.020502379164099693, "eval_runtime": 0.264, "eval_samples_per_second": 204.523, "eval_steps_per_second": 3.787, "step": 27 }, { "epoch": 12.44, "learning_rate": 4e-05, "loss": 0.0831, "step": 28 }, { "epoch": 12.89, "learning_rate": 3.944444444444445e-05, "loss": 0.1623, "step": 29 }, { "epoch": 12.89, "eval_accuracy": 1.0, "eval_loss": 0.00942131970077753, "eval_runtime": 0.2683, "eval_samples_per_second": 201.286, "eval_steps_per_second": 3.728, "step": 29 }, { "epoch": 13.33, "learning_rate": 3.888888888888889e-05, "loss": 0.1171, "step": 30 }, { "epoch": 13.78, "learning_rate": 3.8333333333333334e-05, "loss": 0.0974, "step": 31 }, { "epoch": 13.78, "eval_accuracy": 1.0, "eval_loss": 0.004605439025908709, "eval_runtime": 0.2648, "eval_samples_per_second": 203.94, "eval_steps_per_second": 3.777, "step": 31 }, { "epoch": 14.22, "learning_rate": 3.777777777777778e-05, "loss": 0.0738, "step": 32 }, { "epoch": 14.67, "learning_rate": 3.722222222222222e-05, "loss": 0.1077, "step": 33 }, { "epoch": 14.67, "eval_accuracy": 1.0, "eval_loss": 0.005409094505012035, "eval_runtime": 0.2623, "eval_samples_per_second": 205.903, "eval_steps_per_second": 3.813, "step": 33 }, { "epoch": 15.11, "learning_rate": 3.6666666666666666e-05, "loss": 0.0845, "step": 34 }, { "epoch": 15.56, "learning_rate": 3.611111111111111e-05, "loss": 0.1003, "step": 35 }, { "epoch": 16.0, "learning_rate": 3.555555555555556e-05, "loss": 0.0742, "step": 36 }, { "epoch": 16.0, "eval_accuracy": 1.0, "eval_loss": 0.003965036477893591, "eval_runtime": 0.2673, "eval_samples_per_second": 202.006, "eval_steps_per_second": 3.741, "step": 36 }, { "epoch": 16.44, "learning_rate": 3.5e-05, "loss": 0.0894, "step": 37 }, { "epoch": 16.89, "learning_rate": 3.444444444444445e-05, "loss": 0.1468, "step": 38 }, { "epoch": 16.89, "eval_accuracy": 1.0, "eval_loss": 0.0029907028656452894, "eval_runtime": 0.2635, "eval_samples_per_second": 204.948, "eval_steps_per_second": 3.795, "step": 38 }, { "epoch": 17.33, "learning_rate": 3.388888888888889e-05, "loss": 0.0892, "step": 39 }, { "epoch": 17.78, "learning_rate": 3.3333333333333335e-05, "loss": 0.077, "step": 40 }, { "epoch": 17.78, "eval_accuracy": 1.0, "eval_loss": 0.004134837072342634, "eval_runtime": 0.2684, "eval_samples_per_second": 201.225, "eval_steps_per_second": 3.726, "step": 40 }, { "epoch": 18.22, "learning_rate": 3.277777777777778e-05, "loss": 0.0557, "step": 41 }, { "epoch": 18.67, "learning_rate": 3.222222222222223e-05, "loss": 0.0907, "step": 42 }, { "epoch": 18.67, "eval_accuracy": 1.0, "eval_loss": 0.010868255980312824, "eval_runtime": 0.2644, "eval_samples_per_second": 204.245, "eval_steps_per_second": 3.782, "step": 42 }, { "epoch": 19.11, "learning_rate": 3.1666666666666666e-05, "loss": 0.0606, "step": 43 }, { "epoch": 19.56, "learning_rate": 3.111111111111111e-05, "loss": 0.0898, "step": 44 }, { "epoch": 20.0, "learning_rate": 3.055555555555556e-05, "loss": 0.0363, "step": 45 }, { "epoch": 20.0, "eval_accuracy": 1.0, "eval_loss": 0.0022966607939451933, "eval_runtime": 0.2667, "eval_samples_per_second": 202.454, "eval_steps_per_second": 3.749, "step": 45 }, { "epoch": 20.44, "learning_rate": 3e-05, "loss": 0.0661, "step": 46 }, { "epoch": 20.89, "learning_rate": 2.9444444444444448e-05, "loss": 0.0519, "step": 47 }, { "epoch": 20.89, "eval_accuracy": 1.0, "eval_loss": 0.0016263651195913553, "eval_runtime": 0.2632, "eval_samples_per_second": 205.141, "eval_steps_per_second": 3.799, "step": 47 }, { "epoch": 21.33, "learning_rate": 2.8888888888888888e-05, "loss": 0.0675, "step": 48 }, { "epoch": 21.78, "learning_rate": 2.8333333333333335e-05, "loss": 0.0672, "step": 49 }, { "epoch": 21.78, "eval_accuracy": 1.0, "eval_loss": 0.0014932247577235103, "eval_runtime": 0.2829, "eval_samples_per_second": 190.887, "eval_steps_per_second": 3.535, "step": 49 }, { "epoch": 22.22, "learning_rate": 2.777777777777778e-05, "loss": 0.072, "step": 50 }, { "epoch": 22.67, "learning_rate": 2.7222222222222223e-05, "loss": 0.0894, "step": 51 }, { "epoch": 22.67, "eval_accuracy": 1.0, "eval_loss": 0.0020148803014308214, "eval_runtime": 0.2666, "eval_samples_per_second": 202.549, "eval_steps_per_second": 3.751, "step": 51 }, { "epoch": 23.11, "learning_rate": 2.6666666666666667e-05, "loss": 0.0508, "step": 52 }, { "epoch": 23.56, "learning_rate": 2.6111111111111114e-05, "loss": 0.0838, "step": 53 }, { "epoch": 24.0, "learning_rate": 2.5555555555555554e-05, "loss": 0.0267, "step": 54 }, { "epoch": 24.0, "eval_accuracy": 1.0, "eval_loss": 0.0019541929941624403, "eval_runtime": 0.2644, "eval_samples_per_second": 204.256, "eval_steps_per_second": 3.783, "step": 54 }, { "epoch": 24.44, "learning_rate": 2.5e-05, "loss": 0.0468, "step": 55 }, { "epoch": 24.89, "learning_rate": 2.4444444444444445e-05, "loss": 0.0639, "step": 56 }, { "epoch": 24.89, "eval_accuracy": 1.0, "eval_loss": 0.0019170470768585801, "eval_runtime": 0.2753, "eval_samples_per_second": 196.133, "eval_steps_per_second": 3.632, "step": 56 }, { "epoch": 25.33, "learning_rate": 2.3888888888888892e-05, "loss": 0.0601, "step": 57 }, { "epoch": 25.78, "learning_rate": 2.3333333333333336e-05, "loss": 0.0675, "step": 58 }, { "epoch": 25.78, "eval_accuracy": 1.0, "eval_loss": 0.0023251723032444715, "eval_runtime": 0.2637, "eval_samples_per_second": 204.786, "eval_steps_per_second": 3.792, "step": 58 }, { "epoch": 26.22, "learning_rate": 2.277777777777778e-05, "loss": 0.1053, "step": 59 }, { "epoch": 26.67, "learning_rate": 2.2222222222222223e-05, "loss": 0.0508, "step": 60 }, { "epoch": 26.67, "eval_accuracy": 1.0, "eval_loss": 0.0019600405357778072, "eval_runtime": 0.2632, "eval_samples_per_second": 205.143, "eval_steps_per_second": 3.799, "step": 60 }, { "epoch": 27.11, "learning_rate": 2.1666666666666667e-05, "loss": 0.0574, "step": 61 }, { "epoch": 27.56, "learning_rate": 2.111111111111111e-05, "loss": 0.0738, "step": 62 }, { "epoch": 28.0, "learning_rate": 2.0555555555555555e-05, "loss": 0.0509, "step": 63 }, { "epoch": 28.0, "eval_accuracy": 1.0, "eval_loss": 0.0014345721574500203, "eval_runtime": 0.264, "eval_samples_per_second": 204.556, "eval_steps_per_second": 3.788, "step": 63 }, { "epoch": 28.44, "learning_rate": 2e-05, "loss": 0.073, "step": 64 }, { "epoch": 28.89, "learning_rate": 1.9444444444444445e-05, "loss": 0.0573, "step": 65 }, { "epoch": 28.89, "eval_accuracy": 1.0, "eval_loss": 0.0018337038345634937, "eval_runtime": 0.2658, "eval_samples_per_second": 203.143, "eval_steps_per_second": 3.762, "step": 65 }, { "epoch": 29.33, "learning_rate": 1.888888888888889e-05, "loss": 0.0369, "step": 66 }, { "epoch": 29.78, "learning_rate": 1.8333333333333333e-05, "loss": 0.0584, "step": 67 }, { "epoch": 29.78, "eval_accuracy": 1.0, "eval_loss": 0.0013740634312853217, "eval_runtime": 0.2689, "eval_samples_per_second": 200.843, "eval_steps_per_second": 3.719, "step": 67 }, { "epoch": 30.22, "learning_rate": 1.777777777777778e-05, "loss": 0.1127, "step": 68 }, { "epoch": 30.67, "learning_rate": 1.7222222222222224e-05, "loss": 0.0657, "step": 69 }, { "epoch": 30.67, "eval_accuracy": 1.0, "eval_loss": 0.0011925003491342068, "eval_runtime": 0.2655, "eval_samples_per_second": 203.376, "eval_steps_per_second": 3.766, "step": 69 }, { "epoch": 31.11, "learning_rate": 1.6666666666666667e-05, "loss": 0.0286, "step": 70 }, { "epoch": 31.56, "learning_rate": 1.6111111111111115e-05, "loss": 0.0655, "step": 71 }, { "epoch": 32.0, "learning_rate": 1.5555555555555555e-05, "loss": 0.0635, "step": 72 }, { "epoch": 32.0, "eval_accuracy": 1.0, "eval_loss": 0.0008685937500558794, "eval_runtime": 0.2664, "eval_samples_per_second": 202.738, "eval_steps_per_second": 3.754, "step": 72 }, { "epoch": 32.44, "learning_rate": 1.5e-05, "loss": 0.0897, "step": 73 }, { "epoch": 32.89, "learning_rate": 1.4444444444444444e-05, "loss": 0.0617, "step": 74 }, { "epoch": 32.89, "eval_accuracy": 1.0, "eval_loss": 0.000824602844659239, "eval_runtime": 0.2666, "eval_samples_per_second": 202.534, "eval_steps_per_second": 3.751, "step": 74 }, { "epoch": 33.33, "learning_rate": 1.388888888888889e-05, "loss": 0.1261, "step": 75 }, { "epoch": 33.78, "learning_rate": 1.3333333333333333e-05, "loss": 0.0614, "step": 76 }, { "epoch": 33.78, "eval_accuracy": 1.0, "eval_loss": 0.0008151546935550869, "eval_runtime": 0.2757, "eval_samples_per_second": 195.883, "eval_steps_per_second": 3.627, "step": 76 }, { "epoch": 34.22, "learning_rate": 1.2777777777777777e-05, "loss": 0.0437, "step": 77 }, { "epoch": 34.67, "learning_rate": 1.2222222222222222e-05, "loss": 0.0614, "step": 78 }, { "epoch": 34.67, "eval_accuracy": 1.0, "eval_loss": 0.0008515185909345746, "eval_runtime": 0.2867, "eval_samples_per_second": 188.348, "eval_steps_per_second": 3.488, "step": 78 }, { "epoch": 35.11, "learning_rate": 1.1666666666666668e-05, "loss": 0.4154, "step": 79 }, { "epoch": 35.56, "learning_rate": 1.1111111111111112e-05, "loss": 0.0741, "step": 80 }, { "epoch": 36.0, "learning_rate": 1.0555555555555555e-05, "loss": 0.0618, "step": 81 }, { "epoch": 36.0, "eval_accuracy": 1.0, "eval_loss": 0.0008116821409203112, "eval_runtime": 0.2688, "eval_samples_per_second": 200.888, "eval_steps_per_second": 3.72, "step": 81 }, { "epoch": 36.44, "learning_rate": 1e-05, "loss": 0.0478, "step": 82 }, { "epoch": 36.89, "learning_rate": 9.444444444444445e-06, "loss": 0.0384, "step": 83 }, { "epoch": 36.89, "eval_accuracy": 1.0, "eval_loss": 0.0007847616798244417, "eval_runtime": 0.2653, "eval_samples_per_second": 203.518, "eval_steps_per_second": 3.769, "step": 83 }, { "epoch": 37.33, "learning_rate": 8.88888888888889e-06, "loss": 0.0435, "step": 84 }, { "epoch": 37.78, "learning_rate": 8.333333333333334e-06, "loss": 0.0565, "step": 85 }, { "epoch": 37.78, "eval_accuracy": 1.0, "eval_loss": 0.0007751730154268444, "eval_runtime": 0.2683, "eval_samples_per_second": 201.275, "eval_steps_per_second": 3.727, "step": 85 }, { "epoch": 38.22, "learning_rate": 7.777777777777777e-06, "loss": 0.0791, "step": 86 }, { "epoch": 38.67, "learning_rate": 7.222222222222222e-06, "loss": 0.0784, "step": 87 }, { "epoch": 38.67, "eval_accuracy": 1.0, "eval_loss": 0.0007627953891642392, "eval_runtime": 0.2764, "eval_samples_per_second": 195.336, "eval_steps_per_second": 3.617, "step": 87 }, { "epoch": 39.11, "learning_rate": 6.666666666666667e-06, "loss": 0.0511, "step": 88 }, { "epoch": 39.56, "learning_rate": 6.111111111111111e-06, "loss": 0.0758, "step": 89 }, { "epoch": 40.0, "learning_rate": 5.555555555555556e-06, "loss": 0.0313, "step": 90 }, { "epoch": 40.0, "eval_accuracy": 1.0, "eval_loss": 0.0007455433369614184, "eval_runtime": 0.2654, "eval_samples_per_second": 203.503, "eval_steps_per_second": 3.769, "step": 90 }, { "epoch": 40.44, "learning_rate": 5e-06, "loss": 0.0486, "step": 91 }, { "epoch": 40.89, "learning_rate": 4.444444444444445e-06, "loss": 0.0496, "step": 92 }, { "epoch": 40.89, "eval_accuracy": 1.0, "eval_loss": 0.0007441618363372982, "eval_runtime": 0.262, "eval_samples_per_second": 206.068, "eval_steps_per_second": 3.816, "step": 92 }, { "epoch": 41.33, "learning_rate": 3.888888888888889e-06, "loss": 0.0519, "step": 93 }, { "epoch": 41.78, "learning_rate": 3.3333333333333333e-06, "loss": 0.0273, "step": 94 }, { "epoch": 41.78, "eval_accuracy": 1.0, "eval_loss": 0.0007541766972281039, "eval_runtime": 0.2661, "eval_samples_per_second": 202.899, "eval_steps_per_second": 3.757, "step": 94 }, { "epoch": 42.22, "learning_rate": 2.777777777777778e-06, "loss": 0.0432, "step": 95 }, { "epoch": 42.67, "learning_rate": 2.2222222222222225e-06, "loss": 0.0448, "step": 96 }, { "epoch": 42.67, "eval_accuracy": 1.0, "eval_loss": 0.0007555445190519094, "eval_runtime": 0.2674, "eval_samples_per_second": 201.916, "eval_steps_per_second": 3.739, "step": 96 }, { "epoch": 43.11, "learning_rate": 1.6666666666666667e-06, "loss": 0.0526, "step": 97 }, { "epoch": 43.56, "learning_rate": 1.1111111111111112e-06, "loss": 0.0518, "step": 98 }, { "epoch": 44.0, "learning_rate": 5.555555555555556e-07, "loss": 0.0948, "step": 99 }, { "epoch": 44.0, "eval_accuracy": 1.0, "eval_loss": 0.0007408310775645077, "eval_runtime": 0.2652, "eval_samples_per_second": 203.585, "eval_steps_per_second": 3.77, "step": 99 }, { "epoch": 44.44, "learning_rate": 0.0, "loss": 0.0371, "step": 100 }, { "epoch": 44.44, "eval_accuracy": 1.0, "eval_loss": 0.0007394535932689905, "eval_runtime": 0.2915, "eval_samples_per_second": 185.274, "eval_steps_per_second": 3.431, "step": 100 }, { "epoch": 44.44, "step": 100, "total_flos": 1.6586385457107272e+18, "train_loss": 0.1560394330881536, "train_runtime": 688.8603, "train_samples_per_second": 34.913, "train_steps_per_second": 0.145 } ], "max_steps": 100, "num_train_epochs": 50, "total_flos": 1.6586385457107272e+18, "trial_name": null, "trial_params": null }