{ "best_metric": 1.1135263442993164, "best_model_checkpoint": "./drive/MyDrive/repositories/torch_example_image-classification/outputs2/checkpoint-720", "epoch": 120.0, "eval_steps": 500, "global_step": 720, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_accuracy": 0.08823529411764706, "eval_loss": 3.2330594062805176, "eval_runtime": 47.8321, "eval_samples_per_second": 2.132, "eval_steps_per_second": 0.021, "step": 6 }, { "epoch": 1.67, "learning_rate": 1.9722222222222224e-05, "loss": 3.2363, "step": 10 }, { "epoch": 2.0, "eval_accuracy": 0.13725490196078433, "eval_loss": 3.2025418281555176, "eval_runtime": 0.6721, "eval_samples_per_second": 151.767, "eval_steps_per_second": 1.488, "step": 12 }, { "epoch": 3.0, "eval_accuracy": 0.18627450980392157, "eval_loss": 3.176102876663208, "eval_runtime": 0.7424, "eval_samples_per_second": 137.386, "eval_steps_per_second": 1.347, "step": 18 }, { "epoch": 3.33, "learning_rate": 1.9444444444444445e-05, "loss": 3.1622, "step": 20 }, { "epoch": 4.0, "eval_accuracy": 0.22549019607843138, "eval_loss": 3.1237781047821045, "eval_runtime": 0.911, "eval_samples_per_second": 111.961, "eval_steps_per_second": 1.098, "step": 24 }, { "epoch": 5.0, "learning_rate": 1.916666666666667e-05, "loss": 3.0918, "step": 30 }, { "epoch": 5.0, "eval_accuracy": 0.3137254901960784, "eval_loss": 3.078857183456421, "eval_runtime": 0.8136, "eval_samples_per_second": 125.367, "eval_steps_per_second": 1.229, "step": 30 }, { "epoch": 6.0, "eval_accuracy": 0.3235294117647059, "eval_loss": 3.028043508529663, "eval_runtime": 0.7696, "eval_samples_per_second": 132.528, "eval_steps_per_second": 1.299, "step": 36 }, { "epoch": 6.67, "learning_rate": 1.888888888888889e-05, "loss": 3.0081, "step": 40 }, { "epoch": 7.0, "eval_accuracy": 0.3431372549019608, "eval_loss": 2.9877936840057373, "eval_runtime": 0.695, "eval_samples_per_second": 146.754, "eval_steps_per_second": 1.439, "step": 42 }, { "epoch": 8.0, "eval_accuracy": 0.38235294117647056, "eval_loss": 2.931645393371582, "eval_runtime": 0.8566, "eval_samples_per_second": 119.07, "eval_steps_per_second": 1.167, "step": 48 }, { "epoch": 8.33, "learning_rate": 1.8611111111111114e-05, "loss": 2.9118, "step": 50 }, { "epoch": 9.0, "eval_accuracy": 0.43137254901960786, "eval_loss": 2.8864240646362305, "eval_runtime": 0.7935, "eval_samples_per_second": 128.548, "eval_steps_per_second": 1.26, "step": 54 }, { "epoch": 10.0, "learning_rate": 1.8333333333333333e-05, "loss": 2.8231, "step": 60 }, { "epoch": 10.0, "eval_accuracy": 0.45098039215686275, "eval_loss": 2.831418752670288, "eval_runtime": 0.7311, "eval_samples_per_second": 139.516, "eval_steps_per_second": 1.368, "step": 60 }, { "epoch": 11.0, "eval_accuracy": 0.5196078431372549, "eval_loss": 2.781670331954956, "eval_runtime": 1.0842, "eval_samples_per_second": 94.082, "eval_steps_per_second": 0.922, "step": 66 }, { "epoch": 11.67, "learning_rate": 1.8055555555555558e-05, "loss": 2.7149, "step": 70 }, { "epoch": 12.0, "eval_accuracy": 0.5196078431372549, "eval_loss": 2.7278239727020264, "eval_runtime": 0.6684, "eval_samples_per_second": 152.601, "eval_steps_per_second": 1.496, "step": 72 }, { "epoch": 13.0, "eval_accuracy": 0.5588235294117647, "eval_loss": 2.679572343826294, "eval_runtime": 0.6624, "eval_samples_per_second": 153.989, "eval_steps_per_second": 1.51, "step": 78 }, { "epoch": 13.33, "learning_rate": 1.7777777777777777e-05, "loss": 2.6202, "step": 80 }, { "epoch": 14.0, "eval_accuracy": 0.5882352941176471, "eval_loss": 2.6203176975250244, "eval_runtime": 0.7886, "eval_samples_per_second": 129.335, "eval_steps_per_second": 1.268, "step": 84 }, { "epoch": 15.0, "learning_rate": 1.7500000000000002e-05, "loss": 2.5243, "step": 90 }, { "epoch": 15.0, "eval_accuracy": 0.5882352941176471, "eval_loss": 2.5673654079437256, "eval_runtime": 0.8258, "eval_samples_per_second": 123.523, "eval_steps_per_second": 1.211, "step": 90 }, { "epoch": 16.0, "eval_accuracy": 0.6078431372549019, "eval_loss": 2.5169873237609863, "eval_runtime": 0.8815, "eval_samples_per_second": 115.707, "eval_steps_per_second": 1.134, "step": 96 }, { "epoch": 16.67, "learning_rate": 1.7222222222222224e-05, "loss": 2.4279, "step": 100 }, { "epoch": 17.0, "eval_accuracy": 0.6176470588235294, "eval_loss": 2.4672491550445557, "eval_runtime": 0.728, "eval_samples_per_second": 140.105, "eval_steps_per_second": 1.374, "step": 102 }, { "epoch": 18.0, "eval_accuracy": 0.5980392156862745, "eval_loss": 2.428467035293579, "eval_runtime": 1.0034, "eval_samples_per_second": 101.658, "eval_steps_per_second": 0.997, "step": 108 }, { "epoch": 18.33, "learning_rate": 1.6944444444444446e-05, "loss": 2.3404, "step": 110 }, { "epoch": 19.0, "eval_accuracy": 0.6568627450980392, "eval_loss": 2.3784313201904297, "eval_runtime": 0.9761, "eval_samples_per_second": 104.5, "eval_steps_per_second": 1.025, "step": 114 }, { "epoch": 20.0, "learning_rate": 1.6666666666666667e-05, "loss": 2.2633, "step": 120 }, { "epoch": 20.0, "eval_accuracy": 0.6470588235294118, "eval_loss": 2.3347628116607666, "eval_runtime": 1.0063, "eval_samples_per_second": 101.36, "eval_steps_per_second": 0.994, "step": 120 }, { "epoch": 21.0, "eval_accuracy": 0.6666666666666666, "eval_loss": 2.2872164249420166, "eval_runtime": 0.9826, "eval_samples_per_second": 103.811, "eval_steps_per_second": 1.018, "step": 126 }, { "epoch": 21.67, "learning_rate": 1.638888888888889e-05, "loss": 2.1838, "step": 130 }, { "epoch": 22.0, "eval_accuracy": 0.6568627450980392, "eval_loss": 2.2538723945617676, "eval_runtime": 0.7468, "eval_samples_per_second": 136.583, "eval_steps_per_second": 1.339, "step": 132 }, { "epoch": 23.0, "eval_accuracy": 0.6764705882352942, "eval_loss": 2.2231638431549072, "eval_runtime": 0.6591, "eval_samples_per_second": 154.75, "eval_steps_per_second": 1.517, "step": 138 }, { "epoch": 23.33, "learning_rate": 1.6111111111111115e-05, "loss": 2.1022, "step": 140 }, { "epoch": 24.0, "eval_accuracy": 0.6470588235294118, "eval_loss": 2.186713218688965, "eval_runtime": 0.668, "eval_samples_per_second": 152.702, "eval_steps_per_second": 1.497, "step": 144 }, { "epoch": 25.0, "learning_rate": 1.5833333333333333e-05, "loss": 2.0364, "step": 150 }, { "epoch": 25.0, "eval_accuracy": 0.6862745098039216, "eval_loss": 2.1489007472991943, "eval_runtime": 0.6659, "eval_samples_per_second": 153.174, "eval_steps_per_second": 1.502, "step": 150 }, { "epoch": 26.0, "eval_accuracy": 0.7254901960784313, "eval_loss": 2.109919309616089, "eval_runtime": 0.9945, "eval_samples_per_second": 102.565, "eval_steps_per_second": 1.006, "step": 156 }, { "epoch": 26.67, "learning_rate": 1.555555555555556e-05, "loss": 1.96, "step": 160 }, { "epoch": 27.0, "eval_accuracy": 0.7156862745098039, "eval_loss": 2.0766732692718506, "eval_runtime": 1.474, "eval_samples_per_second": 69.198, "eval_steps_per_second": 0.678, "step": 162 }, { "epoch": 28.0, "eval_accuracy": 0.7156862745098039, "eval_loss": 2.0417261123657227, "eval_runtime": 0.9315, "eval_samples_per_second": 109.504, "eval_steps_per_second": 1.074, "step": 168 }, { "epoch": 28.33, "learning_rate": 1.5277777777777777e-05, "loss": 1.9235, "step": 170 }, { "epoch": 29.0, "eval_accuracy": 0.7352941176470589, "eval_loss": 2.0162079334259033, "eval_runtime": 1.6546, "eval_samples_per_second": 61.648, "eval_steps_per_second": 0.604, "step": 174 }, { "epoch": 30.0, "learning_rate": 1.5000000000000002e-05, "loss": 1.8484, "step": 180 }, { "epoch": 30.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 1.9787482023239136, "eval_runtime": 0.9212, "eval_samples_per_second": 110.726, "eval_steps_per_second": 1.086, "step": 180 }, { "epoch": 31.0, "eval_accuracy": 0.7450980392156863, "eval_loss": 1.9547613859176636, "eval_runtime": 0.7852, "eval_samples_per_second": 129.907, "eval_steps_per_second": 1.274, "step": 186 }, { "epoch": 31.67, "learning_rate": 1.4722222222222224e-05, "loss": 1.7971, "step": 190 }, { "epoch": 32.0, "eval_accuracy": 0.7549019607843137, "eval_loss": 1.9329289197921753, "eval_runtime": 1.6855, "eval_samples_per_second": 60.517, "eval_steps_per_second": 0.593, "step": 192 }, { "epoch": 33.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.9051672220230103, "eval_runtime": 0.999, "eval_samples_per_second": 102.103, "eval_steps_per_second": 1.001, "step": 198 }, { "epoch": 33.33, "learning_rate": 1.4444444444444446e-05, "loss": 1.7409, "step": 200 }, { "epoch": 34.0, "eval_accuracy": 0.7549019607843137, "eval_loss": 1.8827149868011475, "eval_runtime": 0.6689, "eval_samples_per_second": 152.484, "eval_steps_per_second": 1.495, "step": 204 }, { "epoch": 35.0, "learning_rate": 1.416666666666667e-05, "loss": 1.7006, "step": 210 }, { "epoch": 35.0, "eval_accuracy": 0.7745098039215687, "eval_loss": 1.8589290380477905, "eval_runtime": 0.8033, "eval_samples_per_second": 126.972, "eval_steps_per_second": 1.245, "step": 210 }, { "epoch": 36.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.8293644189834595, "eval_runtime": 1.6485, "eval_samples_per_second": 61.875, "eval_steps_per_second": 0.607, "step": 216 }, { "epoch": 36.67, "learning_rate": 1.388888888888889e-05, "loss": 1.6426, "step": 220 }, { "epoch": 37.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.8097856044769287, "eval_runtime": 1.5961, "eval_samples_per_second": 63.905, "eval_steps_per_second": 0.627, "step": 222 }, { "epoch": 38.0, "eval_accuracy": 0.7647058823529411, "eval_loss": 1.7808825969696045, "eval_runtime": 0.7953, "eval_samples_per_second": 128.247, "eval_steps_per_second": 1.257, "step": 228 }, { "epoch": 38.33, "learning_rate": 1.3611111111111113e-05, "loss": 1.6102, "step": 230 }, { "epoch": 39.0, "eval_accuracy": 0.7843137254901961, "eval_loss": 1.764255404472351, "eval_runtime": 0.9811, "eval_samples_per_second": 103.969, "eval_steps_per_second": 1.019, "step": 234 }, { "epoch": 40.0, "learning_rate": 1.3333333333333333e-05, "loss": 1.5704, "step": 240 }, { "epoch": 40.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.7398592233657837, "eval_runtime": 0.9069, "eval_samples_per_second": 112.465, "eval_steps_per_second": 1.103, "step": 240 }, { "epoch": 41.0, "eval_accuracy": 0.8137254901960784, "eval_loss": 1.7192758321762085, "eval_runtime": 0.9941, "eval_samples_per_second": 102.601, "eval_steps_per_second": 1.006, "step": 246 }, { "epoch": 41.67, "learning_rate": 1.3055555555555557e-05, "loss": 1.5264, "step": 250 }, { "epoch": 42.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.6979612112045288, "eval_runtime": 0.8321, "eval_samples_per_second": 122.58, "eval_steps_per_second": 1.202, "step": 252 }, { "epoch": 43.0, "eval_accuracy": 0.803921568627451, "eval_loss": 1.6840269565582275, "eval_runtime": 0.8256, "eval_samples_per_second": 123.549, "eval_steps_per_second": 1.211, "step": 258 }, { "epoch": 43.33, "learning_rate": 1.2777777777777777e-05, "loss": 1.4821, "step": 260 }, { "epoch": 44.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.6644328832626343, "eval_runtime": 0.8752, "eval_samples_per_second": 116.547, "eval_steps_per_second": 1.143, "step": 264 }, { "epoch": 45.0, "learning_rate": 1.25e-05, "loss": 1.4506, "step": 270 }, { "epoch": 45.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.646732211112976, "eval_runtime": 0.8249, "eval_samples_per_second": 123.658, "eval_steps_per_second": 1.212, "step": 270 }, { "epoch": 46.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.6333067417144775, "eval_runtime": 0.7966, "eval_samples_per_second": 128.049, "eval_steps_per_second": 1.255, "step": 276 }, { "epoch": 46.67, "learning_rate": 1.2222222222222224e-05, "loss": 1.4358, "step": 280 }, { "epoch": 47.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.6094907522201538, "eval_runtime": 1.3349, "eval_samples_per_second": 76.412, "eval_steps_per_second": 0.749, "step": 282 }, { "epoch": 48.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.5905718803405762, "eval_runtime": 0.8441, "eval_samples_per_second": 120.832, "eval_steps_per_second": 1.185, "step": 288 }, { "epoch": 48.33, "learning_rate": 1.1944444444444444e-05, "loss": 1.3695, "step": 290 }, { "epoch": 49.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.571975827217102, "eval_runtime": 0.8382, "eval_samples_per_second": 121.692, "eval_steps_per_second": 1.193, "step": 294 }, { "epoch": 50.0, "learning_rate": 1.1666666666666668e-05, "loss": 1.367, "step": 300 }, { "epoch": 50.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.5610417127609253, "eval_runtime": 0.7505, "eval_samples_per_second": 135.913, "eval_steps_per_second": 1.332, "step": 300 }, { "epoch": 51.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.5439658164978027, "eval_runtime": 1.0176, "eval_samples_per_second": 100.232, "eval_steps_per_second": 0.983, "step": 306 }, { "epoch": 51.67, "learning_rate": 1.138888888888889e-05, "loss": 1.3299, "step": 310 }, { "epoch": 52.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.5359292030334473, "eval_runtime": 0.7662, "eval_samples_per_second": 133.117, "eval_steps_per_second": 1.305, "step": 312 }, { "epoch": 53.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.5128930807113647, "eval_runtime": 0.7573, "eval_samples_per_second": 134.684, "eval_steps_per_second": 1.32, "step": 318 }, { "epoch": 53.33, "learning_rate": 1.1111111111111113e-05, "loss": 1.2765, "step": 320 }, { "epoch": 54.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.5056854486465454, "eval_runtime": 0.8808, "eval_samples_per_second": 115.804, "eval_steps_per_second": 1.135, "step": 324 }, { "epoch": 55.0, "learning_rate": 1.0833333333333334e-05, "loss": 1.2785, "step": 330 }, { "epoch": 55.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.4866654872894287, "eval_runtime": 0.7751, "eval_samples_per_second": 131.599, "eval_steps_per_second": 1.29, "step": 330 }, { "epoch": 56.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.475063443183899, "eval_runtime": 0.7201, "eval_samples_per_second": 141.644, "eval_steps_per_second": 1.389, "step": 336 }, { "epoch": 56.67, "learning_rate": 1.0555555555555557e-05, "loss": 1.2355, "step": 340 }, { "epoch": 57.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.4553169012069702, "eval_runtime": 0.7072, "eval_samples_per_second": 144.23, "eval_steps_per_second": 1.414, "step": 342 }, { "epoch": 58.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.4491091966629028, "eval_runtime": 0.6583, "eval_samples_per_second": 154.94, "eval_steps_per_second": 1.519, "step": 348 }, { "epoch": 58.33, "learning_rate": 1.0277777777777777e-05, "loss": 1.2418, "step": 350 }, { "epoch": 59.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.428889513015747, "eval_runtime": 0.6735, "eval_samples_per_second": 151.449, "eval_steps_per_second": 1.485, "step": 354 }, { "epoch": 60.0, "learning_rate": 1e-05, "loss": 1.2058, "step": 360 }, { "epoch": 60.0, "eval_accuracy": 0.8235294117647058, "eval_loss": 1.4184516668319702, "eval_runtime": 0.8642, "eval_samples_per_second": 118.027, "eval_steps_per_second": 1.157, "step": 360 }, { "epoch": 61.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.4103975296020508, "eval_runtime": 0.6996, "eval_samples_per_second": 145.797, "eval_steps_per_second": 1.429, "step": 366 }, { "epoch": 61.67, "learning_rate": 9.722222222222223e-06, "loss": 1.164, "step": 370 }, { "epoch": 62.0, "eval_accuracy": 0.8333333333333334, "eval_loss": 1.396785855293274, "eval_runtime": 0.7378, "eval_samples_per_second": 138.245, "eval_steps_per_second": 1.355, "step": 372 }, { "epoch": 63.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.3845832347869873, "eval_runtime": 0.9059, "eval_samples_per_second": 112.599, "eval_steps_per_second": 1.104, "step": 378 }, { "epoch": 63.33, "learning_rate": 9.444444444444445e-06, "loss": 1.1529, "step": 380 }, { "epoch": 64.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.3697338104248047, "eval_runtime": 0.9585, "eval_samples_per_second": 106.42, "eval_steps_per_second": 1.043, "step": 384 }, { "epoch": 65.0, "learning_rate": 9.166666666666666e-06, "loss": 1.1408, "step": 390 }, { "epoch": 65.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.36329185962677, "eval_runtime": 0.8799, "eval_samples_per_second": 115.918, "eval_steps_per_second": 1.136, "step": 390 }, { "epoch": 66.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.3505079746246338, "eval_runtime": 0.8666, "eval_samples_per_second": 117.702, "eval_steps_per_second": 1.154, "step": 396 }, { "epoch": 66.67, "learning_rate": 8.888888888888888e-06, "loss": 1.1102, "step": 400 }, { "epoch": 67.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.3371140956878662, "eval_runtime": 0.6813, "eval_samples_per_second": 149.711, "eval_steps_per_second": 1.468, "step": 402 }, { "epoch": 68.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.328187108039856, "eval_runtime": 0.9696, "eval_samples_per_second": 105.199, "eval_steps_per_second": 1.031, "step": 408 }, { "epoch": 68.33, "learning_rate": 8.611111111111112e-06, "loss": 1.0906, "step": 410 }, { "epoch": 69.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.3239920139312744, "eval_runtime": 0.9604, "eval_samples_per_second": 106.207, "eval_steps_per_second": 1.041, "step": 414 }, { "epoch": 70.0, "learning_rate": 8.333333333333334e-06, "loss": 1.0759, "step": 420 }, { "epoch": 70.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.3162660598754883, "eval_runtime": 0.7343, "eval_samples_per_second": 138.913, "eval_steps_per_second": 1.362, "step": 420 }, { "epoch": 71.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.3044177293777466, "eval_runtime": 0.9811, "eval_samples_per_second": 103.966, "eval_steps_per_second": 1.019, "step": 426 }, { "epoch": 71.67, "learning_rate": 8.055555555555557e-06, "loss": 1.0651, "step": 430 }, { "epoch": 72.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2924034595489502, "eval_runtime": 0.9153, "eval_samples_per_second": 111.445, "eval_steps_per_second": 1.093, "step": 432 }, { "epoch": 73.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2867342233657837, "eval_runtime": 0.9693, "eval_samples_per_second": 105.231, "eval_steps_per_second": 1.032, "step": 438 }, { "epoch": 73.33, "learning_rate": 7.77777777777778e-06, "loss": 1.0501, "step": 440 }, { "epoch": 74.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2749128341674805, "eval_runtime": 1.1764, "eval_samples_per_second": 86.703, "eval_steps_per_second": 0.85, "step": 444 }, { "epoch": 75.0, "learning_rate": 7.500000000000001e-06, "loss": 1.0238, "step": 450 }, { "epoch": 75.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2687901258468628, "eval_runtime": 0.6685, "eval_samples_per_second": 152.571, "eval_steps_per_second": 1.496, "step": 450 }, { "epoch": 76.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2567930221557617, "eval_runtime": 0.7274, "eval_samples_per_second": 140.221, "eval_steps_per_second": 1.375, "step": 456 }, { "epoch": 76.67, "learning_rate": 7.222222222222223e-06, "loss": 1.0046, "step": 460 }, { "epoch": 77.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2502152919769287, "eval_runtime": 0.7589, "eval_samples_per_second": 134.406, "eval_steps_per_second": 1.318, "step": 462 }, { "epoch": 78.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2459862232208252, "eval_runtime": 1.3626, "eval_samples_per_second": 74.857, "eval_steps_per_second": 0.734, "step": 468 }, { "epoch": 78.33, "learning_rate": 6.944444444444445e-06, "loss": 0.9946, "step": 470 }, { "epoch": 79.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2455214262008667, "eval_runtime": 0.7802, "eval_samples_per_second": 130.729, "eval_steps_per_second": 1.282, "step": 474 }, { "epoch": 80.0, "learning_rate": 6.666666666666667e-06, "loss": 0.9998, "step": 480 }, { "epoch": 80.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2342967987060547, "eval_runtime": 0.7842, "eval_samples_per_second": 130.063, "eval_steps_per_second": 1.275, "step": 480 }, { "epoch": 81.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2286028861999512, "eval_runtime": 0.6729, "eval_samples_per_second": 151.583, "eval_steps_per_second": 1.486, "step": 486 }, { "epoch": 81.67, "learning_rate": 6.3888888888888885e-06, "loss": 0.9709, "step": 490 }, { "epoch": 82.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2194892168045044, "eval_runtime": 0.8526, "eval_samples_per_second": 119.634, "eval_steps_per_second": 1.173, "step": 492 }, { "epoch": 83.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.2126073837280273, "eval_runtime": 0.9257, "eval_samples_per_second": 110.192, "eval_steps_per_second": 1.08, "step": 498 }, { "epoch": 83.33, "learning_rate": 6.111111111111112e-06, "loss": 0.963, "step": 500 }, { "epoch": 84.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2101882696151733, "eval_runtime": 1.5366, "eval_samples_per_second": 66.38, "eval_steps_per_second": 0.651, "step": 504 }, { "epoch": 85.0, "learning_rate": 5.833333333333334e-06, "loss": 0.9499, "step": 510 }, { "epoch": 85.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.2024163007736206, "eval_runtime": 1.2053, "eval_samples_per_second": 84.623, "eval_steps_per_second": 0.83, "step": 510 }, { "epoch": 86.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1980304718017578, "eval_runtime": 0.6794, "eval_samples_per_second": 150.138, "eval_steps_per_second": 1.472, "step": 516 }, { "epoch": 86.67, "learning_rate": 5.555555555555557e-06, "loss": 0.937, "step": 520 }, { "epoch": 87.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1911908388137817, "eval_runtime": 0.9503, "eval_samples_per_second": 107.339, "eval_steps_per_second": 1.052, "step": 522 }, { "epoch": 88.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.1882524490356445, "eval_runtime": 0.6719, "eval_samples_per_second": 151.805, "eval_steps_per_second": 1.488, "step": 528 }, { "epoch": 88.33, "learning_rate": 5.2777777777777785e-06, "loss": 0.9389, "step": 530 }, { "epoch": 89.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1845465898513794, "eval_runtime": 1.3029, "eval_samples_per_second": 78.288, "eval_steps_per_second": 0.768, "step": 534 }, { "epoch": 90.0, "learning_rate": 5e-06, "loss": 0.9181, "step": 540 }, { "epoch": 90.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1810888051986694, "eval_runtime": 1.3666, "eval_samples_per_second": 74.637, "eval_steps_per_second": 0.732, "step": 540 }, { "epoch": 91.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.177653193473816, "eval_runtime": 0.7195, "eval_samples_per_second": 141.761, "eval_steps_per_second": 1.39, "step": 546 }, { "epoch": 91.67, "learning_rate": 4.722222222222222e-06, "loss": 0.9219, "step": 550 }, { "epoch": 92.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1742655038833618, "eval_runtime": 0.9915, "eval_samples_per_second": 102.872, "eval_steps_per_second": 1.009, "step": 552 }, { "epoch": 93.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1674777269363403, "eval_runtime": 1.1959, "eval_samples_per_second": 85.292, "eval_steps_per_second": 0.836, "step": 558 }, { "epoch": 93.33, "learning_rate": 4.444444444444444e-06, "loss": 0.9067, "step": 560 }, { "epoch": 94.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.159796118736267, "eval_runtime": 0.6745, "eval_samples_per_second": 151.226, "eval_steps_per_second": 1.483, "step": 564 }, { "epoch": 95.0, "learning_rate": 4.166666666666667e-06, "loss": 0.9009, "step": 570 }, { "epoch": 95.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1600637435913086, "eval_runtime": 0.9647, "eval_samples_per_second": 105.735, "eval_steps_per_second": 1.037, "step": 570 }, { "epoch": 96.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.156423807144165, "eval_runtime": 0.7689, "eval_samples_per_second": 132.666, "eval_steps_per_second": 1.301, "step": 576 }, { "epoch": 96.67, "learning_rate": 3.88888888888889e-06, "loss": 0.8914, "step": 580 }, { "epoch": 97.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.150528073310852, "eval_runtime": 0.6817, "eval_samples_per_second": 149.634, "eval_steps_per_second": 1.467, "step": 582 }, { "epoch": 98.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1487188339233398, "eval_runtime": 1.6683, "eval_samples_per_second": 61.142, "eval_steps_per_second": 0.599, "step": 588 }, { "epoch": 98.33, "learning_rate": 3.6111111111111115e-06, "loss": 0.8739, "step": 590 }, { "epoch": 99.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.147989273071289, "eval_runtime": 0.9001, "eval_samples_per_second": 113.319, "eval_steps_per_second": 1.111, "step": 594 }, { "epoch": 100.0, "learning_rate": 3.3333333333333333e-06, "loss": 0.8742, "step": 600 }, { "epoch": 100.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.141270637512207, "eval_runtime": 0.9491, "eval_samples_per_second": 107.465, "eval_steps_per_second": 1.054, "step": 600 }, { "epoch": 101.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1367518901824951, "eval_runtime": 0.6665, "eval_samples_per_second": 153.047, "eval_steps_per_second": 1.5, "step": 606 }, { "epoch": 101.67, "learning_rate": 3.055555555555556e-06, "loss": 0.8679, "step": 610 }, { "epoch": 102.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.136061668395996, "eval_runtime": 0.6609, "eval_samples_per_second": 154.346, "eval_steps_per_second": 1.513, "step": 612 }, { "epoch": 103.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1316593885421753, "eval_runtime": 0.8305, "eval_samples_per_second": 122.817, "eval_steps_per_second": 1.204, "step": 618 }, { "epoch": 103.33, "learning_rate": 2.7777777777777783e-06, "loss": 0.8516, "step": 620 }, { "epoch": 104.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1296062469482422, "eval_runtime": 0.7328, "eval_samples_per_second": 139.191, "eval_steps_per_second": 1.365, "step": 624 }, { "epoch": 105.0, "learning_rate": 2.5e-06, "loss": 0.876, "step": 630 }, { "epoch": 105.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1287850141525269, "eval_runtime": 0.8191, "eval_samples_per_second": 124.532, "eval_steps_per_second": 1.221, "step": 630 }, { "epoch": 106.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.126371145248413, "eval_runtime": 0.9539, "eval_samples_per_second": 106.934, "eval_steps_per_second": 1.048, "step": 636 }, { "epoch": 106.67, "learning_rate": 2.222222222222222e-06, "loss": 0.8591, "step": 640 }, { "epoch": 107.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1238181591033936, "eval_runtime": 1.9507, "eval_samples_per_second": 52.288, "eval_steps_per_second": 0.513, "step": 642 }, { "epoch": 108.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1227307319641113, "eval_runtime": 0.8133, "eval_samples_per_second": 125.408, "eval_steps_per_second": 1.229, "step": 648 }, { "epoch": 108.33, "learning_rate": 1.944444444444445e-06, "loss": 0.8586, "step": 650 }, { "epoch": 109.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1207623481750488, "eval_runtime": 0.9855, "eval_samples_per_second": 103.502, "eval_steps_per_second": 1.015, "step": 654 }, { "epoch": 110.0, "learning_rate": 1.6666666666666667e-06, "loss": 0.8415, "step": 660 }, { "epoch": 110.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1194456815719604, "eval_runtime": 1.6344, "eval_samples_per_second": 62.409, "eval_steps_per_second": 0.612, "step": 660 }, { "epoch": 111.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1184879541397095, "eval_runtime": 0.8861, "eval_samples_per_second": 115.105, "eval_steps_per_second": 1.128, "step": 666 }, { "epoch": 111.67, "learning_rate": 1.3888888888888892e-06, "loss": 0.8465, "step": 670 }, { "epoch": 112.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1177918910980225, "eval_runtime": 0.9577, "eval_samples_per_second": 106.503, "eval_steps_per_second": 1.044, "step": 672 }, { "epoch": 113.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1184459924697876, "eval_runtime": 1.7722, "eval_samples_per_second": 57.556, "eval_steps_per_second": 0.564, "step": 678 }, { "epoch": 113.33, "learning_rate": 1.111111111111111e-06, "loss": 0.8503, "step": 680 }, { "epoch": 114.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.1182842254638672, "eval_runtime": 0.7295, "eval_samples_per_second": 139.825, "eval_steps_per_second": 1.371, "step": 684 }, { "epoch": 115.0, "learning_rate": 8.333333333333333e-07, "loss": 0.8332, "step": 690 }, { "epoch": 115.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.1173955202102661, "eval_runtime": 0.9783, "eval_samples_per_second": 104.259, "eval_steps_per_second": 1.022, "step": 690 }, { "epoch": 116.0, "eval_accuracy": 0.8431372549019608, "eval_loss": 1.1165024042129517, "eval_runtime": 1.541, "eval_samples_per_second": 66.193, "eval_steps_per_second": 0.649, "step": 696 }, { "epoch": 116.67, "learning_rate": 5.555555555555555e-07, "loss": 0.8476, "step": 700 }, { "epoch": 117.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1152539253234863, "eval_runtime": 0.7315, "eval_samples_per_second": 139.446, "eval_steps_per_second": 1.367, "step": 702 }, { "epoch": 118.0, "eval_accuracy": 0.8529411764705882, "eval_loss": 1.1142131090164185, "eval_runtime": 1.4479, "eval_samples_per_second": 70.448, "eval_steps_per_second": 0.691, "step": 708 }, { "epoch": 118.33, "learning_rate": 2.7777777777777776e-07, "loss": 0.8382, "step": 710 }, { "epoch": 119.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1136654615402222, "eval_runtime": 1.4591, "eval_samples_per_second": 69.906, "eval_steps_per_second": 0.685, "step": 714 }, { "epoch": 120.0, "learning_rate": 0.0, "loss": 0.8527, "step": 720 }, { "epoch": 120.0, "eval_accuracy": 0.8627450980392157, "eval_loss": 1.1135263442993164, "eval_runtime": 0.7983, "eval_samples_per_second": 127.768, "eval_steps_per_second": 1.253, "step": 720 }, { "epoch": 120.0, "step": 720, "total_flos": 5.338796396322816e+18, "train_loss": 1.4664161258273654, "train_runtime": 3110.7108, "train_samples_per_second": 22.143, "train_steps_per_second": 0.231 } ], "logging_steps": 10, "max_steps": 720, "num_input_tokens_seen": 0, "num_train_epochs": 120, "save_steps": 500, "total_flos": 5.338796396322816e+18, "train_batch_size": 100, "trial_name": null, "trial_params": null }