{ "best_metric": 0.7540983606557377, "best_model_checkpoint": "vit-base-patch16-224-R1-40\\checkpoint-994", "epoch": 39.73856209150327, "eval_steps": 500, "global_step": 1520, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.26, "learning_rate": 7.2368421052631575e-06, "loss": 1.3841, "step": 10 }, { "epoch": 0.52, "learning_rate": 1.4473684210526315e-05, "loss": 1.3644, "step": 20 }, { "epoch": 0.78, "learning_rate": 2.1710526315789474e-05, "loss": 1.3233, "step": 30 }, { "epoch": 0.99, "eval_accuracy": 0.5573770491803278, "eval_loss": 1.2354576587677002, "eval_runtime": 1.2109, "eval_samples_per_second": 50.376, "eval_steps_per_second": 1.652, "step": 38 }, { "epoch": 1.05, "learning_rate": 2.894736842105263e-05, "loss": 1.2568, "step": 40 }, { "epoch": 1.31, "learning_rate": 3.618421052631579e-05, "loss": 1.1698, "step": 50 }, { "epoch": 1.57, "learning_rate": 4.342105263157895e-05, "loss": 1.0595, "step": 60 }, { "epoch": 1.83, "learning_rate": 5.0657894736842104e-05, "loss": 0.8643, "step": 70 }, { "epoch": 1.99, "eval_accuracy": 0.5901639344262295, "eval_loss": 0.9296518564224243, "eval_runtime": 0.9419, "eval_samples_per_second": 64.761, "eval_steps_per_second": 2.123, "step": 76 }, { "epoch": 2.09, "learning_rate": 5.484764542936288e-05, "loss": 0.7065, "step": 80 }, { "epoch": 2.35, "learning_rate": 5.446675900277008e-05, "loss": 0.5891, "step": 90 }, { "epoch": 2.61, "learning_rate": 5.408587257617729e-05, "loss": 0.4685, "step": 100 }, { "epoch": 2.88, "learning_rate": 5.370498614958449e-05, "loss": 0.4464, "step": 110 }, { "epoch": 2.98, "eval_accuracy": 0.639344262295082, "eval_loss": 1.1189780235290527, "eval_runtime": 0.9517, "eval_samples_per_second": 64.093, "eval_steps_per_second": 2.101, "step": 114 }, { "epoch": 3.14, "learning_rate": 5.332409972299169e-05, "loss": 0.3343, "step": 120 }, { "epoch": 3.4, "learning_rate": 5.294321329639889e-05, "loss": 0.2516, "step": 130 }, { "epoch": 3.66, "learning_rate": 5.256232686980609e-05, "loss": 0.2748, "step": 140 }, { "epoch": 3.92, "learning_rate": 5.21814404432133e-05, "loss": 0.3092, "step": 150 }, { "epoch": 4.0, "eval_accuracy": 0.7049180327868853, "eval_loss": 0.986064612865448, "eval_runtime": 0.9785, "eval_samples_per_second": 62.339, "eval_steps_per_second": 2.044, "step": 153 }, { "epoch": 4.18, "learning_rate": 5.18005540166205e-05, "loss": 0.1909, "step": 160 }, { "epoch": 4.44, "learning_rate": 5.14196675900277e-05, "loss": 0.2265, "step": 170 }, { "epoch": 4.71, "learning_rate": 5.1038781163434903e-05, "loss": 0.1834, "step": 180 }, { "epoch": 4.97, "learning_rate": 5.0657894736842104e-05, "loss": 0.1628, "step": 190 }, { "epoch": 4.99, "eval_accuracy": 0.6721311475409836, "eval_loss": 1.1220643520355225, "eval_runtime": 0.9649, "eval_samples_per_second": 63.216, "eval_steps_per_second": 2.073, "step": 191 }, { "epoch": 5.23, "learning_rate": 5.027700831024931e-05, "loss": 0.1763, "step": 200 }, { "epoch": 5.49, "learning_rate": 4.989612188365651e-05, "loss": 0.13, "step": 210 }, { "epoch": 5.75, "learning_rate": 4.9515235457063714e-05, "loss": 0.121, "step": 220 }, { "epoch": 5.99, "eval_accuracy": 0.6885245901639344, "eval_loss": 1.170951247215271, "eval_runtime": 0.977, "eval_samples_per_second": 62.435, "eval_steps_per_second": 2.047, "step": 229 }, { "epoch": 6.01, "learning_rate": 4.9134349030470915e-05, "loss": 0.1121, "step": 230 }, { "epoch": 6.27, "learning_rate": 4.8753462603878116e-05, "loss": 0.1008, "step": 240 }, { "epoch": 6.54, "learning_rate": 4.837257617728532e-05, "loss": 0.12, "step": 250 }, { "epoch": 6.8, "learning_rate": 4.7991689750692524e-05, "loss": 0.1138, "step": 260 }, { "epoch": 6.98, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.199324607849121, "eval_runtime": 0.9471, "eval_samples_per_second": 64.409, "eval_steps_per_second": 2.112, "step": 267 }, { "epoch": 7.06, "learning_rate": 4.7610803324099725e-05, "loss": 0.1016, "step": 270 }, { "epoch": 7.32, "learning_rate": 4.7229916897506926e-05, "loss": 0.0953, "step": 280 }, { "epoch": 7.58, "learning_rate": 4.684903047091413e-05, "loss": 0.1073, "step": 290 }, { "epoch": 7.84, "learning_rate": 4.6468144044321335e-05, "loss": 0.1124, "step": 300 }, { "epoch": 8.0, "eval_accuracy": 0.6885245901639344, "eval_loss": 1.2635865211486816, "eval_runtime": 0.9388, "eval_samples_per_second": 64.974, "eval_steps_per_second": 2.13, "step": 306 }, { "epoch": 8.1, "learning_rate": 4.6087257617728535e-05, "loss": 0.0796, "step": 310 }, { "epoch": 8.37, "learning_rate": 4.570637119113573e-05, "loss": 0.0944, "step": 320 }, { "epoch": 8.63, "learning_rate": 4.532548476454294e-05, "loss": 0.0701, "step": 330 }, { "epoch": 8.89, "learning_rate": 4.494459833795014e-05, "loss": 0.0748, "step": 340 }, { "epoch": 8.99, "eval_accuracy": 0.7049180327868853, "eval_loss": 1.388106346130371, "eval_runtime": 0.9485, "eval_samples_per_second": 64.314, "eval_steps_per_second": 2.109, "step": 344 }, { "epoch": 9.15, "learning_rate": 4.4563711911357346e-05, "loss": 0.0708, "step": 350 }, { "epoch": 9.41, "learning_rate": 4.418282548476455e-05, "loss": 0.1, "step": 360 }, { "epoch": 9.67, "learning_rate": 4.380193905817174e-05, "loss": 0.0566, "step": 370 }, { "epoch": 9.93, "learning_rate": 4.342105263157895e-05, "loss": 0.0877, "step": 380 }, { "epoch": 9.99, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.2892450094223022, "eval_runtime": 0.9419, "eval_samples_per_second": 64.76, "eval_steps_per_second": 2.123, "step": 382 }, { "epoch": 10.2, "learning_rate": 4.304016620498615e-05, "loss": 0.0929, "step": 390 }, { "epoch": 10.46, "learning_rate": 4.265927977839336e-05, "loss": 0.1008, "step": 400 }, { "epoch": 10.72, "learning_rate": 4.227839335180056e-05, "loss": 0.0762, "step": 410 }, { "epoch": 10.98, "learning_rate": 4.189750692520776e-05, "loss": 0.0642, "step": 420 }, { "epoch": 10.98, "eval_accuracy": 0.7049180327868853, "eval_loss": 1.3759475946426392, "eval_runtime": 0.9604, "eval_samples_per_second": 63.518, "eval_steps_per_second": 2.083, "step": 420 }, { "epoch": 11.24, "learning_rate": 4.151662049861496e-05, "loss": 0.0588, "step": 430 }, { "epoch": 11.5, "learning_rate": 4.113573407202216e-05, "loss": 0.0582, "step": 440 }, { "epoch": 11.76, "learning_rate": 4.075484764542937e-05, "loss": 0.0675, "step": 450 }, { "epoch": 12.0, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.4282864332199097, "eval_runtime": 0.9099, "eval_samples_per_second": 67.038, "eval_steps_per_second": 2.198, "step": 459 }, { "epoch": 12.03, "learning_rate": 4.037396121883656e-05, "loss": 0.066, "step": 460 }, { "epoch": 12.29, "learning_rate": 3.999307479224377e-05, "loss": 0.0565, "step": 470 }, { "epoch": 12.55, "learning_rate": 3.961218836565097e-05, "loss": 0.0619, "step": 480 }, { "epoch": 12.81, "learning_rate": 3.923130193905817e-05, "loss": 0.0694, "step": 490 }, { "epoch": 12.99, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.361587643623352, "eval_runtime": 0.9412, "eval_samples_per_second": 64.813, "eval_steps_per_second": 2.125, "step": 497 }, { "epoch": 13.07, "learning_rate": 3.885041551246538e-05, "loss": 0.074, "step": 500 }, { "epoch": 13.33, "learning_rate": 3.8469529085872574e-05, "loss": 0.0706, "step": 510 }, { "epoch": 13.59, "learning_rate": 3.808864265927978e-05, "loss": 0.0495, "step": 520 }, { "epoch": 13.86, "learning_rate": 3.770775623268698e-05, "loss": 0.0689, "step": 530 }, { "epoch": 13.99, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.3863976001739502, "eval_runtime": 1.0042, "eval_samples_per_second": 60.748, "eval_steps_per_second": 1.992, "step": 535 }, { "epoch": 14.12, "learning_rate": 3.732686980609418e-05, "loss": 0.049, "step": 540 }, { "epoch": 14.38, "learning_rate": 3.694598337950139e-05, "loss": 0.0489, "step": 550 }, { "epoch": 14.64, "learning_rate": 3.6565096952908585e-05, "loss": 0.0458, "step": 560 }, { "epoch": 14.9, "learning_rate": 3.618421052631579e-05, "loss": 0.0378, "step": 570 }, { "epoch": 14.98, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.4321750402450562, "eval_runtime": 0.9754, "eval_samples_per_second": 62.537, "eval_steps_per_second": 2.05, "step": 573 }, { "epoch": 15.16, "learning_rate": 3.5803324099722994e-05, "loss": 0.0311, "step": 580 }, { "epoch": 15.42, "learning_rate": 3.5422437673130195e-05, "loss": 0.0626, "step": 590 }, { "epoch": 15.69, "learning_rate": 3.5041551246537395e-05, "loss": 0.0515, "step": 600 }, { "epoch": 15.95, "learning_rate": 3.4660664819944596e-05, "loss": 0.0472, "step": 610 }, { "epoch": 16.0, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.600350260734558, "eval_runtime": 0.907, "eval_samples_per_second": 67.252, "eval_steps_per_second": 2.205, "step": 612 }, { "epoch": 16.21, "learning_rate": 3.4279778393351804e-05, "loss": 0.0378, "step": 620 }, { "epoch": 16.47, "learning_rate": 3.3898891966759005e-05, "loss": 0.0345, "step": 630 }, { "epoch": 16.73, "learning_rate": 3.3518005540166206e-05, "loss": 0.0591, "step": 640 }, { "epoch": 16.99, "learning_rate": 3.313711911357341e-05, "loss": 0.044, "step": 650 }, { "epoch": 16.99, "eval_accuracy": 0.7049180327868853, "eval_loss": 1.5810414552688599, "eval_runtime": 0.9357, "eval_samples_per_second": 65.194, "eval_steps_per_second": 2.138, "step": 650 }, { "epoch": 17.25, "learning_rate": 3.275623268698061e-05, "loss": 0.0495, "step": 660 }, { "epoch": 17.52, "learning_rate": 3.2375346260387815e-05, "loss": 0.0409, "step": 670 }, { "epoch": 17.78, "learning_rate": 3.1994459833795016e-05, "loss": 0.0386, "step": 680 }, { "epoch": 17.99, "eval_accuracy": 0.6885245901639344, "eval_loss": 1.640351414680481, "eval_runtime": 0.9225, "eval_samples_per_second": 66.127, "eval_steps_per_second": 2.168, "step": 688 }, { "epoch": 18.04, "learning_rate": 3.161357340720222e-05, "loss": 0.0323, "step": 690 }, { "epoch": 18.3, "learning_rate": 3.123268698060942e-05, "loss": 0.0399, "step": 700 }, { "epoch": 18.56, "learning_rate": 3.085180055401662e-05, "loss": 0.0475, "step": 710 }, { "epoch": 18.82, "learning_rate": 3.0470914127423827e-05, "loss": 0.0341, "step": 720 }, { "epoch": 18.98, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.5698273181915283, "eval_runtime": 0.9437, "eval_samples_per_second": 64.641, "eval_steps_per_second": 2.119, "step": 726 }, { "epoch": 19.08, "learning_rate": 3.0090027700831024e-05, "loss": 0.0457, "step": 730 }, { "epoch": 19.35, "learning_rate": 2.9709141274238225e-05, "loss": 0.0337, "step": 740 }, { "epoch": 19.61, "learning_rate": 2.9328254847645433e-05, "loss": 0.0291, "step": 750 }, { "epoch": 19.87, "learning_rate": 2.894736842105263e-05, "loss": 0.0328, "step": 760 }, { "epoch": 20.0, "eval_accuracy": 0.6885245901639344, "eval_loss": 1.6720421314239502, "eval_runtime": 0.925, "eval_samples_per_second": 65.945, "eval_steps_per_second": 2.162, "step": 765 }, { "epoch": 20.13, "learning_rate": 2.8566481994459838e-05, "loss": 0.0226, "step": 770 }, { "epoch": 20.39, "learning_rate": 2.8185595567867035e-05, "loss": 0.027, "step": 780 }, { "epoch": 20.65, "learning_rate": 2.7804709141274236e-05, "loss": 0.0241, "step": 790 }, { "epoch": 20.92, "learning_rate": 2.742382271468144e-05, "loss": 0.0444, "step": 800 }, { "epoch": 20.99, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.6269422769546509, "eval_runtime": 0.9772, "eval_samples_per_second": 62.423, "eval_steps_per_second": 2.047, "step": 803 }, { "epoch": 21.18, "learning_rate": 2.7042936288088645e-05, "loss": 0.0312, "step": 810 }, { "epoch": 21.44, "learning_rate": 2.6662049861495846e-05, "loss": 0.0254, "step": 820 }, { "epoch": 21.7, "learning_rate": 2.6281163434903047e-05, "loss": 0.0239, "step": 830 }, { "epoch": 21.96, "learning_rate": 2.590027700831025e-05, "loss": 0.0342, "step": 840 }, { "epoch": 21.99, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.6344714164733887, "eval_runtime": 0.938, "eval_samples_per_second": 65.032, "eval_steps_per_second": 2.132, "step": 841 }, { "epoch": 22.22, "learning_rate": 2.5519390581717452e-05, "loss": 0.0203, "step": 850 }, { "epoch": 22.48, "learning_rate": 2.5138504155124656e-05, "loss": 0.0249, "step": 860 }, { "epoch": 22.75, "learning_rate": 2.4757617728531857e-05, "loss": 0.0324, "step": 870 }, { "epoch": 22.98, "eval_accuracy": 0.7049180327868853, "eval_loss": 1.7916051149368286, "eval_runtime": 0.9514, "eval_samples_per_second": 64.117, "eval_steps_per_second": 2.102, "step": 879 }, { "epoch": 23.01, "learning_rate": 2.4376731301939058e-05, "loss": 0.0287, "step": 880 }, { "epoch": 23.27, "learning_rate": 2.3995844875346262e-05, "loss": 0.0389, "step": 890 }, { "epoch": 23.53, "learning_rate": 2.3614958448753463e-05, "loss": 0.0333, "step": 900 }, { "epoch": 23.79, "learning_rate": 2.3234072022160667e-05, "loss": 0.023, "step": 910 }, { "epoch": 24.0, "eval_accuracy": 0.6885245901639344, "eval_loss": 1.8753165006637573, "eval_runtime": 0.9604, "eval_samples_per_second": 63.517, "eval_steps_per_second": 2.083, "step": 918 }, { "epoch": 24.05, "learning_rate": 2.2853185595567865e-05, "loss": 0.0264, "step": 920 }, { "epoch": 24.31, "learning_rate": 2.247229916897507e-05, "loss": 0.0305, "step": 930 }, { "epoch": 24.58, "learning_rate": 2.2091412742382273e-05, "loss": 0.0265, "step": 940 }, { "epoch": 24.84, "learning_rate": 2.1710526315789474e-05, "loss": 0.048, "step": 950 }, { "epoch": 24.99, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.767918586730957, "eval_runtime": 0.9362, "eval_samples_per_second": 65.159, "eval_steps_per_second": 2.136, "step": 956 }, { "epoch": 25.1, "learning_rate": 2.132963988919668e-05, "loss": 0.0378, "step": 960 }, { "epoch": 25.36, "learning_rate": 2.094875346260388e-05, "loss": 0.0326, "step": 970 }, { "epoch": 25.62, "learning_rate": 2.056786703601108e-05, "loss": 0.0228, "step": 980 }, { "epoch": 25.88, "learning_rate": 2.018698060941828e-05, "loss": 0.0202, "step": 990 }, { "epoch": 25.99, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.7211973667144775, "eval_runtime": 0.9572, "eval_samples_per_second": 63.731, "eval_steps_per_second": 2.09, "step": 994 }, { "epoch": 26.14, "learning_rate": 1.9806094182825486e-05, "loss": 0.023, "step": 1000 }, { "epoch": 26.41, "learning_rate": 1.942520775623269e-05, "loss": 0.0152, "step": 1010 }, { "epoch": 26.67, "learning_rate": 1.904432132963989e-05, "loss": 0.0267, "step": 1020 }, { "epoch": 26.93, "learning_rate": 1.866343490304709e-05, "loss": 0.0336, "step": 1030 }, { "epoch": 26.98, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.7305350303649902, "eval_runtime": 0.9424, "eval_samples_per_second": 64.731, "eval_steps_per_second": 2.122, "step": 1032 }, { "epoch": 27.19, "learning_rate": 1.8282548476454293e-05, "loss": 0.0205, "step": 1040 }, { "epoch": 27.45, "learning_rate": 1.7901662049861497e-05, "loss": 0.0316, "step": 1050 }, { "epoch": 27.71, "learning_rate": 1.7520775623268698e-05, "loss": 0.023, "step": 1060 }, { "epoch": 27.97, "learning_rate": 1.7139889196675902e-05, "loss": 0.0163, "step": 1070 }, { "epoch": 28.0, "eval_accuracy": 0.7049180327868853, "eval_loss": 1.7576478719711304, "eval_runtime": 1.0009, "eval_samples_per_second": 60.943, "eval_steps_per_second": 1.998, "step": 1071 }, { "epoch": 28.24, "learning_rate": 1.6759002770083103e-05, "loss": 0.0183, "step": 1080 }, { "epoch": 28.5, "learning_rate": 1.6378116343490304e-05, "loss": 0.0432, "step": 1090 }, { "epoch": 28.76, "learning_rate": 1.5997229916897508e-05, "loss": 0.0186, "step": 1100 }, { "epoch": 28.99, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.7540286779403687, "eval_runtime": 0.9541, "eval_samples_per_second": 63.935, "eval_steps_per_second": 2.096, "step": 1109 }, { "epoch": 29.02, "learning_rate": 1.561634349030471e-05, "loss": 0.0181, "step": 1110 }, { "epoch": 29.28, "learning_rate": 1.5235457063711913e-05, "loss": 0.0186, "step": 1120 }, { "epoch": 29.54, "learning_rate": 1.4854570637119112e-05, "loss": 0.0112, "step": 1130 }, { "epoch": 29.8, "learning_rate": 1.4473684210526315e-05, "loss": 0.0189, "step": 1140 }, { "epoch": 29.99, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.6593692302703857, "eval_runtime": 0.9808, "eval_samples_per_second": 62.194, "eval_steps_per_second": 2.039, "step": 1147 }, { "epoch": 30.07, "learning_rate": 1.4092797783933518e-05, "loss": 0.0205, "step": 1150 }, { "epoch": 30.33, "learning_rate": 1.371191135734072e-05, "loss": 0.0135, "step": 1160 }, { "epoch": 30.59, "learning_rate": 1.3331024930747923e-05, "loss": 0.0116, "step": 1170 }, { "epoch": 30.85, "learning_rate": 1.2950138504155125e-05, "loss": 0.039, "step": 1180 }, { "epoch": 30.98, "eval_accuracy": 0.7213114754098361, "eval_loss": 1.7422717809677124, "eval_runtime": 0.9576, "eval_samples_per_second": 63.702, "eval_steps_per_second": 2.089, "step": 1185 }, { "epoch": 31.11, "learning_rate": 1.2569252077562328e-05, "loss": 0.0209, "step": 1190 }, { "epoch": 31.37, "learning_rate": 1.2188365650969529e-05, "loss": 0.0217, "step": 1200 }, { "epoch": 31.63, "learning_rate": 1.1807479224376732e-05, "loss": 0.0242, "step": 1210 }, { "epoch": 31.9, "learning_rate": 1.1426592797783932e-05, "loss": 0.0194, "step": 1220 }, { "epoch": 32.0, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.7148489952087402, "eval_runtime": 1.0439, "eval_samples_per_second": 58.435, "eval_steps_per_second": 1.916, "step": 1224 }, { "epoch": 32.16, "learning_rate": 1.1045706371191137e-05, "loss": 0.0153, "step": 1230 }, { "epoch": 32.42, "learning_rate": 1.066481994459834e-05, "loss": 0.0214, "step": 1240 }, { "epoch": 32.68, "learning_rate": 1.028393351800554e-05, "loss": 0.017, "step": 1250 }, { "epoch": 32.94, "learning_rate": 9.903047091412743e-06, "loss": 0.0205, "step": 1260 }, { "epoch": 32.99, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.6965419054031372, "eval_runtime": 0.9377, "eval_samples_per_second": 65.055, "eval_steps_per_second": 2.133, "step": 1262 }, { "epoch": 33.2, "learning_rate": 9.522160664819945e-06, "loss": 0.0069, "step": 1270 }, { "epoch": 33.46, "learning_rate": 9.141274238227146e-06, "loss": 0.0153, "step": 1280 }, { "epoch": 33.73, "learning_rate": 8.760387811634349e-06, "loss": 0.024, "step": 1290 }, { "epoch": 33.99, "learning_rate": 8.379501385041551e-06, "loss": 0.0186, "step": 1300 }, { "epoch": 33.99, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.7553398609161377, "eval_runtime": 0.9513, "eval_samples_per_second": 64.125, "eval_steps_per_second": 2.102, "step": 1300 }, { "epoch": 34.25, "learning_rate": 7.998614958448754e-06, "loss": 0.0094, "step": 1310 }, { "epoch": 34.51, "learning_rate": 7.617728531855957e-06, "loss": 0.0176, "step": 1320 }, { "epoch": 34.77, "learning_rate": 7.2368421052631575e-06, "loss": 0.0177, "step": 1330 }, { "epoch": 34.98, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.7475531101226807, "eval_runtime": 0.9569, "eval_samples_per_second": 63.746, "eval_steps_per_second": 2.09, "step": 1338 }, { "epoch": 35.03, "learning_rate": 6.85595567867036e-06, "loss": 0.0166, "step": 1340 }, { "epoch": 35.29, "learning_rate": 6.475069252077563e-06, "loss": 0.0109, "step": 1350 }, { "epoch": 35.56, "learning_rate": 6.0941828254847645e-06, "loss": 0.0134, "step": 1360 }, { "epoch": 35.82, "learning_rate": 5.713296398891966e-06, "loss": 0.0132, "step": 1370 }, { "epoch": 36.0, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.750593662261963, "eval_runtime": 0.9217, "eval_samples_per_second": 66.183, "eval_steps_per_second": 2.17, "step": 1377 }, { "epoch": 36.08, "learning_rate": 5.33240997229917e-06, "loss": 0.0117, "step": 1380 }, { "epoch": 36.34, "learning_rate": 4.951523545706371e-06, "loss": 0.0091, "step": 1390 }, { "epoch": 36.6, "learning_rate": 4.570637119113573e-06, "loss": 0.0144, "step": 1400 }, { "epoch": 36.86, "learning_rate": 4.189750692520776e-06, "loss": 0.0068, "step": 1410 }, { "epoch": 36.99, "eval_accuracy": 0.7377049180327869, "eval_loss": 1.6917140483856201, "eval_runtime": 0.9809, "eval_samples_per_second": 62.188, "eval_steps_per_second": 2.039, "step": 1415 }, { "epoch": 37.12, "learning_rate": 3.8088642659279783e-06, "loss": 0.0165, "step": 1420 }, { "epoch": 37.39, "learning_rate": 3.42797783933518e-06, "loss": 0.0159, "step": 1430 }, { "epoch": 37.65, "learning_rate": 3.0470914127423822e-06, "loss": 0.0155, "step": 1440 }, { "epoch": 37.91, "learning_rate": 2.666204986149585e-06, "loss": 0.0121, "step": 1450 }, { "epoch": 37.99, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.727645993232727, "eval_runtime": 1.0046, "eval_samples_per_second": 60.719, "eval_steps_per_second": 1.991, "step": 1453 }, { "epoch": 38.17, "learning_rate": 2.2853185595567866e-06, "loss": 0.0115, "step": 1460 }, { "epoch": 38.43, "learning_rate": 1.9044321329639892e-06, "loss": 0.0094, "step": 1470 }, { "epoch": 38.69, "learning_rate": 1.5235457063711911e-06, "loss": 0.0095, "step": 1480 }, { "epoch": 38.95, "learning_rate": 1.1426592797783933e-06, "loss": 0.0129, "step": 1490 }, { "epoch": 38.98, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.7218044996261597, "eval_runtime": 0.9303, "eval_samples_per_second": 65.571, "eval_steps_per_second": 2.15, "step": 1491 }, { "epoch": 39.22, "learning_rate": 7.617728531855956e-07, "loss": 0.0077, "step": 1500 }, { "epoch": 39.48, "learning_rate": 3.808864265927978e-07, "loss": 0.0115, "step": 1510 }, { "epoch": 39.74, "learning_rate": 0.0, "loss": 0.0067, "step": 1520 }, { "epoch": 39.74, "eval_accuracy": 0.7540983606557377, "eval_loss": 1.7220282554626465, "eval_runtime": 0.9466, "eval_samples_per_second": 64.441, "eval_steps_per_second": 2.113, "step": 1520 }, { "epoch": 39.74, "step": 1520, "total_flos": 1.5028755889037378e+19, "train_loss": 0.11905086408124159, "train_runtime": 3040.3234, "train_samples_per_second": 64.204, "train_steps_per_second": 0.5 } ], "logging_steps": 10, "max_steps": 1520, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 1.5028755889037378e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }