{ "best_metric": 0.5022222222222222, "best_model_checkpoint": "ansilmbabl/cards-blt-swin-tiny-patch4-window7-224-finetuned-v2/checkpoint-337", "epoch": 99.55555555555556, "eval_steps": 500, "global_step": 5600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.18, "learning_rate": 8.928571428571428e-07, "loss": 1.415, "step": 10 }, { "epoch": 0.36, "learning_rate": 1.7857142857142857e-06, "loss": 1.4195, "step": 20 }, { "epoch": 0.53, "learning_rate": 2.6785714285714285e-06, "loss": 1.419, "step": 30 }, { "epoch": 0.71, "learning_rate": 3.5714285714285714e-06, "loss": 1.3573, "step": 40 }, { "epoch": 0.89, "learning_rate": 4.464285714285715e-06, "loss": 1.4297, "step": 50 }, { "epoch": 1.0, "eval_accuracy": 0.49333333333333335, "eval_loss": 1.1976468563079834, "eval_runtime": 6.9062, "eval_samples_per_second": 260.637, "eval_steps_per_second": 8.253, "step": 56 }, { "epoch": 1.07, "learning_rate": 5.357142857142857e-06, "loss": 1.4063, "step": 60 }, { "epoch": 1.24, "learning_rate": 6.25e-06, "loss": 1.3637, "step": 70 }, { "epoch": 1.42, "learning_rate": 7.142857142857143e-06, "loss": 1.402, "step": 80 }, { "epoch": 1.6, "learning_rate": 8.035714285714286e-06, "loss": 1.3807, "step": 90 }, { "epoch": 1.78, "learning_rate": 8.92857142857143e-06, "loss": 1.4298, "step": 100 }, { "epoch": 1.96, "learning_rate": 9.821428571428573e-06, "loss": 1.4078, "step": 110 }, { "epoch": 1.99, "eval_accuracy": 0.5011111111111111, "eval_loss": 1.196445345878601, "eval_runtime": 6.7224, "eval_samples_per_second": 267.76, "eval_steps_per_second": 8.479, "step": 112 }, { "epoch": 2.13, "learning_rate": 1.0714285714285714e-05, "loss": 1.3884, "step": 120 }, { "epoch": 2.31, "learning_rate": 1.1607142857142857e-05, "loss": 1.3689, "step": 130 }, { "epoch": 2.49, "learning_rate": 1.25e-05, "loss": 1.4214, "step": 140 }, { "epoch": 2.67, "learning_rate": 1.3392857142857144e-05, "loss": 1.3834, "step": 150 }, { "epoch": 2.84, "learning_rate": 1.4285714285714285e-05, "loss": 1.417, "step": 160 }, { "epoch": 2.99, "eval_accuracy": 0.4961111111111111, "eval_loss": 1.2024986743927002, "eval_runtime": 6.8464, "eval_samples_per_second": 262.91, "eval_steps_per_second": 8.325, "step": 168 }, { "epoch": 3.02, "learning_rate": 1.5178571428571429e-05, "loss": 1.4647, "step": 170 }, { "epoch": 3.2, "learning_rate": 1.6071428571428572e-05, "loss": 1.4384, "step": 180 }, { "epoch": 3.38, "learning_rate": 1.6964285714285715e-05, "loss": 1.3899, "step": 190 }, { "epoch": 3.56, "learning_rate": 1.785714285714286e-05, "loss": 1.4333, "step": 200 }, { "epoch": 3.73, "learning_rate": 1.8750000000000002e-05, "loss": 1.3869, "step": 210 }, { "epoch": 3.91, "learning_rate": 1.9642857142857145e-05, "loss": 1.4163, "step": 220 }, { "epoch": 4.0, "eval_accuracy": 0.48833333333333334, "eval_loss": 1.2295453548431396, "eval_runtime": 6.7471, "eval_samples_per_second": 266.783, "eval_steps_per_second": 8.448, "step": 225 }, { "epoch": 4.09, "learning_rate": 2.0535714285714285e-05, "loss": 1.4564, "step": 230 }, { "epoch": 4.27, "learning_rate": 2.1428571428571428e-05, "loss": 1.4539, "step": 240 }, { "epoch": 4.44, "learning_rate": 2.2321428571428575e-05, "loss": 1.4111, "step": 250 }, { "epoch": 4.62, "learning_rate": 2.3214285714285715e-05, "loss": 1.4174, "step": 260 }, { "epoch": 4.8, "learning_rate": 2.4107142857142858e-05, "loss": 1.386, "step": 270 }, { "epoch": 4.98, "learning_rate": 2.5e-05, "loss": 1.4318, "step": 280 }, { "epoch": 5.0, "eval_accuracy": 0.495, "eval_loss": 1.2330048084259033, "eval_runtime": 6.8802, "eval_samples_per_second": 261.621, "eval_steps_per_second": 8.285, "step": 281 }, { "epoch": 5.16, "learning_rate": 2.5892857142857148e-05, "loss": 1.3915, "step": 290 }, { "epoch": 5.33, "learning_rate": 2.6785714285714288e-05, "loss": 1.3905, "step": 300 }, { "epoch": 5.51, "learning_rate": 2.767857142857143e-05, "loss": 1.4094, "step": 310 }, { "epoch": 5.69, "learning_rate": 2.857142857142857e-05, "loss": 1.4484, "step": 320 }, { "epoch": 5.87, "learning_rate": 2.9464285714285718e-05, "loss": 1.4383, "step": 330 }, { "epoch": 5.99, "eval_accuracy": 0.5022222222222222, "eval_loss": 1.2162481546401978, "eval_runtime": 6.9019, "eval_samples_per_second": 260.797, "eval_steps_per_second": 8.259, "step": 337 }, { "epoch": 6.04, "learning_rate": 3.0357142857142857e-05, "loss": 1.4271, "step": 340 }, { "epoch": 6.22, "learning_rate": 3.125e-05, "loss": 1.3845, "step": 350 }, { "epoch": 6.4, "learning_rate": 3.2142857142857144e-05, "loss": 1.4332, "step": 360 }, { "epoch": 6.58, "learning_rate": 3.303571428571429e-05, "loss": 1.4743, "step": 370 }, { "epoch": 6.76, "learning_rate": 3.392857142857143e-05, "loss": 1.3979, "step": 380 }, { "epoch": 6.93, "learning_rate": 3.4821428571428574e-05, "loss": 1.4212, "step": 390 }, { "epoch": 6.99, "eval_accuracy": 0.4716666666666667, "eval_loss": 1.2634377479553223, "eval_runtime": 6.8985, "eval_samples_per_second": 260.925, "eval_steps_per_second": 8.263, "step": 393 }, { "epoch": 7.11, "learning_rate": 3.571428571428572e-05, "loss": 1.4537, "step": 400 }, { "epoch": 7.29, "learning_rate": 3.6607142857142853e-05, "loss": 1.4253, "step": 410 }, { "epoch": 7.47, "learning_rate": 3.7500000000000003e-05, "loss": 1.4421, "step": 420 }, { "epoch": 7.64, "learning_rate": 3.839285714285715e-05, "loss": 1.4282, "step": 430 }, { "epoch": 7.82, "learning_rate": 3.928571428571429e-05, "loss": 1.4997, "step": 440 }, { "epoch": 8.0, "learning_rate": 4.017857142857143e-05, "loss": 1.4346, "step": 450 }, { "epoch": 8.0, "eval_accuracy": 0.4688888888888889, "eval_loss": 1.3083486557006836, "eval_runtime": 6.7559, "eval_samples_per_second": 266.436, "eval_steps_per_second": 8.437, "step": 450 }, { "epoch": 8.18, "learning_rate": 4.107142857142857e-05, "loss": 1.4238, "step": 460 }, { "epoch": 8.36, "learning_rate": 4.196428571428572e-05, "loss": 1.4162, "step": 470 }, { "epoch": 8.53, "learning_rate": 4.2857142857142856e-05, "loss": 1.3962, "step": 480 }, { "epoch": 8.71, "learning_rate": 4.375e-05, "loss": 1.38, "step": 490 }, { "epoch": 8.89, "learning_rate": 4.464285714285715e-05, "loss": 1.419, "step": 500 }, { "epoch": 9.0, "eval_accuracy": 0.48055555555555557, "eval_loss": 1.271929383277893, "eval_runtime": 6.6282, "eval_samples_per_second": 271.567, "eval_steps_per_second": 8.6, "step": 506 }, { "epoch": 9.07, "learning_rate": 4.5535714285714286e-05, "loss": 1.4266, "step": 510 }, { "epoch": 9.24, "learning_rate": 4.642857142857143e-05, "loss": 1.4394, "step": 520 }, { "epoch": 9.42, "learning_rate": 4.732142857142857e-05, "loss": 1.4178, "step": 530 }, { "epoch": 9.6, "learning_rate": 4.8214285714285716e-05, "loss": 1.4636, "step": 540 }, { "epoch": 9.78, "learning_rate": 4.910714285714286e-05, "loss": 1.3616, "step": 550 }, { "epoch": 9.96, "learning_rate": 5e-05, "loss": 1.4252, "step": 560 }, { "epoch": 9.99, "eval_accuracy": 0.4911111111111111, "eval_loss": 1.3047653436660767, "eval_runtime": 7.0266, "eval_samples_per_second": 256.17, "eval_steps_per_second": 8.112, "step": 562 }, { "epoch": 10.13, "learning_rate": 4.990079365079365e-05, "loss": 1.4389, "step": 570 }, { "epoch": 10.31, "learning_rate": 4.9801587301587306e-05, "loss": 1.3925, "step": 580 }, { "epoch": 10.49, "learning_rate": 4.9702380952380955e-05, "loss": 1.4302, "step": 590 }, { "epoch": 10.67, "learning_rate": 4.960317460317461e-05, "loss": 1.405, "step": 600 }, { "epoch": 10.84, "learning_rate": 4.950396825396826e-05, "loss": 1.4522, "step": 610 }, { "epoch": 10.99, "eval_accuracy": 0.47944444444444445, "eval_loss": 1.2707706689834595, "eval_runtime": 7.1468, "eval_samples_per_second": 251.861, "eval_steps_per_second": 7.976, "step": 618 }, { "epoch": 11.02, "learning_rate": 4.940476190476191e-05, "loss": 1.4201, "step": 620 }, { "epoch": 11.2, "learning_rate": 4.930555555555556e-05, "loss": 1.3786, "step": 630 }, { "epoch": 11.38, "learning_rate": 4.9206349206349204e-05, "loss": 1.4126, "step": 640 }, { "epoch": 11.56, "learning_rate": 4.910714285714286e-05, "loss": 1.4261, "step": 650 }, { "epoch": 11.73, "learning_rate": 4.900793650793651e-05, "loss": 1.3891, "step": 660 }, { "epoch": 11.91, "learning_rate": 4.8908730158730156e-05, "loss": 1.3748, "step": 670 }, { "epoch": 12.0, "eval_accuracy": 0.43833333333333335, "eval_loss": 1.3720223903656006, "eval_runtime": 6.9318, "eval_samples_per_second": 259.673, "eval_steps_per_second": 8.223, "step": 675 }, { "epoch": 12.09, "learning_rate": 4.880952380952381e-05, "loss": 1.479, "step": 680 }, { "epoch": 12.27, "learning_rate": 4.871031746031746e-05, "loss": 1.3825, "step": 690 }, { "epoch": 12.44, "learning_rate": 4.8611111111111115e-05, "loss": 1.4049, "step": 700 }, { "epoch": 12.62, "learning_rate": 4.8511904761904764e-05, "loss": 1.422, "step": 710 }, { "epoch": 12.8, "learning_rate": 4.841269841269841e-05, "loss": 1.3707, "step": 720 }, { "epoch": 12.98, "learning_rate": 4.831349206349207e-05, "loss": 1.3966, "step": 730 }, { "epoch": 13.0, "eval_accuracy": 0.45944444444444443, "eval_loss": 1.3094544410705566, "eval_runtime": 7.1035, "eval_samples_per_second": 253.396, "eval_steps_per_second": 8.024, "step": 731 }, { "epoch": 13.16, "learning_rate": 4.8214285714285716e-05, "loss": 1.3888, "step": 740 }, { "epoch": 13.33, "learning_rate": 4.811507936507937e-05, "loss": 1.4374, "step": 750 }, { "epoch": 13.51, "learning_rate": 4.801587301587302e-05, "loss": 1.4004, "step": 760 }, { "epoch": 13.69, "learning_rate": 4.791666666666667e-05, "loss": 1.4099, "step": 770 }, { "epoch": 13.87, "learning_rate": 4.781746031746032e-05, "loss": 1.4507, "step": 780 }, { "epoch": 13.99, "eval_accuracy": 0.485, "eval_loss": 1.2430291175842285, "eval_runtime": 7.2727, "eval_samples_per_second": 247.502, "eval_steps_per_second": 7.838, "step": 787 }, { "epoch": 14.04, "learning_rate": 4.771825396825397e-05, "loss": 1.4554, "step": 790 }, { "epoch": 14.22, "learning_rate": 4.761904761904762e-05, "loss": 1.3986, "step": 800 }, { "epoch": 14.4, "learning_rate": 4.751984126984127e-05, "loss": 1.4145, "step": 810 }, { "epoch": 14.58, "learning_rate": 4.7420634920634924e-05, "loss": 1.4258, "step": 820 }, { "epoch": 14.76, "learning_rate": 4.732142857142857e-05, "loss": 1.4066, "step": 830 }, { "epoch": 14.93, "learning_rate": 4.722222222222222e-05, "loss": 1.4033, "step": 840 }, { "epoch": 14.99, "eval_accuracy": 0.47944444444444445, "eval_loss": 1.272768497467041, "eval_runtime": 6.8312, "eval_samples_per_second": 263.498, "eval_steps_per_second": 8.344, "step": 843 }, { "epoch": 15.11, "learning_rate": 4.7123015873015876e-05, "loss": 1.4264, "step": 850 }, { "epoch": 15.29, "learning_rate": 4.7023809523809525e-05, "loss": 1.3796, "step": 860 }, { "epoch": 15.47, "learning_rate": 4.692460317460317e-05, "loss": 1.3972, "step": 870 }, { "epoch": 15.64, "learning_rate": 4.682539682539683e-05, "loss": 1.3486, "step": 880 }, { "epoch": 15.82, "learning_rate": 4.672619047619048e-05, "loss": 1.3769, "step": 890 }, { "epoch": 16.0, "learning_rate": 4.662698412698413e-05, "loss": 1.3972, "step": 900 }, { "epoch": 16.0, "eval_accuracy": 0.48833333333333334, "eval_loss": 1.261121392250061, "eval_runtime": 6.6409, "eval_samples_per_second": 271.049, "eval_steps_per_second": 8.583, "step": 900 }, { "epoch": 16.18, "learning_rate": 4.652777777777778e-05, "loss": 1.3787, "step": 910 }, { "epoch": 16.36, "learning_rate": 4.642857142857143e-05, "loss": 1.3228, "step": 920 }, { "epoch": 16.53, "learning_rate": 4.6329365079365085e-05, "loss": 1.3655, "step": 930 }, { "epoch": 16.71, "learning_rate": 4.623015873015873e-05, "loss": 1.3589, "step": 940 }, { "epoch": 16.89, "learning_rate": 4.613095238095239e-05, "loss": 1.4136, "step": 950 }, { "epoch": 17.0, "eval_accuracy": 0.45, "eval_loss": 1.3166221380233765, "eval_runtime": 6.9227, "eval_samples_per_second": 260.013, "eval_steps_per_second": 8.234, "step": 956 }, { "epoch": 17.07, "learning_rate": 4.603174603174603e-05, "loss": 1.4259, "step": 960 }, { "epoch": 17.24, "learning_rate": 4.5932539682539685e-05, "loss": 1.3792, "step": 970 }, { "epoch": 17.42, "learning_rate": 4.5833333333333334e-05, "loss": 1.3724, "step": 980 }, { "epoch": 17.6, "learning_rate": 4.573412698412698e-05, "loss": 1.4, "step": 990 }, { "epoch": 17.78, "learning_rate": 4.563492063492064e-05, "loss": 1.3087, "step": 1000 }, { "epoch": 17.96, "learning_rate": 4.5535714285714286e-05, "loss": 1.3992, "step": 1010 }, { "epoch": 17.99, "eval_accuracy": 0.4855555555555556, "eval_loss": 1.3103010654449463, "eval_runtime": 6.5821, "eval_samples_per_second": 273.469, "eval_steps_per_second": 8.66, "step": 1012 }, { "epoch": 18.13, "learning_rate": 4.543650793650794e-05, "loss": 1.3577, "step": 1020 }, { "epoch": 18.31, "learning_rate": 4.533730158730159e-05, "loss": 1.3907, "step": 1030 }, { "epoch": 18.49, "learning_rate": 4.523809523809524e-05, "loss": 1.3839, "step": 1040 }, { "epoch": 18.67, "learning_rate": 4.5138888888888894e-05, "loss": 1.3949, "step": 1050 }, { "epoch": 18.84, "learning_rate": 4.503968253968254e-05, "loss": 1.3614, "step": 1060 }, { "epoch": 18.99, "eval_accuracy": 0.44222222222222224, "eval_loss": 1.3302438259124756, "eval_runtime": 6.9237, "eval_samples_per_second": 259.976, "eval_steps_per_second": 8.233, "step": 1068 }, { "epoch": 19.02, "learning_rate": 4.494047619047619e-05, "loss": 1.3348, "step": 1070 }, { "epoch": 19.2, "learning_rate": 4.4841269841269846e-05, "loss": 1.3525, "step": 1080 }, { "epoch": 19.38, "learning_rate": 4.4742063492063494e-05, "loss": 1.3349, "step": 1090 }, { "epoch": 19.56, "learning_rate": 4.464285714285715e-05, "loss": 1.3511, "step": 1100 }, { "epoch": 19.73, "learning_rate": 4.45436507936508e-05, "loss": 1.3608, "step": 1110 }, { "epoch": 19.91, "learning_rate": 4.4444444444444447e-05, "loss": 1.3747, "step": 1120 }, { "epoch": 20.0, "eval_accuracy": 0.4855555555555556, "eval_loss": 1.2918972969055176, "eval_runtime": 6.9094, "eval_samples_per_second": 260.515, "eval_steps_per_second": 8.25, "step": 1125 }, { "epoch": 20.09, "learning_rate": 4.4345238095238095e-05, "loss": 1.372, "step": 1130 }, { "epoch": 20.27, "learning_rate": 4.4246031746031744e-05, "loss": 1.3213, "step": 1140 }, { "epoch": 20.44, "learning_rate": 4.41468253968254e-05, "loss": 1.3659, "step": 1150 }, { "epoch": 20.62, "learning_rate": 4.404761904761905e-05, "loss": 1.374, "step": 1160 }, { "epoch": 20.8, "learning_rate": 4.39484126984127e-05, "loss": 1.3621, "step": 1170 }, { "epoch": 20.98, "learning_rate": 4.384920634920635e-05, "loss": 1.3868, "step": 1180 }, { "epoch": 21.0, "eval_accuracy": 0.4727777777777778, "eval_loss": 1.3165982961654663, "eval_runtime": 6.6725, "eval_samples_per_second": 269.765, "eval_steps_per_second": 8.543, "step": 1181 }, { "epoch": 21.16, "learning_rate": 4.375e-05, "loss": 1.3344, "step": 1190 }, { "epoch": 21.33, "learning_rate": 4.3650793650793655e-05, "loss": 1.3649, "step": 1200 }, { "epoch": 21.51, "learning_rate": 4.35515873015873e-05, "loss": 1.2993, "step": 1210 }, { "epoch": 21.69, "learning_rate": 4.345238095238096e-05, "loss": 1.3745, "step": 1220 }, { "epoch": 21.87, "learning_rate": 4.335317460317461e-05, "loss": 1.3399, "step": 1230 }, { "epoch": 21.99, "eval_accuracy": 0.4672222222222222, "eval_loss": 1.3200290203094482, "eval_runtime": 6.7301, "eval_samples_per_second": 267.457, "eval_steps_per_second": 8.469, "step": 1237 }, { "epoch": 22.04, "learning_rate": 4.3253968253968256e-05, "loss": 1.3777, "step": 1240 }, { "epoch": 22.22, "learning_rate": 4.315476190476191e-05, "loss": 1.3122, "step": 1250 }, { "epoch": 22.4, "learning_rate": 4.305555555555556e-05, "loss": 1.3405, "step": 1260 }, { "epoch": 22.58, "learning_rate": 4.295634920634921e-05, "loss": 1.299, "step": 1270 }, { "epoch": 22.76, "learning_rate": 4.2857142857142856e-05, "loss": 1.3672, "step": 1280 }, { "epoch": 22.93, "learning_rate": 4.2757936507936505e-05, "loss": 1.3943, "step": 1290 }, { "epoch": 22.99, "eval_accuracy": 0.4811111111111111, "eval_loss": 1.2920361757278442, "eval_runtime": 6.9015, "eval_samples_per_second": 260.815, "eval_steps_per_second": 8.259, "step": 1293 }, { "epoch": 23.11, "learning_rate": 4.265873015873016e-05, "loss": 1.3459, "step": 1300 }, { "epoch": 23.29, "learning_rate": 4.255952380952381e-05, "loss": 1.3264, "step": 1310 }, { "epoch": 23.47, "learning_rate": 4.2460317460317464e-05, "loss": 1.3297, "step": 1320 }, { "epoch": 23.64, "learning_rate": 4.236111111111111e-05, "loss": 1.3092, "step": 1330 }, { "epoch": 23.82, "learning_rate": 4.226190476190476e-05, "loss": 1.3551, "step": 1340 }, { "epoch": 24.0, "learning_rate": 4.2162698412698416e-05, "loss": 1.3635, "step": 1350 }, { "epoch": 24.0, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.3109022378921509, "eval_runtime": 6.7144, "eval_samples_per_second": 268.079, "eval_steps_per_second": 8.489, "step": 1350 }, { "epoch": 24.18, "learning_rate": 4.2063492063492065e-05, "loss": 1.3229, "step": 1360 }, { "epoch": 24.36, "learning_rate": 4.196428571428572e-05, "loss": 1.3578, "step": 1370 }, { "epoch": 24.53, "learning_rate": 4.186507936507937e-05, "loss": 1.3052, "step": 1380 }, { "epoch": 24.71, "learning_rate": 4.176587301587302e-05, "loss": 1.3843, "step": 1390 }, { "epoch": 24.89, "learning_rate": 4.166666666666667e-05, "loss": 1.3724, "step": 1400 }, { "epoch": 25.0, "eval_accuracy": 0.46444444444444444, "eval_loss": 1.3099535703659058, "eval_runtime": 6.6043, "eval_samples_per_second": 272.552, "eval_steps_per_second": 8.631, "step": 1406 }, { "epoch": 25.07, "learning_rate": 4.156746031746032e-05, "loss": 1.3264, "step": 1410 }, { "epoch": 25.24, "learning_rate": 4.1468253968253976e-05, "loss": 1.2923, "step": 1420 }, { "epoch": 25.42, "learning_rate": 4.136904761904762e-05, "loss": 1.34, "step": 1430 }, { "epoch": 25.6, "learning_rate": 4.126984126984127e-05, "loss": 1.2817, "step": 1440 }, { "epoch": 25.78, "learning_rate": 4.117063492063492e-05, "loss": 1.3498, "step": 1450 }, { "epoch": 25.96, "learning_rate": 4.107142857142857e-05, "loss": 1.3141, "step": 1460 }, { "epoch": 25.99, "eval_accuracy": 0.49777777777777776, "eval_loss": 1.3263020515441895, "eval_runtime": 9.6902, "eval_samples_per_second": 185.755, "eval_steps_per_second": 5.882, "step": 1462 }, { "epoch": 26.13, "learning_rate": 4.0972222222222225e-05, "loss": 1.3192, "step": 1470 }, { "epoch": 26.31, "learning_rate": 4.0873015873015874e-05, "loss": 1.3228, "step": 1480 }, { "epoch": 26.49, "learning_rate": 4.077380952380952e-05, "loss": 1.2749, "step": 1490 }, { "epoch": 26.67, "learning_rate": 4.067460317460318e-05, "loss": 1.289, "step": 1500 }, { "epoch": 26.84, "learning_rate": 4.0575396825396826e-05, "loss": 1.3576, "step": 1510 }, { "epoch": 26.99, "eval_accuracy": 0.4772222222222222, "eval_loss": 1.330748438835144, "eval_runtime": 6.6957, "eval_samples_per_second": 268.828, "eval_steps_per_second": 8.513, "step": 1518 }, { "epoch": 27.02, "learning_rate": 4.047619047619048e-05, "loss": 1.3147, "step": 1520 }, { "epoch": 27.2, "learning_rate": 4.037698412698413e-05, "loss": 1.2995, "step": 1530 }, { "epoch": 27.38, "learning_rate": 4.027777777777778e-05, "loss": 1.2871, "step": 1540 }, { "epoch": 27.56, "learning_rate": 4.017857142857143e-05, "loss": 1.313, "step": 1550 }, { "epoch": 27.73, "learning_rate": 4.007936507936508e-05, "loss": 1.361, "step": 1560 }, { "epoch": 27.91, "learning_rate": 3.998015873015874e-05, "loss": 1.3022, "step": 1570 }, { "epoch": 28.0, "eval_accuracy": 0.49777777777777776, "eval_loss": 1.3408504724502563, "eval_runtime": 6.9866, "eval_samples_per_second": 257.635, "eval_steps_per_second": 8.158, "step": 1575 }, { "epoch": 28.09, "learning_rate": 3.9880952380952386e-05, "loss": 1.2554, "step": 1580 }, { "epoch": 28.27, "learning_rate": 3.9781746031746034e-05, "loss": 1.3175, "step": 1590 }, { "epoch": 28.44, "learning_rate": 3.968253968253968e-05, "loss": 1.2905, "step": 1600 }, { "epoch": 28.62, "learning_rate": 3.958333333333333e-05, "loss": 1.2806, "step": 1610 }, { "epoch": 28.8, "learning_rate": 3.9484126984126986e-05, "loss": 1.3055, "step": 1620 }, { "epoch": 28.98, "learning_rate": 3.9384920634920635e-05, "loss": 1.2982, "step": 1630 }, { "epoch": 29.0, "eval_accuracy": 0.4583333333333333, "eval_loss": 1.3961677551269531, "eval_runtime": 6.8118, "eval_samples_per_second": 264.246, "eval_steps_per_second": 8.368, "step": 1631 }, { "epoch": 29.16, "learning_rate": 3.928571428571429e-05, "loss": 1.2994, "step": 1640 }, { "epoch": 29.33, "learning_rate": 3.918650793650794e-05, "loss": 1.2676, "step": 1650 }, { "epoch": 29.51, "learning_rate": 3.908730158730159e-05, "loss": 1.2888, "step": 1660 }, { "epoch": 29.69, "learning_rate": 3.898809523809524e-05, "loss": 1.2839, "step": 1670 }, { "epoch": 29.87, "learning_rate": 3.888888888888889e-05, "loss": 1.2657, "step": 1680 }, { "epoch": 29.99, "eval_accuracy": 0.4816666666666667, "eval_loss": 1.3329031467437744, "eval_runtime": 6.7377, "eval_samples_per_second": 267.155, "eval_steps_per_second": 8.46, "step": 1687 }, { "epoch": 30.04, "learning_rate": 3.878968253968254e-05, "loss": 1.2745, "step": 1690 }, { "epoch": 30.22, "learning_rate": 3.8690476190476195e-05, "loss": 1.3186, "step": 1700 }, { "epoch": 30.4, "learning_rate": 3.859126984126984e-05, "loss": 1.2745, "step": 1710 }, { "epoch": 30.58, "learning_rate": 3.84920634920635e-05, "loss": 1.3076, "step": 1720 }, { "epoch": 30.76, "learning_rate": 3.839285714285715e-05, "loss": 1.2578, "step": 1730 }, { "epoch": 30.93, "learning_rate": 3.8293650793650795e-05, "loss": 1.3152, "step": 1740 }, { "epoch": 30.99, "eval_accuracy": 0.49, "eval_loss": 1.2973191738128662, "eval_runtime": 7.0246, "eval_samples_per_second": 256.242, "eval_steps_per_second": 8.114, "step": 1743 }, { "epoch": 31.11, "learning_rate": 3.8194444444444444e-05, "loss": 1.248, "step": 1750 }, { "epoch": 31.29, "learning_rate": 3.809523809523809e-05, "loss": 1.2537, "step": 1760 }, { "epoch": 31.47, "learning_rate": 3.799603174603175e-05, "loss": 1.247, "step": 1770 }, { "epoch": 31.64, "learning_rate": 3.7896825396825396e-05, "loss": 1.2904, "step": 1780 }, { "epoch": 31.82, "learning_rate": 3.779761904761905e-05, "loss": 1.3096, "step": 1790 }, { "epoch": 32.0, "learning_rate": 3.76984126984127e-05, "loss": 1.2924, "step": 1800 }, { "epoch": 32.0, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.3158966302871704, "eval_runtime": 6.8489, "eval_samples_per_second": 262.815, "eval_steps_per_second": 8.322, "step": 1800 }, { "epoch": 32.18, "learning_rate": 3.759920634920635e-05, "loss": 1.2709, "step": 1810 }, { "epoch": 32.36, "learning_rate": 3.7500000000000003e-05, "loss": 1.2427, "step": 1820 }, { "epoch": 32.53, "learning_rate": 3.740079365079365e-05, "loss": 1.2435, "step": 1830 }, { "epoch": 32.71, "learning_rate": 3.730158730158731e-05, "loss": 1.2843, "step": 1840 }, { "epoch": 32.89, "learning_rate": 3.7202380952380956e-05, "loss": 1.214, "step": 1850 }, { "epoch": 33.0, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.395532488822937, "eval_runtime": 6.7585, "eval_samples_per_second": 266.33, "eval_steps_per_second": 8.434, "step": 1856 }, { "epoch": 33.07, "learning_rate": 3.7103174603174604e-05, "loss": 1.3143, "step": 1860 }, { "epoch": 33.24, "learning_rate": 3.700396825396826e-05, "loss": 1.2143, "step": 1870 }, { "epoch": 33.42, "learning_rate": 3.690476190476191e-05, "loss": 1.2721, "step": 1880 }, { "epoch": 33.6, "learning_rate": 3.6805555555555556e-05, "loss": 1.2267, "step": 1890 }, { "epoch": 33.78, "learning_rate": 3.6706349206349205e-05, "loss": 1.2616, "step": 1900 }, { "epoch": 33.96, "learning_rate": 3.6607142857142853e-05, "loss": 1.2717, "step": 1910 }, { "epoch": 33.99, "eval_accuracy": 0.46, "eval_loss": 1.4583297967910767, "eval_runtime": 6.7797, "eval_samples_per_second": 265.497, "eval_steps_per_second": 8.407, "step": 1912 }, { "epoch": 34.13, "learning_rate": 3.650793650793651e-05, "loss": 1.2594, "step": 1920 }, { "epoch": 34.31, "learning_rate": 3.640873015873016e-05, "loss": 1.3058, "step": 1930 }, { "epoch": 34.49, "learning_rate": 3.630952380952381e-05, "loss": 1.285, "step": 1940 }, { "epoch": 34.67, "learning_rate": 3.621031746031746e-05, "loss": 1.2276, "step": 1950 }, { "epoch": 34.84, "learning_rate": 3.611111111111111e-05, "loss": 1.2692, "step": 1960 }, { "epoch": 34.99, "eval_accuracy": 0.4938888888888889, "eval_loss": 1.3503786325454712, "eval_runtime": 6.4105, "eval_samples_per_second": 280.79, "eval_steps_per_second": 8.892, "step": 1968 }, { "epoch": 35.02, "learning_rate": 3.6011904761904765e-05, "loss": 1.2435, "step": 1970 }, { "epoch": 35.2, "learning_rate": 3.591269841269841e-05, "loss": 1.2224, "step": 1980 }, { "epoch": 35.38, "learning_rate": 3.581349206349207e-05, "loss": 1.2219, "step": 1990 }, { "epoch": 35.56, "learning_rate": 3.571428571428572e-05, "loss": 1.2391, "step": 2000 }, { "epoch": 35.73, "learning_rate": 3.5615079365079365e-05, "loss": 1.2594, "step": 2010 }, { "epoch": 35.91, "learning_rate": 3.551587301587302e-05, "loss": 1.2127, "step": 2020 }, { "epoch": 36.0, "eval_accuracy": 0.48333333333333334, "eval_loss": 1.378441333770752, "eval_runtime": 6.7487, "eval_samples_per_second": 266.719, "eval_steps_per_second": 8.446, "step": 2025 }, { "epoch": 36.09, "learning_rate": 3.541666666666667e-05, "loss": 1.2251, "step": 2030 }, { "epoch": 36.27, "learning_rate": 3.5317460317460324e-05, "loss": 1.2232, "step": 2040 }, { "epoch": 36.44, "learning_rate": 3.521825396825397e-05, "loss": 1.2207, "step": 2050 }, { "epoch": 36.62, "learning_rate": 3.511904761904762e-05, "loss": 1.2139, "step": 2060 }, { "epoch": 36.8, "learning_rate": 3.501984126984127e-05, "loss": 1.2464, "step": 2070 }, { "epoch": 36.98, "learning_rate": 3.492063492063492e-05, "loss": 1.1956, "step": 2080 }, { "epoch": 37.0, "eval_accuracy": 0.4816666666666667, "eval_loss": 1.4183870553970337, "eval_runtime": 6.7564, "eval_samples_per_second": 266.414, "eval_steps_per_second": 8.436, "step": 2081 }, { "epoch": 37.16, "learning_rate": 3.4821428571428574e-05, "loss": 1.199, "step": 2090 }, { "epoch": 37.33, "learning_rate": 3.472222222222222e-05, "loss": 1.2005, "step": 2100 }, { "epoch": 37.51, "learning_rate": 3.462301587301587e-05, "loss": 1.193, "step": 2110 }, { "epoch": 37.69, "learning_rate": 3.4523809523809526e-05, "loss": 1.2632, "step": 2120 }, { "epoch": 37.87, "learning_rate": 3.4424603174603174e-05, "loss": 1.2408, "step": 2130 }, { "epoch": 37.99, "eval_accuracy": 0.49444444444444446, "eval_loss": 1.3849085569381714, "eval_runtime": 7.0969, "eval_samples_per_second": 253.633, "eval_steps_per_second": 8.032, "step": 2137 }, { "epoch": 38.04, "learning_rate": 3.432539682539683e-05, "loss": 1.1826, "step": 2140 }, { "epoch": 38.22, "learning_rate": 3.422619047619048e-05, "loss": 1.2048, "step": 2150 }, { "epoch": 38.4, "learning_rate": 3.412698412698413e-05, "loss": 1.2153, "step": 2160 }, { "epoch": 38.58, "learning_rate": 3.402777777777778e-05, "loss": 1.2151, "step": 2170 }, { "epoch": 38.76, "learning_rate": 3.392857142857143e-05, "loss": 1.23, "step": 2180 }, { "epoch": 38.93, "learning_rate": 3.3829365079365086e-05, "loss": 1.1699, "step": 2190 }, { "epoch": 38.99, "eval_accuracy": 0.48444444444444446, "eval_loss": 1.429752230644226, "eval_runtime": 6.9781, "eval_samples_per_second": 257.951, "eval_steps_per_second": 8.168, "step": 2193 }, { "epoch": 39.11, "learning_rate": 3.3730158730158734e-05, "loss": 1.1914, "step": 2200 }, { "epoch": 39.29, "learning_rate": 3.363095238095238e-05, "loss": 1.218, "step": 2210 }, { "epoch": 39.47, "learning_rate": 3.353174603174603e-05, "loss": 1.2134, "step": 2220 }, { "epoch": 39.64, "learning_rate": 3.343253968253968e-05, "loss": 1.1926, "step": 2230 }, { "epoch": 39.82, "learning_rate": 3.3333333333333335e-05, "loss": 1.1607, "step": 2240 }, { "epoch": 40.0, "learning_rate": 3.3234126984126983e-05, "loss": 1.1727, "step": 2250 }, { "epoch": 40.0, "eval_accuracy": 0.4772222222222222, "eval_loss": 1.4331458806991577, "eval_runtime": 6.6454, "eval_samples_per_second": 270.865, "eval_steps_per_second": 8.577, "step": 2250 }, { "epoch": 40.18, "learning_rate": 3.313492063492064e-05, "loss": 1.1827, "step": 2260 }, { "epoch": 40.36, "learning_rate": 3.303571428571429e-05, "loss": 1.1691, "step": 2270 }, { "epoch": 40.53, "learning_rate": 3.2936507936507936e-05, "loss": 1.2009, "step": 2280 }, { "epoch": 40.71, "learning_rate": 3.283730158730159e-05, "loss": 1.1939, "step": 2290 }, { "epoch": 40.89, "learning_rate": 3.273809523809524e-05, "loss": 1.1485, "step": 2300 }, { "epoch": 41.0, "eval_accuracy": 0.4672222222222222, "eval_loss": 1.4596869945526123, "eval_runtime": 6.7934, "eval_samples_per_second": 264.961, "eval_steps_per_second": 8.39, "step": 2306 }, { "epoch": 41.07, "learning_rate": 3.263888888888889e-05, "loss": 1.185, "step": 2310 }, { "epoch": 41.24, "learning_rate": 3.253968253968254e-05, "loss": 1.1438, "step": 2320 }, { "epoch": 41.42, "learning_rate": 3.244047619047619e-05, "loss": 1.2154, "step": 2330 }, { "epoch": 41.6, "learning_rate": 3.234126984126985e-05, "loss": 1.1789, "step": 2340 }, { "epoch": 41.78, "learning_rate": 3.2242063492063495e-05, "loss": 1.1464, "step": 2350 }, { "epoch": 41.96, "learning_rate": 3.2142857142857144e-05, "loss": 1.1668, "step": 2360 }, { "epoch": 41.99, "eval_accuracy": 0.47833333333333333, "eval_loss": 1.4428596496582031, "eval_runtime": 6.9455, "eval_samples_per_second": 259.161, "eval_steps_per_second": 8.207, "step": 2362 }, { "epoch": 42.13, "learning_rate": 3.20436507936508e-05, "loss": 1.1201, "step": 2370 }, { "epoch": 42.31, "learning_rate": 3.194444444444444e-05, "loss": 1.1545, "step": 2380 }, { "epoch": 42.49, "learning_rate": 3.1845238095238096e-05, "loss": 1.1587, "step": 2390 }, { "epoch": 42.67, "learning_rate": 3.1746031746031745e-05, "loss": 1.1606, "step": 2400 }, { "epoch": 42.84, "learning_rate": 3.16468253968254e-05, "loss": 1.1881, "step": 2410 }, { "epoch": 42.99, "eval_accuracy": 0.48388888888888887, "eval_loss": 1.455505132675171, "eval_runtime": 6.7551, "eval_samples_per_second": 266.464, "eval_steps_per_second": 8.438, "step": 2418 }, { "epoch": 43.02, "learning_rate": 3.154761904761905e-05, "loss": 1.1859, "step": 2420 }, { "epoch": 43.2, "learning_rate": 3.14484126984127e-05, "loss": 1.1149, "step": 2430 }, { "epoch": 43.38, "learning_rate": 3.134920634920635e-05, "loss": 1.1699, "step": 2440 }, { "epoch": 43.56, "learning_rate": 3.125e-05, "loss": 1.1132, "step": 2450 }, { "epoch": 43.73, "learning_rate": 3.1150793650793656e-05, "loss": 1.129, "step": 2460 }, { "epoch": 43.91, "learning_rate": 3.1051587301587304e-05, "loss": 1.1204, "step": 2470 }, { "epoch": 44.0, "eval_accuracy": 0.47833333333333333, "eval_loss": 1.464751958847046, "eval_runtime": 6.9852, "eval_samples_per_second": 257.688, "eval_steps_per_second": 8.16, "step": 2475 }, { "epoch": 44.09, "learning_rate": 3.095238095238095e-05, "loss": 1.1405, "step": 2480 }, { "epoch": 44.27, "learning_rate": 3.085317460317461e-05, "loss": 1.1173, "step": 2490 }, { "epoch": 44.44, "learning_rate": 3.075396825396826e-05, "loss": 1.1739, "step": 2500 }, { "epoch": 44.62, "learning_rate": 3.0654761904761905e-05, "loss": 1.1311, "step": 2510 }, { "epoch": 44.8, "learning_rate": 3.055555555555556e-05, "loss": 1.1483, "step": 2520 }, { "epoch": 44.98, "learning_rate": 3.0456349206349206e-05, "loss": 1.1523, "step": 2530 }, { "epoch": 45.0, "eval_accuracy": 0.47333333333333333, "eval_loss": 1.474352240562439, "eval_runtime": 6.7424, "eval_samples_per_second": 266.966, "eval_steps_per_second": 8.454, "step": 2531 }, { "epoch": 45.16, "learning_rate": 3.0357142857142857e-05, "loss": 1.1291, "step": 2540 }, { "epoch": 45.33, "learning_rate": 3.0257936507936506e-05, "loss": 1.1552, "step": 2550 }, { "epoch": 45.51, "learning_rate": 3.0158730158730158e-05, "loss": 1.1652, "step": 2560 }, { "epoch": 45.69, "learning_rate": 3.005952380952381e-05, "loss": 1.1732, "step": 2570 }, { "epoch": 45.87, "learning_rate": 2.996031746031746e-05, "loss": 1.1206, "step": 2580 }, { "epoch": 45.99, "eval_accuracy": 0.4905555555555556, "eval_loss": 1.4791755676269531, "eval_runtime": 6.7618, "eval_samples_per_second": 266.202, "eval_steps_per_second": 8.43, "step": 2587 }, { "epoch": 46.04, "learning_rate": 2.9861111111111113e-05, "loss": 1.1128, "step": 2590 }, { "epoch": 46.22, "learning_rate": 2.9761904761904762e-05, "loss": 1.1148, "step": 2600 }, { "epoch": 46.4, "learning_rate": 2.9662698412698414e-05, "loss": 1.1391, "step": 2610 }, { "epoch": 46.58, "learning_rate": 2.9563492063492066e-05, "loss": 1.1445, "step": 2620 }, { "epoch": 46.76, "learning_rate": 2.9464285714285718e-05, "loss": 1.1596, "step": 2630 }, { "epoch": 46.93, "learning_rate": 2.9365079365079366e-05, "loss": 1.1135, "step": 2640 }, { "epoch": 46.99, "eval_accuracy": 0.4677777777777778, "eval_loss": 1.5008630752563477, "eval_runtime": 7.3069, "eval_samples_per_second": 246.342, "eval_steps_per_second": 7.801, "step": 2643 }, { "epoch": 47.11, "learning_rate": 2.9265873015873018e-05, "loss": 1.1324, "step": 2650 }, { "epoch": 47.29, "learning_rate": 2.916666666666667e-05, "loss": 1.0933, "step": 2660 }, { "epoch": 47.47, "learning_rate": 2.906746031746032e-05, "loss": 1.1701, "step": 2670 }, { "epoch": 47.64, "learning_rate": 2.8968253968253974e-05, "loss": 1.1505, "step": 2680 }, { "epoch": 47.82, "learning_rate": 2.886904761904762e-05, "loss": 1.1117, "step": 2690 }, { "epoch": 48.0, "learning_rate": 2.876984126984127e-05, "loss": 1.1227, "step": 2700 }, { "epoch": 48.0, "eval_accuracy": 0.47333333333333333, "eval_loss": 1.5479964017868042, "eval_runtime": 6.7183, "eval_samples_per_second": 267.925, "eval_steps_per_second": 8.484, "step": 2700 }, { "epoch": 48.18, "learning_rate": 2.867063492063492e-05, "loss": 1.1419, "step": 2710 }, { "epoch": 48.36, "learning_rate": 2.857142857142857e-05, "loss": 1.0893, "step": 2720 }, { "epoch": 48.53, "learning_rate": 2.8472222222222223e-05, "loss": 1.0633, "step": 2730 }, { "epoch": 48.71, "learning_rate": 2.8373015873015875e-05, "loss": 1.0889, "step": 2740 }, { "epoch": 48.89, "learning_rate": 2.8273809523809523e-05, "loss": 1.1017, "step": 2750 }, { "epoch": 49.0, "eval_accuracy": 0.46444444444444444, "eval_loss": 1.590686559677124, "eval_runtime": 9.0291, "eval_samples_per_second": 199.355, "eval_steps_per_second": 6.313, "step": 2756 }, { "epoch": 49.07, "learning_rate": 2.8174603174603175e-05, "loss": 1.0731, "step": 2760 }, { "epoch": 49.24, "learning_rate": 2.8075396825396827e-05, "loss": 1.0992, "step": 2770 }, { "epoch": 49.42, "learning_rate": 2.797619047619048e-05, "loss": 1.0779, "step": 2780 }, { "epoch": 49.6, "learning_rate": 2.787698412698413e-05, "loss": 1.0771, "step": 2790 }, { "epoch": 49.78, "learning_rate": 2.777777777777778e-05, "loss": 1.1053, "step": 2800 }, { "epoch": 49.96, "learning_rate": 2.767857142857143e-05, "loss": 1.1601, "step": 2810 }, { "epoch": 49.99, "eval_accuracy": 0.47, "eval_loss": 1.5136301517486572, "eval_runtime": 6.8103, "eval_samples_per_second": 264.305, "eval_steps_per_second": 8.37, "step": 2812 }, { "epoch": 50.13, "learning_rate": 2.7579365079365083e-05, "loss": 1.1253, "step": 2820 }, { "epoch": 50.31, "learning_rate": 2.7480158730158735e-05, "loss": 1.0357, "step": 2830 }, { "epoch": 50.49, "learning_rate": 2.7380952380952383e-05, "loss": 1.0808, "step": 2840 }, { "epoch": 50.67, "learning_rate": 2.7281746031746032e-05, "loss": 1.0321, "step": 2850 }, { "epoch": 50.84, "learning_rate": 2.718253968253968e-05, "loss": 1.1239, "step": 2860 }, { "epoch": 50.99, "eval_accuracy": 0.47888888888888886, "eval_loss": 1.5383570194244385, "eval_runtime": 6.9446, "eval_samples_per_second": 259.194, "eval_steps_per_second": 8.208, "step": 2868 }, { "epoch": 51.02, "learning_rate": 2.7083333333333332e-05, "loss": 1.1178, "step": 2870 }, { "epoch": 51.2, "learning_rate": 2.6984126984126984e-05, "loss": 1.0706, "step": 2880 }, { "epoch": 51.38, "learning_rate": 2.6884920634920636e-05, "loss": 1.077, "step": 2890 }, { "epoch": 51.56, "learning_rate": 2.6785714285714288e-05, "loss": 1.0307, "step": 2900 }, { "epoch": 51.73, "learning_rate": 2.6686507936507936e-05, "loss": 1.0621, "step": 2910 }, { "epoch": 51.91, "learning_rate": 2.6587301587301588e-05, "loss": 1.09, "step": 2920 }, { "epoch": 52.0, "eval_accuracy": 0.4711111111111111, "eval_loss": 1.5716180801391602, "eval_runtime": 6.726, "eval_samples_per_second": 267.618, "eval_steps_per_second": 8.475, "step": 2925 }, { "epoch": 52.09, "learning_rate": 2.648809523809524e-05, "loss": 1.1091, "step": 2930 }, { "epoch": 52.27, "learning_rate": 2.6388888888888892e-05, "loss": 1.0716, "step": 2940 }, { "epoch": 52.44, "learning_rate": 2.628968253968254e-05, "loss": 1.0558, "step": 2950 }, { "epoch": 52.62, "learning_rate": 2.6190476190476192e-05, "loss": 1.0986, "step": 2960 }, { "epoch": 52.8, "learning_rate": 2.6091269841269844e-05, "loss": 1.0729, "step": 2970 }, { "epoch": 52.98, "learning_rate": 2.5992063492063496e-05, "loss": 1.1023, "step": 2980 }, { "epoch": 53.0, "eval_accuracy": 0.4727777777777778, "eval_loss": 1.5735939741134644, "eval_runtime": 6.8239, "eval_samples_per_second": 263.777, "eval_steps_per_second": 8.353, "step": 2981 }, { "epoch": 53.16, "learning_rate": 2.5892857142857148e-05, "loss": 1.017, "step": 2990 }, { "epoch": 53.33, "learning_rate": 2.5793650793650796e-05, "loss": 1.103, "step": 3000 }, { "epoch": 53.51, "learning_rate": 2.5694444444444445e-05, "loss": 1.0374, "step": 3010 }, { "epoch": 53.69, "learning_rate": 2.5595238095238093e-05, "loss": 1.0361, "step": 3020 }, { "epoch": 53.87, "learning_rate": 2.5496031746031745e-05, "loss": 1.1038, "step": 3030 }, { "epoch": 53.99, "eval_accuracy": 0.45555555555555555, "eval_loss": 1.5918738842010498, "eval_runtime": 6.817, "eval_samples_per_second": 264.046, "eval_steps_per_second": 8.361, "step": 3037 }, { "epoch": 54.04, "learning_rate": 2.5396825396825397e-05, "loss": 1.0261, "step": 3040 }, { "epoch": 54.22, "learning_rate": 2.529761904761905e-05, "loss": 1.0808, "step": 3050 }, { "epoch": 54.4, "learning_rate": 2.5198412698412697e-05, "loss": 1.0486, "step": 3060 }, { "epoch": 54.58, "learning_rate": 2.509920634920635e-05, "loss": 1.0311, "step": 3070 }, { "epoch": 54.76, "learning_rate": 2.5e-05, "loss": 1.0527, "step": 3080 }, { "epoch": 54.93, "learning_rate": 2.4900793650793653e-05, "loss": 1.058, "step": 3090 }, { "epoch": 54.99, "eval_accuracy": 0.4772222222222222, "eval_loss": 1.5533833503723145, "eval_runtime": 6.859, "eval_samples_per_second": 262.427, "eval_steps_per_second": 8.31, "step": 3093 }, { "epoch": 55.11, "learning_rate": 2.4801587301587305e-05, "loss": 1.0369, "step": 3100 }, { "epoch": 55.29, "learning_rate": 2.4702380952380953e-05, "loss": 1.0456, "step": 3110 }, { "epoch": 55.47, "learning_rate": 2.4603174603174602e-05, "loss": 1.0249, "step": 3120 }, { "epoch": 55.64, "learning_rate": 2.4503968253968254e-05, "loss": 1.04, "step": 3130 }, { "epoch": 55.82, "learning_rate": 2.4404761904761906e-05, "loss": 1.0552, "step": 3140 }, { "epoch": 56.0, "learning_rate": 2.4305555555555558e-05, "loss": 1.0405, "step": 3150 }, { "epoch": 56.0, "eval_accuracy": 0.4716666666666667, "eval_loss": 1.5788267850875854, "eval_runtime": 6.7843, "eval_samples_per_second": 265.32, "eval_steps_per_second": 8.402, "step": 3150 }, { "epoch": 56.18, "learning_rate": 2.4206349206349206e-05, "loss": 1.0667, "step": 3160 }, { "epoch": 56.36, "learning_rate": 2.4107142857142858e-05, "loss": 1.046, "step": 3170 }, { "epoch": 56.53, "learning_rate": 2.400793650793651e-05, "loss": 1.0146, "step": 3180 }, { "epoch": 56.71, "learning_rate": 2.390873015873016e-05, "loss": 1.0656, "step": 3190 }, { "epoch": 56.89, "learning_rate": 2.380952380952381e-05, "loss": 1.0172, "step": 3200 }, { "epoch": 57.0, "eval_accuracy": 0.4766666666666667, "eval_loss": 1.585532546043396, "eval_runtime": 6.9697, "eval_samples_per_second": 258.26, "eval_steps_per_second": 8.178, "step": 3206 }, { "epoch": 57.07, "learning_rate": 2.3710317460317462e-05, "loss": 1.0719, "step": 3210 }, { "epoch": 57.24, "learning_rate": 2.361111111111111e-05, "loss": 1.0542, "step": 3220 }, { "epoch": 57.42, "learning_rate": 2.3511904761904762e-05, "loss": 1.0316, "step": 3230 }, { "epoch": 57.6, "learning_rate": 2.3412698412698414e-05, "loss": 0.993, "step": 3240 }, { "epoch": 57.78, "learning_rate": 2.3313492063492066e-05, "loss": 1.0508, "step": 3250 }, { "epoch": 57.96, "learning_rate": 2.3214285714285715e-05, "loss": 1.0036, "step": 3260 }, { "epoch": 57.99, "eval_accuracy": 0.455, "eval_loss": 1.6425151824951172, "eval_runtime": 7.0778, "eval_samples_per_second": 254.315, "eval_steps_per_second": 8.053, "step": 3262 }, { "epoch": 58.13, "learning_rate": 2.3115079365079367e-05, "loss": 1.0261, "step": 3270 }, { "epoch": 58.31, "learning_rate": 2.3015873015873015e-05, "loss": 1.0191, "step": 3280 }, { "epoch": 58.49, "learning_rate": 2.2916666666666667e-05, "loss": 1.0035, "step": 3290 }, { "epoch": 58.67, "learning_rate": 2.281746031746032e-05, "loss": 0.9924, "step": 3300 }, { "epoch": 58.84, "learning_rate": 2.271825396825397e-05, "loss": 1.0124, "step": 3310 }, { "epoch": 58.99, "eval_accuracy": 0.4677777777777778, "eval_loss": 1.6039385795593262, "eval_runtime": 6.8431, "eval_samples_per_second": 263.038, "eval_steps_per_second": 8.33, "step": 3318 }, { "epoch": 59.02, "learning_rate": 2.261904761904762e-05, "loss": 1.0292, "step": 3320 }, { "epoch": 59.2, "learning_rate": 2.251984126984127e-05, "loss": 0.9945, "step": 3330 }, { "epoch": 59.38, "learning_rate": 2.2420634920634923e-05, "loss": 1.0021, "step": 3340 }, { "epoch": 59.56, "learning_rate": 2.2321428571428575e-05, "loss": 1.0187, "step": 3350 }, { "epoch": 59.73, "learning_rate": 2.2222222222222223e-05, "loss": 1.0092, "step": 3360 }, { "epoch": 59.91, "learning_rate": 2.2123015873015872e-05, "loss": 1.0647, "step": 3370 }, { "epoch": 60.0, "eval_accuracy": 0.4572222222222222, "eval_loss": 1.5890642404556274, "eval_runtime": 11.7484, "eval_samples_per_second": 153.212, "eval_steps_per_second": 4.852, "step": 3375 }, { "epoch": 60.09, "learning_rate": 2.2023809523809524e-05, "loss": 1.009, "step": 3380 }, { "epoch": 60.27, "learning_rate": 2.1924603174603176e-05, "loss": 0.9819, "step": 3390 }, { "epoch": 60.44, "learning_rate": 2.1825396825396827e-05, "loss": 1.0114, "step": 3400 }, { "epoch": 60.62, "learning_rate": 2.172619047619048e-05, "loss": 1.0253, "step": 3410 }, { "epoch": 60.8, "learning_rate": 2.1626984126984128e-05, "loss": 0.9988, "step": 3420 }, { "epoch": 60.98, "learning_rate": 2.152777777777778e-05, "loss": 1.0143, "step": 3430 }, { "epoch": 61.0, "eval_accuracy": 0.4483333333333333, "eval_loss": 1.6265422105789185, "eval_runtime": 6.6939, "eval_samples_per_second": 268.9, "eval_steps_per_second": 8.515, "step": 3431 }, { "epoch": 61.16, "learning_rate": 2.1428571428571428e-05, "loss": 0.9913, "step": 3440 }, { "epoch": 61.33, "learning_rate": 2.132936507936508e-05, "loss": 0.978, "step": 3450 }, { "epoch": 61.51, "learning_rate": 2.1230158730158732e-05, "loss": 0.9833, "step": 3460 }, { "epoch": 61.69, "learning_rate": 2.113095238095238e-05, "loss": 1.049, "step": 3470 }, { "epoch": 61.87, "learning_rate": 2.1031746031746032e-05, "loss": 1.0051, "step": 3480 }, { "epoch": 61.99, "eval_accuracy": 0.4633333333333333, "eval_loss": 1.6208295822143555, "eval_runtime": 6.6968, "eval_samples_per_second": 268.784, "eval_steps_per_second": 8.511, "step": 3487 }, { "epoch": 62.04, "learning_rate": 2.0932539682539684e-05, "loss": 1.0076, "step": 3490 }, { "epoch": 62.22, "learning_rate": 2.0833333333333336e-05, "loss": 1.0126, "step": 3500 }, { "epoch": 62.4, "learning_rate": 2.0734126984126988e-05, "loss": 0.9634, "step": 3510 }, { "epoch": 62.58, "learning_rate": 2.0634920634920636e-05, "loss": 0.9616, "step": 3520 }, { "epoch": 62.76, "learning_rate": 2.0535714285714285e-05, "loss": 1.0025, "step": 3530 }, { "epoch": 62.93, "learning_rate": 2.0436507936507937e-05, "loss": 0.9571, "step": 3540 }, { "epoch": 62.99, "eval_accuracy": 0.4483333333333333, "eval_loss": 1.6873899698257446, "eval_runtime": 6.8101, "eval_samples_per_second": 264.314, "eval_steps_per_second": 8.37, "step": 3543 }, { "epoch": 63.11, "learning_rate": 2.033730158730159e-05, "loss": 0.9859, "step": 3550 }, { "epoch": 63.29, "learning_rate": 2.023809523809524e-05, "loss": 0.9828, "step": 3560 }, { "epoch": 63.47, "learning_rate": 2.013888888888889e-05, "loss": 0.9969, "step": 3570 }, { "epoch": 63.64, "learning_rate": 2.003968253968254e-05, "loss": 1.0127, "step": 3580 }, { "epoch": 63.82, "learning_rate": 1.9940476190476193e-05, "loss": 0.9949, "step": 3590 }, { "epoch": 64.0, "learning_rate": 1.984126984126984e-05, "loss": 0.9838, "step": 3600 }, { "epoch": 64.0, "eval_accuracy": 0.45166666666666666, "eval_loss": 1.6777788400650024, "eval_runtime": 6.4627, "eval_samples_per_second": 278.521, "eval_steps_per_second": 8.82, "step": 3600 }, { "epoch": 64.18, "learning_rate": 1.9742063492063493e-05, "loss": 0.9745, "step": 3610 }, { "epoch": 64.36, "learning_rate": 1.9642857142857145e-05, "loss": 0.9895, "step": 3620 }, { "epoch": 64.53, "learning_rate": 1.9543650793650793e-05, "loss": 0.9219, "step": 3630 }, { "epoch": 64.71, "learning_rate": 1.9444444444444445e-05, "loss": 0.9853, "step": 3640 }, { "epoch": 64.89, "learning_rate": 1.9345238095238097e-05, "loss": 0.9995, "step": 3650 }, { "epoch": 65.0, "eval_accuracy": 0.4722222222222222, "eval_loss": 1.6247550249099731, "eval_runtime": 6.6147, "eval_samples_per_second": 272.12, "eval_steps_per_second": 8.617, "step": 3656 }, { "epoch": 65.07, "learning_rate": 1.924603174603175e-05, "loss": 0.9911, "step": 3660 }, { "epoch": 65.24, "learning_rate": 1.9146825396825398e-05, "loss": 1.0129, "step": 3670 }, { "epoch": 65.42, "learning_rate": 1.9047619047619046e-05, "loss": 0.9734, "step": 3680 }, { "epoch": 65.6, "learning_rate": 1.8948412698412698e-05, "loss": 0.9699, "step": 3690 }, { "epoch": 65.78, "learning_rate": 1.884920634920635e-05, "loss": 0.9707, "step": 3700 }, { "epoch": 65.96, "learning_rate": 1.8750000000000002e-05, "loss": 1.0374, "step": 3710 }, { "epoch": 65.99, "eval_accuracy": 0.4666666666666667, "eval_loss": 1.6644623279571533, "eval_runtime": 6.7375, "eval_samples_per_second": 267.163, "eval_steps_per_second": 8.46, "step": 3712 }, { "epoch": 66.13, "learning_rate": 1.8650793650793654e-05, "loss": 0.9862, "step": 3720 }, { "epoch": 66.31, "learning_rate": 1.8551587301587302e-05, "loss": 0.9879, "step": 3730 }, { "epoch": 66.49, "learning_rate": 1.8452380952380954e-05, "loss": 0.951, "step": 3740 }, { "epoch": 66.67, "learning_rate": 1.8353174603174602e-05, "loss": 0.991, "step": 3750 }, { "epoch": 66.84, "learning_rate": 1.8253968253968254e-05, "loss": 0.9483, "step": 3760 }, { "epoch": 66.99, "eval_accuracy": 0.46111111111111114, "eval_loss": 1.6306700706481934, "eval_runtime": 6.7855, "eval_samples_per_second": 265.273, "eval_steps_per_second": 8.4, "step": 3768 }, { "epoch": 67.02, "learning_rate": 1.8154761904761906e-05, "loss": 0.9483, "step": 3770 }, { "epoch": 67.2, "learning_rate": 1.8055555555555555e-05, "loss": 0.9179, "step": 3780 }, { "epoch": 67.38, "learning_rate": 1.7956349206349207e-05, "loss": 0.9575, "step": 3790 }, { "epoch": 67.56, "learning_rate": 1.785714285714286e-05, "loss": 0.9647, "step": 3800 }, { "epoch": 67.73, "learning_rate": 1.775793650793651e-05, "loss": 0.9526, "step": 3810 }, { "epoch": 67.91, "learning_rate": 1.7658730158730162e-05, "loss": 0.9825, "step": 3820 }, { "epoch": 68.0, "eval_accuracy": 0.4661111111111111, "eval_loss": 1.6661802530288696, "eval_runtime": 6.7756, "eval_samples_per_second": 265.661, "eval_steps_per_second": 8.413, "step": 3825 }, { "epoch": 68.09, "learning_rate": 1.755952380952381e-05, "loss": 0.9248, "step": 3830 }, { "epoch": 68.27, "learning_rate": 1.746031746031746e-05, "loss": 0.925, "step": 3840 }, { "epoch": 68.44, "learning_rate": 1.736111111111111e-05, "loss": 0.9444, "step": 3850 }, { "epoch": 68.62, "learning_rate": 1.7261904761904763e-05, "loss": 0.9719, "step": 3860 }, { "epoch": 68.8, "learning_rate": 1.7162698412698415e-05, "loss": 0.9858, "step": 3870 }, { "epoch": 68.98, "learning_rate": 1.7063492063492063e-05, "loss": 1.0023, "step": 3880 }, { "epoch": 69.0, "eval_accuracy": 0.46, "eval_loss": 1.6650328636169434, "eval_runtime": 6.9757, "eval_samples_per_second": 258.039, "eval_steps_per_second": 8.171, "step": 3881 }, { "epoch": 69.16, "learning_rate": 1.6964285714285715e-05, "loss": 0.9709, "step": 3890 }, { "epoch": 69.33, "learning_rate": 1.6865079365079367e-05, "loss": 0.9281, "step": 3900 }, { "epoch": 69.51, "learning_rate": 1.6765873015873016e-05, "loss": 0.9137, "step": 3910 }, { "epoch": 69.69, "learning_rate": 1.6666666666666667e-05, "loss": 0.9541, "step": 3920 }, { "epoch": 69.87, "learning_rate": 1.656746031746032e-05, "loss": 0.9642, "step": 3930 }, { "epoch": 69.99, "eval_accuracy": 0.4494444444444444, "eval_loss": 1.6953352689743042, "eval_runtime": 6.89, "eval_samples_per_second": 261.249, "eval_steps_per_second": 8.273, "step": 3937 }, { "epoch": 70.04, "learning_rate": 1.6468253968253968e-05, "loss": 0.978, "step": 3940 }, { "epoch": 70.22, "learning_rate": 1.636904761904762e-05, "loss": 0.9182, "step": 3950 }, { "epoch": 70.4, "learning_rate": 1.626984126984127e-05, "loss": 0.9306, "step": 3960 }, { "epoch": 70.58, "learning_rate": 1.6170634920634923e-05, "loss": 0.9004, "step": 3970 }, { "epoch": 70.76, "learning_rate": 1.6071428571428572e-05, "loss": 0.9347, "step": 3980 }, { "epoch": 70.93, "learning_rate": 1.597222222222222e-05, "loss": 0.9687, "step": 3990 }, { "epoch": 70.99, "eval_accuracy": 0.4661111111111111, "eval_loss": 1.707597017288208, "eval_runtime": 6.4216, "eval_samples_per_second": 280.303, "eval_steps_per_second": 8.876, "step": 3993 }, { "epoch": 71.11, "learning_rate": 1.5873015873015872e-05, "loss": 0.9244, "step": 4000 }, { "epoch": 71.29, "learning_rate": 1.5773809523809524e-05, "loss": 0.921, "step": 4010 }, { "epoch": 71.47, "learning_rate": 1.5674603174603176e-05, "loss": 0.9421, "step": 4020 }, { "epoch": 71.64, "learning_rate": 1.5575396825396828e-05, "loss": 0.9144, "step": 4030 }, { "epoch": 71.82, "learning_rate": 1.5476190476190476e-05, "loss": 1.0043, "step": 4040 }, { "epoch": 72.0, "learning_rate": 1.537698412698413e-05, "loss": 0.9542, "step": 4050 }, { "epoch": 72.0, "eval_accuracy": 0.46555555555555556, "eval_loss": 1.7011748552322388, "eval_runtime": 6.7739, "eval_samples_per_second": 265.725, "eval_steps_per_second": 8.415, "step": 4050 }, { "epoch": 72.18, "learning_rate": 1.527777777777778e-05, "loss": 0.9239, "step": 4060 }, { "epoch": 72.36, "learning_rate": 1.5178571428571429e-05, "loss": 0.9682, "step": 4070 }, { "epoch": 72.53, "learning_rate": 1.5079365079365079e-05, "loss": 0.942, "step": 4080 }, { "epoch": 72.71, "learning_rate": 1.498015873015873e-05, "loss": 0.9036, "step": 4090 }, { "epoch": 72.89, "learning_rate": 1.4880952380952381e-05, "loss": 0.9378, "step": 4100 }, { "epoch": 73.0, "eval_accuracy": 0.4533333333333333, "eval_loss": 1.7056083679199219, "eval_runtime": 6.5401, "eval_samples_per_second": 275.223, "eval_steps_per_second": 8.715, "step": 4106 }, { "epoch": 73.07, "learning_rate": 1.4781746031746033e-05, "loss": 0.9286, "step": 4110 }, { "epoch": 73.24, "learning_rate": 1.4682539682539683e-05, "loss": 0.9517, "step": 4120 }, { "epoch": 73.42, "learning_rate": 1.4583333333333335e-05, "loss": 0.9557, "step": 4130 }, { "epoch": 73.6, "learning_rate": 1.4484126984126987e-05, "loss": 0.9168, "step": 4140 }, { "epoch": 73.78, "learning_rate": 1.4384920634920635e-05, "loss": 0.9524, "step": 4150 }, { "epoch": 73.96, "learning_rate": 1.4285714285714285e-05, "loss": 0.9542, "step": 4160 }, { "epoch": 73.99, "eval_accuracy": 0.4572222222222222, "eval_loss": 1.7331255674362183, "eval_runtime": 6.6227, "eval_samples_per_second": 271.793, "eval_steps_per_second": 8.607, "step": 4162 }, { "epoch": 74.13, "learning_rate": 1.4186507936507937e-05, "loss": 0.9507, "step": 4170 }, { "epoch": 74.31, "learning_rate": 1.4087301587301587e-05, "loss": 0.8558, "step": 4180 }, { "epoch": 74.49, "learning_rate": 1.398809523809524e-05, "loss": 0.8986, "step": 4190 }, { "epoch": 74.67, "learning_rate": 1.388888888888889e-05, "loss": 0.9561, "step": 4200 }, { "epoch": 74.84, "learning_rate": 1.3789682539682541e-05, "loss": 0.9035, "step": 4210 }, { "epoch": 74.99, "eval_accuracy": 0.44166666666666665, "eval_loss": 1.7459461688995361, "eval_runtime": 6.917, "eval_samples_per_second": 260.229, "eval_steps_per_second": 8.241, "step": 4218 }, { "epoch": 75.02, "learning_rate": 1.3690476190476192e-05, "loss": 0.9349, "step": 4220 }, { "epoch": 75.2, "learning_rate": 1.359126984126984e-05, "loss": 0.9391, "step": 4230 }, { "epoch": 75.38, "learning_rate": 1.3492063492063492e-05, "loss": 0.9477, "step": 4240 }, { "epoch": 75.56, "learning_rate": 1.3392857142857144e-05, "loss": 0.8942, "step": 4250 }, { "epoch": 75.73, "learning_rate": 1.3293650793650794e-05, "loss": 0.9168, "step": 4260 }, { "epoch": 75.91, "learning_rate": 1.3194444444444446e-05, "loss": 0.9631, "step": 4270 }, { "epoch": 76.0, "eval_accuracy": 0.465, "eval_loss": 1.723626732826233, "eval_runtime": 6.7505, "eval_samples_per_second": 266.648, "eval_steps_per_second": 8.444, "step": 4275 }, { "epoch": 76.09, "learning_rate": 1.3095238095238096e-05, "loss": 0.9378, "step": 4280 }, { "epoch": 76.27, "learning_rate": 1.2996031746031748e-05, "loss": 0.9081, "step": 4290 }, { "epoch": 76.44, "learning_rate": 1.2896825396825398e-05, "loss": 0.9126, "step": 4300 }, { "epoch": 76.62, "learning_rate": 1.2797619047619047e-05, "loss": 0.8934, "step": 4310 }, { "epoch": 76.8, "learning_rate": 1.2698412698412699e-05, "loss": 0.9844, "step": 4320 }, { "epoch": 76.98, "learning_rate": 1.2599206349206349e-05, "loss": 0.8759, "step": 4330 }, { "epoch": 77.0, "eval_accuracy": 0.455, "eval_loss": 1.7293672561645508, "eval_runtime": 6.715, "eval_samples_per_second": 268.058, "eval_steps_per_second": 8.489, "step": 4331 }, { "epoch": 77.16, "learning_rate": 1.25e-05, "loss": 0.9142, "step": 4340 }, { "epoch": 77.33, "learning_rate": 1.2400793650793652e-05, "loss": 0.9153, "step": 4350 }, { "epoch": 77.51, "learning_rate": 1.2301587301587301e-05, "loss": 0.9225, "step": 4360 }, { "epoch": 77.69, "learning_rate": 1.2202380952380953e-05, "loss": 0.9304, "step": 4370 }, { "epoch": 77.87, "learning_rate": 1.2103174603174603e-05, "loss": 0.9218, "step": 4380 }, { "epoch": 77.99, "eval_accuracy": 0.4577777777777778, "eval_loss": 1.7653708457946777, "eval_runtime": 7.1048, "eval_samples_per_second": 253.348, "eval_steps_per_second": 8.023, "step": 4387 }, { "epoch": 78.04, "learning_rate": 1.2003968253968255e-05, "loss": 0.8532, "step": 4390 }, { "epoch": 78.22, "learning_rate": 1.1904761904761905e-05, "loss": 0.8794, "step": 4400 }, { "epoch": 78.4, "learning_rate": 1.1805555555555555e-05, "loss": 0.8853, "step": 4410 }, { "epoch": 78.58, "learning_rate": 1.1706349206349207e-05, "loss": 0.8914, "step": 4420 }, { "epoch": 78.76, "learning_rate": 1.1607142857142857e-05, "loss": 0.9161, "step": 4430 }, { "epoch": 78.93, "learning_rate": 1.1507936507936508e-05, "loss": 0.9077, "step": 4440 }, { "epoch": 78.99, "eval_accuracy": 0.45944444444444443, "eval_loss": 1.7234431505203247, "eval_runtime": 6.668, "eval_samples_per_second": 269.947, "eval_steps_per_second": 8.548, "step": 4443 }, { "epoch": 79.11, "learning_rate": 1.140873015873016e-05, "loss": 0.897, "step": 4450 }, { "epoch": 79.29, "learning_rate": 1.130952380952381e-05, "loss": 0.8687, "step": 4460 }, { "epoch": 79.47, "learning_rate": 1.1210317460317461e-05, "loss": 0.8983, "step": 4470 }, { "epoch": 79.64, "learning_rate": 1.1111111111111112e-05, "loss": 0.9047, "step": 4480 }, { "epoch": 79.82, "learning_rate": 1.1011904761904762e-05, "loss": 0.8923, "step": 4490 }, { "epoch": 80.0, "learning_rate": 1.0912698412698414e-05, "loss": 0.8924, "step": 4500 }, { "epoch": 80.0, "eval_accuracy": 0.4683333333333333, "eval_loss": 1.7255866527557373, "eval_runtime": 6.948, "eval_samples_per_second": 259.067, "eval_steps_per_second": 8.204, "step": 4500 }, { "epoch": 80.18, "learning_rate": 1.0813492063492064e-05, "loss": 0.8976, "step": 4510 }, { "epoch": 80.36, "learning_rate": 1.0714285714285714e-05, "loss": 0.8618, "step": 4520 }, { "epoch": 80.53, "learning_rate": 1.0615079365079366e-05, "loss": 0.8743, "step": 4530 }, { "epoch": 80.71, "learning_rate": 1.0515873015873016e-05, "loss": 0.9115, "step": 4540 }, { "epoch": 80.89, "learning_rate": 1.0416666666666668e-05, "loss": 0.9156, "step": 4550 }, { "epoch": 81.0, "eval_accuracy": 0.4677777777777778, "eval_loss": 1.73197603225708, "eval_runtime": 6.9362, "eval_samples_per_second": 259.509, "eval_steps_per_second": 8.218, "step": 4556 }, { "epoch": 81.07, "learning_rate": 1.0317460317460318e-05, "loss": 0.8605, "step": 4560 }, { "epoch": 81.24, "learning_rate": 1.0218253968253968e-05, "loss": 0.8848, "step": 4570 }, { "epoch": 81.42, "learning_rate": 1.011904761904762e-05, "loss": 0.875, "step": 4580 }, { "epoch": 81.6, "learning_rate": 1.001984126984127e-05, "loss": 0.896, "step": 4590 }, { "epoch": 81.78, "learning_rate": 9.92063492063492e-06, "loss": 0.8848, "step": 4600 }, { "epoch": 81.96, "learning_rate": 9.821428571428573e-06, "loss": 0.806, "step": 4610 }, { "epoch": 81.99, "eval_accuracy": 0.4661111111111111, "eval_loss": 1.734808087348938, "eval_runtime": 6.8749, "eval_samples_per_second": 261.821, "eval_steps_per_second": 8.291, "step": 4612 }, { "epoch": 82.13, "learning_rate": 9.722222222222223e-06, "loss": 0.8712, "step": 4620 }, { "epoch": 82.31, "learning_rate": 9.623015873015875e-06, "loss": 0.8963, "step": 4630 }, { "epoch": 82.49, "learning_rate": 9.523809523809523e-06, "loss": 0.871, "step": 4640 }, { "epoch": 82.67, "learning_rate": 9.424603174603175e-06, "loss": 0.854, "step": 4650 }, { "epoch": 82.84, "learning_rate": 9.325396825396827e-06, "loss": 0.8863, "step": 4660 }, { "epoch": 82.99, "eval_accuracy": 0.46055555555555555, "eval_loss": 1.7513699531555176, "eval_runtime": 6.8287, "eval_samples_per_second": 263.592, "eval_steps_per_second": 8.347, "step": 4668 }, { "epoch": 83.02, "learning_rate": 9.226190476190477e-06, "loss": 0.9394, "step": 4670 }, { "epoch": 83.2, "learning_rate": 9.126984126984127e-06, "loss": 0.887, "step": 4680 }, { "epoch": 83.38, "learning_rate": 9.027777777777777e-06, "loss": 0.8937, "step": 4690 }, { "epoch": 83.56, "learning_rate": 8.92857142857143e-06, "loss": 0.8514, "step": 4700 }, { "epoch": 83.73, "learning_rate": 8.829365079365081e-06, "loss": 0.8772, "step": 4710 }, { "epoch": 83.91, "learning_rate": 8.73015873015873e-06, "loss": 0.8698, "step": 4720 }, { "epoch": 84.0, "eval_accuracy": 0.4661111111111111, "eval_loss": 1.7483917474746704, "eval_runtime": 6.4282, "eval_samples_per_second": 280.014, "eval_steps_per_second": 8.867, "step": 4725 }, { "epoch": 84.09, "learning_rate": 8.630952380952381e-06, "loss": 0.8657, "step": 4730 }, { "epoch": 84.27, "learning_rate": 8.531746031746032e-06, "loss": 0.8555, "step": 4740 }, { "epoch": 84.44, "learning_rate": 8.432539682539684e-06, "loss": 0.8727, "step": 4750 }, { "epoch": 84.62, "learning_rate": 8.333333333333334e-06, "loss": 0.8733, "step": 4760 }, { "epoch": 84.8, "learning_rate": 8.234126984126984e-06, "loss": 0.9119, "step": 4770 }, { "epoch": 84.98, "learning_rate": 8.134920634920636e-06, "loss": 0.8623, "step": 4780 }, { "epoch": 85.0, "eval_accuracy": 0.4777777777777778, "eval_loss": 1.7420405149459839, "eval_runtime": 7.1131, "eval_samples_per_second": 253.054, "eval_steps_per_second": 8.013, "step": 4781 }, { "epoch": 85.16, "learning_rate": 8.035714285714286e-06, "loss": 0.9058, "step": 4790 }, { "epoch": 85.33, "learning_rate": 7.936507936507936e-06, "loss": 0.9022, "step": 4800 }, { "epoch": 85.51, "learning_rate": 7.837301587301588e-06, "loss": 0.8344, "step": 4810 }, { "epoch": 85.69, "learning_rate": 7.738095238095238e-06, "loss": 0.8355, "step": 4820 }, { "epoch": 85.87, "learning_rate": 7.63888888888889e-06, "loss": 0.8643, "step": 4830 }, { "epoch": 85.99, "eval_accuracy": 0.46166666666666667, "eval_loss": 1.7636218070983887, "eval_runtime": 6.5419, "eval_samples_per_second": 275.149, "eval_steps_per_second": 8.713, "step": 4837 }, { "epoch": 86.04, "learning_rate": 7.5396825396825394e-06, "loss": 0.8704, "step": 4840 }, { "epoch": 86.22, "learning_rate": 7.4404761904761905e-06, "loss": 0.8238, "step": 4850 }, { "epoch": 86.4, "learning_rate": 7.3412698412698415e-06, "loss": 0.8606, "step": 4860 }, { "epoch": 86.58, "learning_rate": 7.242063492063493e-06, "loss": 0.8792, "step": 4870 }, { "epoch": 86.76, "learning_rate": 7.142857142857143e-06, "loss": 0.8643, "step": 4880 }, { "epoch": 86.93, "learning_rate": 7.043650793650794e-06, "loss": 0.8914, "step": 4890 }, { "epoch": 86.99, "eval_accuracy": 0.465, "eval_loss": 1.7551671266555786, "eval_runtime": 6.4893, "eval_samples_per_second": 277.379, "eval_steps_per_second": 8.784, "step": 4893 }, { "epoch": 87.11, "learning_rate": 6.944444444444445e-06, "loss": 0.8279, "step": 4900 }, { "epoch": 87.29, "learning_rate": 6.845238095238096e-06, "loss": 0.8537, "step": 4910 }, { "epoch": 87.47, "learning_rate": 6.746031746031746e-06, "loss": 0.8736, "step": 4920 }, { "epoch": 87.64, "learning_rate": 6.646825396825397e-06, "loss": 0.8449, "step": 4930 }, { "epoch": 87.82, "learning_rate": 6.547619047619048e-06, "loss": 0.8282, "step": 4940 }, { "epoch": 88.0, "learning_rate": 6.448412698412699e-06, "loss": 0.837, "step": 4950 }, { "epoch": 88.0, "eval_accuracy": 0.46444444444444444, "eval_loss": 1.755152702331543, "eval_runtime": 6.8002, "eval_samples_per_second": 264.7, "eval_steps_per_second": 8.382, "step": 4950 }, { "epoch": 88.18, "learning_rate": 6.349206349206349e-06, "loss": 0.8562, "step": 4960 }, { "epoch": 88.36, "learning_rate": 6.25e-06, "loss": 0.8646, "step": 4970 }, { "epoch": 88.53, "learning_rate": 6.1507936507936505e-06, "loss": 0.8499, "step": 4980 }, { "epoch": 88.71, "learning_rate": 6.0515873015873015e-06, "loss": 0.8845, "step": 4990 }, { "epoch": 88.89, "learning_rate": 5.9523809523809525e-06, "loss": 0.8217, "step": 5000 }, { "epoch": 89.0, "eval_accuracy": 0.4638888888888889, "eval_loss": 1.7532140016555786, "eval_runtime": 7.0476, "eval_samples_per_second": 255.408, "eval_steps_per_second": 8.088, "step": 5006 }, { "epoch": 89.07, "learning_rate": 5.8531746031746036e-06, "loss": 0.8373, "step": 5010 }, { "epoch": 89.24, "learning_rate": 5.753968253968254e-06, "loss": 0.7907, "step": 5020 }, { "epoch": 89.42, "learning_rate": 5.654761904761905e-06, "loss": 0.8347, "step": 5030 }, { "epoch": 89.6, "learning_rate": 5.555555555555556e-06, "loss": 0.867, "step": 5040 }, { "epoch": 89.78, "learning_rate": 5.456349206349207e-06, "loss": 0.8816, "step": 5050 }, { "epoch": 89.96, "learning_rate": 5.357142857142857e-06, "loss": 0.8601, "step": 5060 }, { "epoch": 89.99, "eval_accuracy": 0.4683333333333333, "eval_loss": 1.7447186708450317, "eval_runtime": 6.3932, "eval_samples_per_second": 281.551, "eval_steps_per_second": 8.916, "step": 5062 }, { "epoch": 90.13, "learning_rate": 5.257936507936508e-06, "loss": 0.8323, "step": 5070 }, { "epoch": 90.31, "learning_rate": 5.158730158730159e-06, "loss": 0.8112, "step": 5080 }, { "epoch": 90.49, "learning_rate": 5.05952380952381e-06, "loss": 0.8216, "step": 5090 }, { "epoch": 90.67, "learning_rate": 4.96031746031746e-06, "loss": 0.8778, "step": 5100 }, { "epoch": 90.84, "learning_rate": 4.861111111111111e-06, "loss": 0.8293, "step": 5110 }, { "epoch": 90.99, "eval_accuracy": 0.46111111111111114, "eval_loss": 1.7622219324111938, "eval_runtime": 6.9088, "eval_samples_per_second": 260.536, "eval_steps_per_second": 8.25, "step": 5118 }, { "epoch": 91.02, "learning_rate": 4.7619047619047615e-06, "loss": 0.8845, "step": 5120 }, { "epoch": 91.2, "learning_rate": 4.662698412698413e-06, "loss": 0.8444, "step": 5130 }, { "epoch": 91.38, "learning_rate": 4.563492063492064e-06, "loss": 0.8317, "step": 5140 }, { "epoch": 91.56, "learning_rate": 4.464285714285715e-06, "loss": 0.8954, "step": 5150 }, { "epoch": 91.73, "learning_rate": 4.365079365079365e-06, "loss": 0.8456, "step": 5160 }, { "epoch": 91.91, "learning_rate": 4.265873015873016e-06, "loss": 0.8301, "step": 5170 }, { "epoch": 92.0, "eval_accuracy": 0.4633333333333333, "eval_loss": 1.7615541219711304, "eval_runtime": 6.8433, "eval_samples_per_second": 263.03, "eval_steps_per_second": 8.329, "step": 5175 }, { "epoch": 92.09, "learning_rate": 4.166666666666667e-06, "loss": 0.8093, "step": 5180 }, { "epoch": 92.27, "learning_rate": 4.067460317460318e-06, "loss": 0.777, "step": 5190 }, { "epoch": 92.44, "learning_rate": 3.968253968253968e-06, "loss": 0.8227, "step": 5200 }, { "epoch": 92.62, "learning_rate": 3.869047619047619e-06, "loss": 0.8856, "step": 5210 }, { "epoch": 92.8, "learning_rate": 3.7698412698412697e-06, "loss": 0.8843, "step": 5220 }, { "epoch": 92.98, "learning_rate": 3.6706349206349208e-06, "loss": 0.7752, "step": 5230 }, { "epoch": 93.0, "eval_accuracy": 0.4722222222222222, "eval_loss": 1.758492112159729, "eval_runtime": 6.7386, "eval_samples_per_second": 267.116, "eval_steps_per_second": 8.459, "step": 5231 }, { "epoch": 93.16, "learning_rate": 3.5714285714285714e-06, "loss": 0.8809, "step": 5240 }, { "epoch": 93.33, "learning_rate": 3.4722222222222224e-06, "loss": 0.8723, "step": 5250 }, { "epoch": 93.51, "learning_rate": 3.373015873015873e-06, "loss": 0.8171, "step": 5260 }, { "epoch": 93.69, "learning_rate": 3.273809523809524e-06, "loss": 0.8263, "step": 5270 }, { "epoch": 93.87, "learning_rate": 3.1746031746031746e-06, "loss": 0.8533, "step": 5280 }, { "epoch": 93.99, "eval_accuracy": 0.46166666666666667, "eval_loss": 1.7842094898223877, "eval_runtime": 6.567, "eval_samples_per_second": 274.097, "eval_steps_per_second": 8.68, "step": 5287 }, { "epoch": 94.04, "learning_rate": 3.0753968253968252e-06, "loss": 0.8544, "step": 5290 }, { "epoch": 94.22, "learning_rate": 2.9761904761904763e-06, "loss": 0.8108, "step": 5300 }, { "epoch": 94.4, "learning_rate": 2.876984126984127e-06, "loss": 0.8582, "step": 5310 }, { "epoch": 94.58, "learning_rate": 2.777777777777778e-06, "loss": 0.796, "step": 5320 }, { "epoch": 94.76, "learning_rate": 2.6785714285714285e-06, "loss": 0.8624, "step": 5330 }, { "epoch": 94.93, "learning_rate": 2.5793650793650795e-06, "loss": 0.8156, "step": 5340 }, { "epoch": 94.99, "eval_accuracy": 0.4622222222222222, "eval_loss": 1.7836720943450928, "eval_runtime": 7.173, "eval_samples_per_second": 250.943, "eval_steps_per_second": 7.947, "step": 5343 }, { "epoch": 95.11, "learning_rate": 2.48015873015873e-06, "loss": 0.8156, "step": 5350 }, { "epoch": 95.29, "learning_rate": 2.3809523809523808e-06, "loss": 0.8371, "step": 5360 }, { "epoch": 95.47, "learning_rate": 2.281746031746032e-06, "loss": 0.8235, "step": 5370 }, { "epoch": 95.64, "learning_rate": 2.1825396825396824e-06, "loss": 0.8466, "step": 5380 }, { "epoch": 95.82, "learning_rate": 2.0833333333333334e-06, "loss": 0.8283, "step": 5390 }, { "epoch": 96.0, "learning_rate": 1.984126984126984e-06, "loss": 0.8094, "step": 5400 }, { "epoch": 96.0, "eval_accuracy": 0.4583333333333333, "eval_loss": 1.7896106243133545, "eval_runtime": 6.5048, "eval_samples_per_second": 276.719, "eval_steps_per_second": 8.763, "step": 5400 }, { "epoch": 96.18, "learning_rate": 1.8849206349206349e-06, "loss": 0.7934, "step": 5410 }, { "epoch": 96.36, "learning_rate": 1.7857142857142857e-06, "loss": 0.8529, "step": 5420 }, { "epoch": 96.53, "learning_rate": 1.6865079365079365e-06, "loss": 0.8691, "step": 5430 }, { "epoch": 96.71, "learning_rate": 1.5873015873015873e-06, "loss": 0.8508, "step": 5440 }, { "epoch": 96.89, "learning_rate": 1.4880952380952381e-06, "loss": 0.839, "step": 5450 }, { "epoch": 97.0, "eval_accuracy": 0.465, "eval_loss": 1.7834640741348267, "eval_runtime": 6.5614, "eval_samples_per_second": 274.333, "eval_steps_per_second": 8.687, "step": 5456 }, { "epoch": 97.07, "learning_rate": 1.388888888888889e-06, "loss": 0.8844, "step": 5460 }, { "epoch": 97.24, "learning_rate": 1.2896825396825398e-06, "loss": 0.8183, "step": 5470 }, { "epoch": 97.42, "learning_rate": 1.1904761904761904e-06, "loss": 0.8597, "step": 5480 }, { "epoch": 97.6, "learning_rate": 1.0912698412698412e-06, "loss": 0.8369, "step": 5490 }, { "epoch": 97.78, "learning_rate": 9.92063492063492e-07, "loss": 0.8169, "step": 5500 }, { "epoch": 97.96, "learning_rate": 8.928571428571428e-07, "loss": 0.839, "step": 5510 }, { "epoch": 97.99, "eval_accuracy": 0.46, "eval_loss": 1.7882862091064453, "eval_runtime": 6.3877, "eval_samples_per_second": 281.793, "eval_steps_per_second": 8.923, "step": 5512 }, { "epoch": 98.13, "learning_rate": 7.936507936507937e-07, "loss": 0.8395, "step": 5520 }, { "epoch": 98.31, "learning_rate": 6.944444444444445e-07, "loss": 0.8356, "step": 5530 }, { "epoch": 98.49, "learning_rate": 5.952380952380952e-07, "loss": 0.8396, "step": 5540 }, { "epoch": 98.67, "learning_rate": 4.96031746031746e-07, "loss": 0.8194, "step": 5550 }, { "epoch": 98.84, "learning_rate": 3.9682539682539683e-07, "loss": 0.7763, "step": 5560 }, { "epoch": 98.99, "eval_accuracy": 0.45944444444444443, "eval_loss": 1.7838345766067505, "eval_runtime": 6.912, "eval_samples_per_second": 260.415, "eval_steps_per_second": 8.246, "step": 5568 }, { "epoch": 99.02, "learning_rate": 2.976190476190476e-07, "loss": 0.809, "step": 5570 }, { "epoch": 99.2, "learning_rate": 1.9841269841269841e-07, "loss": 0.8239, "step": 5580 }, { "epoch": 99.38, "learning_rate": 9.920634920634921e-08, "loss": 0.7982, "step": 5590 }, { "epoch": 99.56, "learning_rate": 0.0, "loss": 0.8186, "step": 5600 }, { "epoch": 99.56, "eval_accuracy": 0.46055555555555555, "eval_loss": 1.7836859226226807, "eval_runtime": 6.4969, "eval_samples_per_second": 277.055, "eval_steps_per_second": 8.773, "step": 5600 }, { "epoch": 99.56, "step": 5600, "total_flos": 1.782025780985856e+19, "train_loss": 1.117841152037893, "train_runtime": 7254.7208, "train_samples_per_second": 99.246, "train_steps_per_second": 0.772 } ], "logging_steps": 10, "max_steps": 5600, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 500, "total_flos": 1.782025780985856e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }