{ "best_metric": 0.3712206780910492, "best_model_checkpoint": "./outputs/checkpoint-59925", "epoch": 1200.0, "global_step": 61200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.2, "learning_rate": 4.901960784313725e-07, "loss": 1.1885, "step": 10 }, { "epoch": 0.39, "learning_rate": 9.80392156862745e-07, "loss": 1.182, "step": 20 }, { "epoch": 0.59, "learning_rate": 1.4705882352941175e-06, "loss": 1.1666, "step": 30 }, { "epoch": 0.78, "learning_rate": 1.96078431372549e-06, "loss": 1.1481, "step": 40 }, { "epoch": 0.98, "learning_rate": 2.450980392156863e-06, "loss": 1.1257, "step": 50 }, { "epoch": 1.0, "eval_loss": 1.1119023561477661, "eval_runtime": 2.2391, "eval_samples_per_second": 1017.827, "eval_steps_per_second": 4.02, "step": 51 }, { "epoch": 1.18, "learning_rate": 2.941176470588235e-06, "loss": 1.1055, "step": 60 }, { "epoch": 1.37, "learning_rate": 3.4313725490196073e-06, "loss": 1.0888, "step": 70 }, { "epoch": 1.57, "learning_rate": 3.92156862745098e-06, "loss": 1.0737, "step": 80 }, { "epoch": 1.76, "learning_rate": 4.4117647058823526e-06, "loss": 1.0625, "step": 90 }, { "epoch": 1.96, "learning_rate": 4.901960784313726e-06, "loss": 1.0507, "step": 100 }, { "epoch": 2.0, "eval_loss": 1.0434480905532837, "eval_runtime": 2.0694, "eval_samples_per_second": 1101.261, "eval_steps_per_second": 4.349, "step": 102 }, { "epoch": 2.16, "learning_rate": 5.392156862745097e-06, "loss": 1.0403, "step": 110 }, { "epoch": 2.35, "learning_rate": 5.88235294117647e-06, "loss": 1.0303, "step": 120 }, { "epoch": 2.55, "learning_rate": 6.372549019607843e-06, "loss": 1.0213, "step": 130 }, { "epoch": 2.75, "learning_rate": 6.8627450980392145e-06, "loss": 1.013, "step": 140 }, { "epoch": 2.94, "learning_rate": 7.352941176470588e-06, "loss": 1.0046, "step": 150 }, { "epoch": 3.0, "eval_loss": 0.9987890720367432, "eval_runtime": 2.0662, "eval_samples_per_second": 1103.006, "eval_steps_per_second": 4.356, "step": 153 }, { "epoch": 3.14, "learning_rate": 7.84313725490196e-06, "loss": 0.9976, "step": 160 }, { "epoch": 3.33, "learning_rate": 8.333333333333332e-06, "loss": 0.9917, "step": 170 }, { "epoch": 3.53, "learning_rate": 8.823529411764705e-06, "loss": 0.9867, "step": 180 }, { "epoch": 3.73, "learning_rate": 9.313725490196078e-06, "loss": 0.9803, "step": 190 }, { "epoch": 3.92, "learning_rate": 9.803921568627451e-06, "loss": 0.9761, "step": 200 }, { "epoch": 4.0, "eval_loss": 0.9724870920181274, "eval_runtime": 2.0693, "eval_samples_per_second": 1101.346, "eval_steps_per_second": 4.349, "step": 204 }, { "epoch": 4.12, "learning_rate": 1.0294117647058824e-05, "loss": 0.9732, "step": 210 }, { "epoch": 4.31, "learning_rate": 1.0784313725490194e-05, "loss": 0.969, "step": 220 }, { "epoch": 4.51, "learning_rate": 1.1274509803921567e-05, "loss": 0.9652, "step": 230 }, { "epoch": 4.71, "learning_rate": 1.176470588235294e-05, "loss": 0.9613, "step": 240 }, { "epoch": 4.9, "learning_rate": 1.2254901960784313e-05, "loss": 0.9572, "step": 250 }, { "epoch": 5.0, "eval_loss": 0.952890157699585, "eval_runtime": 2.0561, "eval_samples_per_second": 1108.418, "eval_steps_per_second": 4.377, "step": 255 }, { "epoch": 5.1, "learning_rate": 1.2745098039215686e-05, "loss": 0.9535, "step": 260 }, { "epoch": 5.29, "learning_rate": 1.323529411764706e-05, "loss": 0.9486, "step": 270 }, { "epoch": 5.49, "learning_rate": 1.3725490196078429e-05, "loss": 0.9442, "step": 280 }, { "epoch": 5.69, "learning_rate": 1.4215686274509802e-05, "loss": 0.9397, "step": 290 }, { "epoch": 5.88, "learning_rate": 1.4705882352941175e-05, "loss": 0.9357, "step": 300 }, { "epoch": 6.0, "eval_loss": 0.9303520917892456, "eval_runtime": 2.0452, "eval_samples_per_second": 1114.339, "eval_steps_per_second": 4.401, "step": 306 }, { "epoch": 6.08, "learning_rate": 1.5196078431372548e-05, "loss": 0.933, "step": 310 }, { "epoch": 6.27, "learning_rate": 1.568627450980392e-05, "loss": 0.9253, "step": 320 }, { "epoch": 6.47, "learning_rate": 1.6176470588235293e-05, "loss": 0.9209, "step": 330 }, { "epoch": 6.67, "learning_rate": 1.6666666666666664e-05, "loss": 0.9187, "step": 340 }, { "epoch": 6.86, "learning_rate": 1.7156862745098035e-05, "loss": 0.9128, "step": 350 }, { "epoch": 7.0, "eval_loss": 0.9099854826927185, "eval_runtime": 2.1224, "eval_samples_per_second": 1073.784, "eval_steps_per_second": 4.24, "step": 357 }, { "epoch": 7.06, "learning_rate": 1.764705882352941e-05, "loss": 0.9093, "step": 360 }, { "epoch": 7.25, "learning_rate": 1.813725490196078e-05, "loss": 0.9083, "step": 370 }, { "epoch": 7.45, "learning_rate": 1.8627450980392156e-05, "loss": 0.9076, "step": 380 }, { "epoch": 7.65, "learning_rate": 1.9117647058823528e-05, "loss": 0.9058, "step": 390 }, { "epoch": 7.84, "learning_rate": 1.9607843137254903e-05, "loss": 0.9037, "step": 400 }, { "epoch": 8.0, "eval_loss": 0.9003704786300659, "eval_runtime": 2.1145, "eval_samples_per_second": 1077.8, "eval_steps_per_second": 4.256, "step": 408 }, { "epoch": 8.04, "learning_rate": 2.009803921568627e-05, "loss": 0.9048, "step": 410 }, { "epoch": 8.24, "learning_rate": 2.058823529411765e-05, "loss": 0.8982, "step": 420 }, { "epoch": 8.43, "learning_rate": 2.1078431372549017e-05, "loss": 0.8997, "step": 430 }, { "epoch": 8.63, "learning_rate": 2.1568627450980388e-05, "loss": 0.8968, "step": 440 }, { "epoch": 8.82, "learning_rate": 2.2058823529411763e-05, "loss": 0.8984, "step": 450 }, { "epoch": 9.0, "eval_loss": 0.8941003084182739, "eval_runtime": 2.0854, "eval_samples_per_second": 1092.816, "eval_steps_per_second": 4.316, "step": 459 }, { "epoch": 9.02, "learning_rate": 2.2549019607843134e-05, "loss": 0.8978, "step": 460 }, { "epoch": 9.22, "learning_rate": 2.303921568627451e-05, "loss": 0.8946, "step": 470 }, { "epoch": 9.41, "learning_rate": 2.352941176470588e-05, "loss": 0.8958, "step": 480 }, { "epoch": 9.61, "learning_rate": 2.401960784313725e-05, "loss": 0.8906, "step": 490 }, { "epoch": 9.8, "learning_rate": 2.4509803921568626e-05, "loss": 0.8909, "step": 500 }, { "epoch": 10.0, "learning_rate": 2.4999999999999998e-05, "loss": 0.8904, "step": 510 }, { "epoch": 10.0, "eval_loss": 0.8895702958106995, "eval_runtime": 2.056, "eval_samples_per_second": 1108.47, "eval_steps_per_second": 4.377, "step": 510 }, { "epoch": 10.2, "learning_rate": 2.5490196078431373e-05, "loss": 0.8899, "step": 520 }, { "epoch": 10.39, "learning_rate": 2.598039215686274e-05, "loss": 0.8879, "step": 530 }, { "epoch": 10.59, "learning_rate": 2.647058823529412e-05, "loss": 0.8879, "step": 540 }, { "epoch": 10.78, "learning_rate": 2.6960784313725487e-05, "loss": 0.885, "step": 550 }, { "epoch": 10.98, "learning_rate": 2.7450980392156858e-05, "loss": 0.8846, "step": 560 }, { "epoch": 11.0, "eval_loss": 0.8801982998847961, "eval_runtime": 2.1061, "eval_samples_per_second": 1082.111, "eval_steps_per_second": 4.273, "step": 561 }, { "epoch": 11.18, "learning_rate": 2.7941176470588233e-05, "loss": 0.8806, "step": 570 }, { "epoch": 11.37, "learning_rate": 2.8431372549019604e-05, "loss": 0.8803, "step": 580 }, { "epoch": 11.57, "learning_rate": 2.892156862745098e-05, "loss": 0.8801, "step": 590 }, { "epoch": 11.76, "learning_rate": 2.941176470588235e-05, "loss": 0.8795, "step": 600 }, { "epoch": 11.96, "learning_rate": 2.9901960784313725e-05, "loss": 0.8748, "step": 610 }, { "epoch": 12.0, "eval_loss": 0.8775041699409485, "eval_runtime": 2.1281, "eval_samples_per_second": 1070.893, "eval_steps_per_second": 4.229, "step": 612 }, { "epoch": 12.16, "learning_rate": 3.0392156862745097e-05, "loss": 0.8723, "step": 620 }, { "epoch": 12.35, "learning_rate": 3.088235294117647e-05, "loss": 0.8733, "step": 630 }, { "epoch": 12.55, "learning_rate": 3.137254901960784e-05, "loss": 0.8739, "step": 640 }, { "epoch": 12.75, "learning_rate": 3.186274509803921e-05, "loss": 0.8722, "step": 650 }, { "epoch": 12.94, "learning_rate": 3.2352941176470585e-05, "loss": 0.8692, "step": 660 }, { "epoch": 13.0, "eval_loss": 0.8685004115104675, "eval_runtime": 2.1995, "eval_samples_per_second": 1036.155, "eval_steps_per_second": 4.092, "step": 663 }, { "epoch": 13.14, "learning_rate": 3.284313725490196e-05, "loss": 0.8681, "step": 670 }, { "epoch": 13.33, "learning_rate": 3.333333333333333e-05, "loss": 0.865, "step": 680 }, { "epoch": 13.53, "learning_rate": 3.38235294117647e-05, "loss": 0.8655, "step": 690 }, { "epoch": 13.73, "learning_rate": 3.431372549019607e-05, "loss": 0.8673, "step": 700 }, { "epoch": 13.92, "learning_rate": 3.480392156862745e-05, "loss": 0.8656, "step": 710 }, { "epoch": 14.0, "eval_loss": 0.866548478603363, "eval_runtime": 2.0997, "eval_samples_per_second": 1085.376, "eval_steps_per_second": 4.286, "step": 714 }, { "epoch": 14.12, "learning_rate": 3.529411764705882e-05, "loss": 0.8644, "step": 720 }, { "epoch": 14.31, "learning_rate": 3.5784313725490195e-05, "loss": 0.8649, "step": 730 }, { "epoch": 14.51, "learning_rate": 3.627450980392156e-05, "loss": 0.8648, "step": 740 }, { "epoch": 14.71, "learning_rate": 3.676470588235294e-05, "loss": 0.8614, "step": 750 }, { "epoch": 14.9, "learning_rate": 3.725490196078431e-05, "loss": 0.8634, "step": 760 }, { "epoch": 15.0, "eval_loss": 0.8607373833656311, "eval_runtime": 2.2131, "eval_samples_per_second": 1029.772, "eval_steps_per_second": 4.067, "step": 765 }, { "epoch": 15.1, "learning_rate": 3.774509803921568e-05, "loss": 0.8596, "step": 770 }, { "epoch": 15.29, "learning_rate": 3.8235294117647055e-05, "loss": 0.861, "step": 780 }, { "epoch": 15.49, "learning_rate": 3.872549019607843e-05, "loss": 0.8584, "step": 790 }, { "epoch": 15.69, "learning_rate": 3.9215686274509805e-05, "loss": 0.8613, "step": 800 }, { "epoch": 15.88, "learning_rate": 3.970588235294117e-05, "loss": 0.8565, "step": 810 }, { "epoch": 16.0, "eval_loss": 0.8560643196105957, "eval_runtime": 2.0515, "eval_samples_per_second": 1110.887, "eval_steps_per_second": 4.387, "step": 816 }, { "epoch": 16.08, "learning_rate": 4.019607843137254e-05, "loss": 0.8546, "step": 820 }, { "epoch": 16.27, "learning_rate": 4.0686274509803916e-05, "loss": 0.8574, "step": 830 }, { "epoch": 16.47, "learning_rate": 4.11764705882353e-05, "loss": 0.8572, "step": 840 }, { "epoch": 16.67, "learning_rate": 4.1666666666666665e-05, "loss": 0.8582, "step": 850 }, { "epoch": 16.86, "learning_rate": 4.215686274509803e-05, "loss": 0.8555, "step": 860 }, { "epoch": 17.0, "eval_loss": 0.8547664880752563, "eval_runtime": 2.1626, "eval_samples_per_second": 1053.802, "eval_steps_per_second": 4.162, "step": 867 }, { "epoch": 17.06, "learning_rate": 4.264705882352941e-05, "loss": 0.8562, "step": 870 }, { "epoch": 17.25, "learning_rate": 4.3137254901960776e-05, "loss": 0.8546, "step": 880 }, { "epoch": 17.45, "learning_rate": 4.362745098039216e-05, "loss": 0.8538, "step": 890 }, { "epoch": 17.65, "learning_rate": 4.4117647058823526e-05, "loss": 0.8529, "step": 900 }, { "epoch": 17.84, "learning_rate": 4.46078431372549e-05, "loss": 0.8521, "step": 910 }, { "epoch": 18.0, "eval_loss": 0.8463531732559204, "eval_runtime": 2.1408, "eval_samples_per_second": 1064.533, "eval_steps_per_second": 4.204, "step": 918 }, { "epoch": 18.04, "learning_rate": 4.509803921568627e-05, "loss": 0.8503, "step": 920 }, { "epoch": 18.24, "learning_rate": 4.5588235294117636e-05, "loss": 0.849, "step": 930 }, { "epoch": 18.43, "learning_rate": 4.607843137254902e-05, "loss": 0.8514, "step": 940 }, { "epoch": 18.63, "learning_rate": 4.6568627450980386e-05, "loss": 0.8518, "step": 950 }, { "epoch": 18.82, "learning_rate": 4.705882352941176e-05, "loss": 0.8478, "step": 960 }, { "epoch": 19.0, "eval_loss": 0.8448628783226013, "eval_runtime": 2.1443, "eval_samples_per_second": 1062.799, "eval_steps_per_second": 4.197, "step": 969 }, { "epoch": 19.02, "learning_rate": 4.754901960784313e-05, "loss": 0.8473, "step": 970 }, { "epoch": 19.22, "learning_rate": 4.80392156862745e-05, "loss": 0.8483, "step": 980 }, { "epoch": 19.41, "learning_rate": 4.852941176470588e-05, "loss": 0.8437, "step": 990 }, { "epoch": 19.61, "learning_rate": 4.901960784313725e-05, "loss": 0.8462, "step": 1000 }, { "epoch": 19.8, "learning_rate": 4.950980392156862e-05, "loss": 0.8491, "step": 1010 }, { "epoch": 20.0, "learning_rate": 4.9999999999999996e-05, "loss": 0.847, "step": 1020 }, { "epoch": 20.0, "eval_loss": 0.8455402255058289, "eval_runtime": 2.0671, "eval_samples_per_second": 1102.533, "eval_steps_per_second": 4.354, "step": 1020 }, { "epoch": 20.2, "learning_rate": 5.049019607843137e-05, "loss": 0.8424, "step": 1030 }, { "epoch": 20.39, "learning_rate": 5.0980392156862745e-05, "loss": 0.8427, "step": 1040 }, { "epoch": 20.59, "learning_rate": 5.147058823529411e-05, "loss": 0.8415, "step": 1050 }, { "epoch": 20.78, "learning_rate": 5.196078431372548e-05, "loss": 0.8443, "step": 1060 }, { "epoch": 20.98, "learning_rate": 5.2450980392156856e-05, "loss": 0.842, "step": 1070 }, { "epoch": 21.0, "eval_loss": 0.8377746939659119, "eval_runtime": 2.1007, "eval_samples_per_second": 1084.901, "eval_steps_per_second": 4.284, "step": 1071 }, { "epoch": 21.18, "learning_rate": 5.294117647058824e-05, "loss": 0.836, "step": 1080 }, { "epoch": 21.37, "learning_rate": 5.3431372549019605e-05, "loss": 0.8397, "step": 1090 }, { "epoch": 21.57, "learning_rate": 5.3921568627450973e-05, "loss": 0.8357, "step": 1100 }, { "epoch": 21.76, "learning_rate": 5.441176470588235e-05, "loss": 0.8377, "step": 1110 }, { "epoch": 21.96, "learning_rate": 5.4901960784313716e-05, "loss": 0.8385, "step": 1120 }, { "epoch": 22.0, "eval_loss": 0.8358024954795837, "eval_runtime": 2.1023, "eval_samples_per_second": 1084.065, "eval_steps_per_second": 4.281, "step": 1122 }, { "epoch": 22.16, "learning_rate": 5.53921568627451e-05, "loss": 0.836, "step": 1130 }, { "epoch": 22.35, "learning_rate": 5.5882352941176466e-05, "loss": 0.8319, "step": 1140 }, { "epoch": 22.55, "learning_rate": 5.637254901960784e-05, "loss": 0.8307, "step": 1150 }, { "epoch": 22.75, "learning_rate": 5.686274509803921e-05, "loss": 0.8343, "step": 1160 }, { "epoch": 22.94, "learning_rate": 5.7352941176470576e-05, "loss": 0.8319, "step": 1170 }, { "epoch": 23.0, "eval_loss": 0.8331688046455383, "eval_runtime": 2.1797, "eval_samples_per_second": 1045.554, "eval_steps_per_second": 4.129, "step": 1173 }, { "epoch": 23.14, "learning_rate": 5.784313725490196e-05, "loss": 0.8339, "step": 1180 }, { "epoch": 23.33, "learning_rate": 5.8333333333333326e-05, "loss": 0.8361, "step": 1190 }, { "epoch": 23.53, "learning_rate": 5.88235294117647e-05, "loss": 0.8297, "step": 1200 }, { "epoch": 23.73, "learning_rate": 5.931372549019607e-05, "loss": 0.83, "step": 1210 }, { "epoch": 23.92, "learning_rate": 5.980392156862745e-05, "loss": 0.8267, "step": 1220 }, { "epoch": 24.0, "eval_loss": 0.8347041606903076, "eval_runtime": 2.0517, "eval_samples_per_second": 1110.806, "eval_steps_per_second": 4.387, "step": 1224 }, { "epoch": 24.12, "learning_rate": 6.029411764705882e-05, "loss": 0.8316, "step": 1230 }, { "epoch": 24.31, "learning_rate": 6.078431372549019e-05, "loss": 0.825, "step": 1240 }, { "epoch": 24.51, "learning_rate": 6.127450980392157e-05, "loss": 0.8269, "step": 1250 }, { "epoch": 24.71, "learning_rate": 6.176470588235294e-05, "loss": 0.8267, "step": 1260 }, { "epoch": 24.9, "learning_rate": 6.225490196078432e-05, "loss": 0.8266, "step": 1270 }, { "epoch": 25.0, "eval_loss": 0.8246671557426453, "eval_runtime": 2.0855, "eval_samples_per_second": 1092.759, "eval_steps_per_second": 4.315, "step": 1275 }, { "epoch": 25.1, "learning_rate": 6.274509803921569e-05, "loss": 0.8247, "step": 1280 }, { "epoch": 25.29, "learning_rate": 6.323529411764705e-05, "loss": 0.8223, "step": 1290 }, { "epoch": 25.49, "learning_rate": 6.372549019607842e-05, "loss": 0.822, "step": 1300 }, { "epoch": 25.69, "learning_rate": 6.421568627450979e-05, "loss": 0.8234, "step": 1310 }, { "epoch": 25.88, "learning_rate": 6.470588235294117e-05, "loss": 0.8242, "step": 1320 }, { "epoch": 26.0, "eval_loss": 0.8241580128669739, "eval_runtime": 2.044, "eval_samples_per_second": 1114.944, "eval_steps_per_second": 4.403, "step": 1326 }, { "epoch": 26.08, "learning_rate": 6.519607843137254e-05, "loss": 0.8246, "step": 1330 }, { "epoch": 26.27, "learning_rate": 6.568627450980392e-05, "loss": 0.8212, "step": 1340 }, { "epoch": 26.47, "learning_rate": 6.617647058823529e-05, "loss": 0.8264, "step": 1350 }, { "epoch": 26.67, "learning_rate": 6.666666666666666e-05, "loss": 0.8215, "step": 1360 }, { "epoch": 26.86, "learning_rate": 6.715686274509804e-05, "loss": 0.8215, "step": 1370 }, { "epoch": 27.0, "eval_loss": 0.8191553354263306, "eval_runtime": 2.1872, "eval_samples_per_second": 1041.969, "eval_steps_per_second": 4.115, "step": 1377 }, { "epoch": 27.06, "learning_rate": 6.76470588235294e-05, "loss": 0.8234, "step": 1380 }, { "epoch": 27.25, "learning_rate": 6.813725490196077e-05, "loss": 0.8182, "step": 1390 }, { "epoch": 27.45, "learning_rate": 6.862745098039214e-05, "loss": 0.8173, "step": 1400 }, { "epoch": 27.65, "learning_rate": 6.911764705882352e-05, "loss": 0.816, "step": 1410 }, { "epoch": 27.84, "learning_rate": 6.96078431372549e-05, "loss": 0.8171, "step": 1420 }, { "epoch": 28.0, "eval_loss": 0.8213248252868652, "eval_runtime": 2.2192, "eval_samples_per_second": 1026.926, "eval_steps_per_second": 4.055, "step": 1428 }, { "epoch": 28.04, "learning_rate": 7.009803921568627e-05, "loss": 0.8199, "step": 1430 }, { "epoch": 28.24, "learning_rate": 7.058823529411764e-05, "loss": 0.8202, "step": 1440 }, { "epoch": 28.43, "learning_rate": 7.107843137254901e-05, "loss": 0.8149, "step": 1450 }, { "epoch": 28.63, "learning_rate": 7.156862745098039e-05, "loss": 0.8163, "step": 1460 }, { "epoch": 28.82, "learning_rate": 7.205882352941176e-05, "loss": 0.8176, "step": 1470 }, { "epoch": 29.0, "eval_loss": 0.8160317540168762, "eval_runtime": 2.1369, "eval_samples_per_second": 1066.475, "eval_steps_per_second": 4.212, "step": 1479 }, { "epoch": 29.02, "learning_rate": 7.254901960784313e-05, "loss": 0.8171, "step": 1480 }, { "epoch": 29.22, "learning_rate": 7.303921568627451e-05, "loss": 0.8171, "step": 1490 }, { "epoch": 29.41, "learning_rate": 7.352941176470588e-05, "loss": 0.8127, "step": 1500 }, { "epoch": 29.61, "learning_rate": 7.401960784313726e-05, "loss": 0.8192, "step": 1510 }, { "epoch": 29.8, "learning_rate": 7.450980392156863e-05, "loss": 0.8142, "step": 1520 }, { "epoch": 30.0, "learning_rate": 7.5e-05, "loss": 0.8122, "step": 1530 }, { "epoch": 30.0, "eval_loss": 0.8127588629722595, "eval_runtime": 2.1677, "eval_samples_per_second": 1051.341, "eval_steps_per_second": 4.152, "step": 1530 }, { "epoch": 30.2, "learning_rate": 7.549019607843136e-05, "loss": 0.8164, "step": 1540 }, { "epoch": 30.39, "learning_rate": 7.598039215686273e-05, "loss": 0.8143, "step": 1550 }, { "epoch": 30.59, "learning_rate": 7.647058823529411e-05, "loss": 0.8113, "step": 1560 }, { "epoch": 30.78, "learning_rate": 7.696078431372548e-05, "loss": 0.8075, "step": 1570 }, { "epoch": 30.98, "learning_rate": 7.745098039215686e-05, "loss": 0.8107, "step": 1580 }, { "epoch": 31.0, "eval_loss": 0.803588330745697, "eval_runtime": 2.1888, "eval_samples_per_second": 1041.189, "eval_steps_per_second": 4.112, "step": 1581 }, { "epoch": 31.18, "learning_rate": 7.794117647058823e-05, "loss": 0.8038, "step": 1590 }, { "epoch": 31.37, "learning_rate": 7.843137254901961e-05, "loss": 0.8105, "step": 1600 }, { "epoch": 31.57, "learning_rate": 7.892156862745098e-05, "loss": 0.8057, "step": 1610 }, { "epoch": 31.76, "learning_rate": 7.941176470588235e-05, "loss": 0.8085, "step": 1620 }, { "epoch": 31.96, "learning_rate": 7.990196078431371e-05, "loss": 0.8069, "step": 1630 }, { "epoch": 32.0, "eval_loss": 0.8068580031394958, "eval_runtime": 2.182, "eval_samples_per_second": 1044.445, "eval_steps_per_second": 4.125, "step": 1632 }, { "epoch": 32.16, "learning_rate": 8.039215686274508e-05, "loss": 0.8078, "step": 1640 }, { "epoch": 32.35, "learning_rate": 8.088235294117646e-05, "loss": 0.8101, "step": 1650 }, { "epoch": 32.55, "learning_rate": 8.137254901960783e-05, "loss": 0.8126, "step": 1660 }, { "epoch": 32.75, "learning_rate": 8.18627450980392e-05, "loss": 0.8108, "step": 1670 }, { "epoch": 32.94, "learning_rate": 8.23529411764706e-05, "loss": 0.8081, "step": 1680 }, { "epoch": 33.0, "eval_loss": 0.8022666573524475, "eval_runtime": 2.2411, "eval_samples_per_second": 1016.898, "eval_steps_per_second": 4.016, "step": 1683 }, { "epoch": 33.14, "learning_rate": 8.284313725490196e-05, "loss": 0.8091, "step": 1690 }, { "epoch": 33.33, "learning_rate": 8.333333333333333e-05, "loss": 0.8027, "step": 1700 }, { "epoch": 33.53, "learning_rate": 8.38235294117647e-05, "loss": 0.8029, "step": 1710 }, { "epoch": 33.73, "learning_rate": 8.431372549019607e-05, "loss": 0.8044, "step": 1720 }, { "epoch": 33.92, "learning_rate": 8.480392156862745e-05, "loss": 0.8043, "step": 1730 }, { "epoch": 34.0, "eval_loss": 0.8047605752944946, "eval_runtime": 2.0682, "eval_samples_per_second": 1101.943, "eval_steps_per_second": 4.352, "step": 1734 }, { "epoch": 34.12, "learning_rate": 8.529411764705882e-05, "loss": 0.8112, "step": 1740 }, { "epoch": 34.31, "learning_rate": 8.578431372549018e-05, "loss": 0.8091, "step": 1750 }, { "epoch": 34.51, "learning_rate": 8.627450980392155e-05, "loss": 0.8104, "step": 1760 }, { "epoch": 34.71, "learning_rate": 8.676470588235295e-05, "loss": 0.8062, "step": 1770 }, { "epoch": 34.9, "learning_rate": 8.725490196078432e-05, "loss": 0.8071, "step": 1780 }, { "epoch": 35.0, "eval_loss": 0.8082063794136047, "eval_runtime": 2.0463, "eval_samples_per_second": 1113.701, "eval_steps_per_second": 4.398, "step": 1785 }, { "epoch": 35.1, "learning_rate": 8.774509803921568e-05, "loss": 0.8066, "step": 1790 }, { "epoch": 35.29, "learning_rate": 8.823529411764705e-05, "loss": 0.8041, "step": 1800 }, { "epoch": 35.49, "learning_rate": 8.872549019607842e-05, "loss": 0.8009, "step": 1810 }, { "epoch": 35.69, "learning_rate": 8.92156862745098e-05, "loss": 0.7993, "step": 1820 }, { "epoch": 35.88, "learning_rate": 8.970588235294117e-05, "loss": 0.8017, "step": 1830 }, { "epoch": 36.0, "eval_loss": 0.7971303462982178, "eval_runtime": 2.1621, "eval_samples_per_second": 1054.087, "eval_steps_per_second": 4.163, "step": 1836 }, { "epoch": 36.08, "learning_rate": 9.019607843137254e-05, "loss": 0.7994, "step": 1840 }, { "epoch": 36.27, "learning_rate": 9.06862745098039e-05, "loss": 0.7953, "step": 1850 }, { "epoch": 36.47, "learning_rate": 9.117647058823527e-05, "loss": 0.8009, "step": 1860 }, { "epoch": 36.67, "learning_rate": 9.166666666666667e-05, "loss": 0.7986, "step": 1870 }, { "epoch": 36.86, "learning_rate": 9.215686274509804e-05, "loss": 0.7965, "step": 1880 }, { "epoch": 37.0, "eval_loss": 0.7953115105628967, "eval_runtime": 2.096, "eval_samples_per_second": 1087.315, "eval_steps_per_second": 4.294, "step": 1887 }, { "epoch": 37.06, "learning_rate": 9.26470588235294e-05, "loss": 0.7974, "step": 1890 }, { "epoch": 37.25, "learning_rate": 9.313725490196077e-05, "loss": 0.7956, "step": 1900 }, { "epoch": 37.45, "learning_rate": 9.362745098039215e-05, "loss": 0.7954, "step": 1910 }, { "epoch": 37.65, "learning_rate": 9.411764705882352e-05, "loss": 0.7934, "step": 1920 }, { "epoch": 37.84, "learning_rate": 9.460784313725489e-05, "loss": 0.7953, "step": 1930 }, { "epoch": 38.0, "eval_loss": 0.8111655712127686, "eval_runtime": 2.1785, "eval_samples_per_second": 1046.146, "eval_steps_per_second": 4.131, "step": 1938 }, { "epoch": 38.04, "learning_rate": 9.509803921568626e-05, "loss": 0.7976, "step": 1940 }, { "epoch": 38.24, "learning_rate": 9.558823529411764e-05, "loss": 0.8007, "step": 1950 }, { "epoch": 38.43, "learning_rate": 9.6078431372549e-05, "loss": 0.8014, "step": 1960 }, { "epoch": 38.63, "learning_rate": 9.656862745098039e-05, "loss": 0.7939, "step": 1970 }, { "epoch": 38.82, "learning_rate": 9.705882352941176e-05, "loss": 0.7979, "step": 1980 }, { "epoch": 39.0, "eval_loss": 0.7954539656639099, "eval_runtime": 2.1921, "eval_samples_per_second": 1039.635, "eval_steps_per_second": 4.106, "step": 1989 }, { "epoch": 39.02, "learning_rate": 9.754901960784314e-05, "loss": 0.7966, "step": 1990 }, { "epoch": 39.22, "learning_rate": 9.80392156862745e-05, "loss": 0.792, "step": 2000 }, { "epoch": 39.41, "learning_rate": 9.852941176470587e-05, "loss": 0.7913, "step": 2010 }, { "epoch": 39.61, "learning_rate": 9.901960784313724e-05, "loss": 0.7892, "step": 2020 }, { "epoch": 39.8, "learning_rate": 9.950980392156861e-05, "loss": 0.7892, "step": 2030 }, { "epoch": 40.0, "learning_rate": 9.999999999999999e-05, "loss": 0.7887, "step": 2040 }, { "epoch": 40.0, "eval_loss": 0.7966196537017822, "eval_runtime": 2.1656, "eval_samples_per_second": 1052.34, "eval_steps_per_second": 4.156, "step": 2040 }, { "epoch": 40.2, "learning_rate": 0.00010049019607843136, "loss": 0.7982, "step": 2050 }, { "epoch": 40.39, "learning_rate": 0.00010098039215686274, "loss": 0.7965, "step": 2060 }, { "epoch": 40.59, "learning_rate": 0.00010147058823529411, "loss": 0.7897, "step": 2070 }, { "epoch": 40.78, "learning_rate": 0.00010196078431372549, "loss": 0.7892, "step": 2080 }, { "epoch": 40.98, "learning_rate": 0.00010245098039215686, "loss": 0.7866, "step": 2090 }, { "epoch": 41.0, "eval_loss": 0.7878917455673218, "eval_runtime": 2.1957, "eval_samples_per_second": 1037.957, "eval_steps_per_second": 4.099, "step": 2091 }, { "epoch": 41.18, "learning_rate": 0.00010294117647058823, "loss": 0.7895, "step": 2100 }, { "epoch": 41.37, "learning_rate": 0.0001034313725490196, "loss": 0.7871, "step": 2110 }, { "epoch": 41.57, "learning_rate": 0.00010392156862745096, "loss": 0.785, "step": 2120 }, { "epoch": 41.76, "learning_rate": 0.00010441176470588234, "loss": 0.7827, "step": 2130 }, { "epoch": 41.96, "learning_rate": 0.00010490196078431371, "loss": 0.7862, "step": 2140 }, { "epoch": 42.0, "eval_loss": 0.7828369736671448, "eval_runtime": 2.2171, "eval_samples_per_second": 1027.911, "eval_steps_per_second": 4.059, "step": 2142 }, { "epoch": 42.16, "learning_rate": 0.00010539215686274508, "loss": 0.7855, "step": 2150 }, { "epoch": 42.35, "learning_rate": 0.00010588235294117647, "loss": 0.785, "step": 2160 }, { "epoch": 42.55, "learning_rate": 0.00010637254901960784, "loss": 0.7865, "step": 2170 }, { "epoch": 42.75, "learning_rate": 0.00010686274509803921, "loss": 0.7823, "step": 2180 }, { "epoch": 42.94, "learning_rate": 0.00010735294117647058, "loss": 0.7836, "step": 2190 }, { "epoch": 43.0, "eval_loss": 0.7864591479301453, "eval_runtime": 2.1233, "eval_samples_per_second": 1073.307, "eval_steps_per_second": 4.239, "step": 2193 }, { "epoch": 43.14, "learning_rate": 0.00010784313725490195, "loss": 0.7888, "step": 2200 }, { "epoch": 43.33, "learning_rate": 0.00010833333333333333, "loss": 0.7871, "step": 2210 }, { "epoch": 43.53, "learning_rate": 0.0001088235294117647, "loss": 0.7861, "step": 2220 }, { "epoch": 43.73, "learning_rate": 0.00010931372549019606, "loss": 0.7843, "step": 2230 }, { "epoch": 43.92, "learning_rate": 0.00010980392156862743, "loss": 0.7851, "step": 2240 }, { "epoch": 44.0, "eval_loss": 0.7829829454421997, "eval_runtime": 2.1348, "eval_samples_per_second": 1067.557, "eval_steps_per_second": 4.216, "step": 2244 }, { "epoch": 44.12, "learning_rate": 0.00011029411764705883, "loss": 0.784, "step": 2250 }, { "epoch": 44.31, "learning_rate": 0.0001107843137254902, "loss": 0.784, "step": 2260 }, { "epoch": 44.51, "learning_rate": 0.00011127450980392156, "loss": 0.785, "step": 2270 }, { "epoch": 44.71, "learning_rate": 0.00011176470588235293, "loss": 0.7807, "step": 2280 }, { "epoch": 44.9, "learning_rate": 0.0001122549019607843, "loss": 0.7813, "step": 2290 }, { "epoch": 45.0, "eval_loss": 0.7840399146080017, "eval_runtime": 2.1519, "eval_samples_per_second": 1059.088, "eval_steps_per_second": 4.182, "step": 2295 }, { "epoch": 45.1, "learning_rate": 0.00011274509803921568, "loss": 0.7796, "step": 2300 }, { "epoch": 45.29, "learning_rate": 0.00011323529411764705, "loss": 0.7796, "step": 2310 }, { "epoch": 45.49, "learning_rate": 0.00011372549019607842, "loss": 0.7776, "step": 2320 }, { "epoch": 45.69, "learning_rate": 0.00011421568627450978, "loss": 0.7792, "step": 2330 }, { "epoch": 45.88, "learning_rate": 0.00011470588235294115, "loss": 0.78, "step": 2340 }, { "epoch": 46.0, "eval_loss": 0.7749137878417969, "eval_runtime": 2.1918, "eval_samples_per_second": 1039.761, "eval_steps_per_second": 4.106, "step": 2346 }, { "epoch": 46.08, "learning_rate": 0.00011519607843137255, "loss": 0.7761, "step": 2350 }, { "epoch": 46.27, "learning_rate": 0.00011568627450980392, "loss": 0.7747, "step": 2360 }, { "epoch": 46.47, "learning_rate": 0.00011617647058823528, "loss": 0.772, "step": 2370 }, { "epoch": 46.67, "learning_rate": 0.00011666666666666665, "loss": 0.7748, "step": 2380 }, { "epoch": 46.86, "learning_rate": 0.00011715686274509803, "loss": 0.779, "step": 2390 }, { "epoch": 47.0, "eval_loss": 0.7824994325637817, "eval_runtime": 2.077, "eval_samples_per_second": 1097.254, "eval_steps_per_second": 4.333, "step": 2397 }, { "epoch": 47.06, "learning_rate": 0.0001176470588235294, "loss": 0.7811, "step": 2400 }, { "epoch": 47.25, "learning_rate": 0.00011813725490196077, "loss": 0.7817, "step": 2410 }, { "epoch": 47.45, "learning_rate": 0.00011862745098039214, "loss": 0.7805, "step": 2420 }, { "epoch": 47.65, "learning_rate": 0.0001191176470588235, "loss": 0.7783, "step": 2430 }, { "epoch": 47.84, "learning_rate": 0.0001196078431372549, "loss": 0.7762, "step": 2440 }, { "epoch": 48.0, "eval_loss": 0.7712346911430359, "eval_runtime": 2.1789, "eval_samples_per_second": 1045.947, "eval_steps_per_second": 4.131, "step": 2448 }, { "epoch": 48.04, "learning_rate": 0.00012009803921568627, "loss": 0.7758, "step": 2450 }, { "epoch": 48.24, "learning_rate": 0.00012058823529411764, "loss": 0.7734, "step": 2460 }, { "epoch": 48.43, "learning_rate": 0.00012107843137254902, "loss": 0.7697, "step": 2470 }, { "epoch": 48.63, "learning_rate": 0.00012156862745098039, "loss": 0.7663, "step": 2480 }, { "epoch": 48.82, "learning_rate": 0.00012205882352941175, "loss": 0.7676, "step": 2490 }, { "epoch": 49.0, "eval_loss": 0.7674837112426758, "eval_runtime": 2.1122, "eval_samples_per_second": 1078.947, "eval_steps_per_second": 4.261, "step": 2499 }, { "epoch": 49.02, "learning_rate": 0.00012254901960784314, "loss": 0.7657, "step": 2500 }, { "epoch": 49.22, "learning_rate": 0.0001230392156862745, "loss": 0.7671, "step": 2510 }, { "epoch": 49.41, "learning_rate": 0.00012352941176470587, "loss": 0.7691, "step": 2520 }, { "epoch": 49.61, "learning_rate": 0.00012401960784313724, "loss": 0.7623, "step": 2530 }, { "epoch": 49.8, "learning_rate": 0.00012450980392156863, "loss": 0.7683, "step": 2540 }, { "epoch": 50.0, "learning_rate": 0.000125, "loss": 0.7638, "step": 2550 }, { "epoch": 50.0, "eval_loss": 0.7645083069801331, "eval_runtime": 2.1712, "eval_samples_per_second": 1049.658, "eval_steps_per_second": 4.145, "step": 2550 }, { "epoch": 50.2, "learning_rate": 0.00012549019607843137, "loss": 0.7679, "step": 2560 }, { "epoch": 50.39, "learning_rate": 0.00012598039215686274, "loss": 0.7716, "step": 2570 }, { "epoch": 50.59, "learning_rate": 0.0001264705882352941, "loss": 0.772, "step": 2580 }, { "epoch": 50.78, "learning_rate": 0.00012696078431372547, "loss": 0.7757, "step": 2590 }, { "epoch": 50.98, "learning_rate": 0.00012745098039215684, "loss": 0.7826, "step": 2600 }, { "epoch": 51.0, "eval_loss": 0.7879320979118347, "eval_runtime": 2.139, "eval_samples_per_second": 1065.46, "eval_steps_per_second": 4.208, "step": 2601 }, { "epoch": 51.18, "learning_rate": 0.0001279411764705882, "loss": 0.7886, "step": 2610 }, { "epoch": 51.37, "learning_rate": 0.00012843137254901958, "loss": 0.7883, "step": 2620 }, { "epoch": 51.57, "learning_rate": 0.00012892156862745097, "loss": 0.7851, "step": 2630 }, { "epoch": 51.76, "learning_rate": 0.00012941176470588234, "loss": 0.7797, "step": 2640 }, { "epoch": 51.96, "learning_rate": 0.0001299019607843137, "loss": 0.7728, "step": 2650 }, { "epoch": 52.0, "eval_loss": 0.7729543447494507, "eval_runtime": 2.2366, "eval_samples_per_second": 1018.972, "eval_steps_per_second": 4.024, "step": 2652 }, { "epoch": 52.16, "learning_rate": 0.00013039215686274508, "loss": 0.7681, "step": 2660 }, { "epoch": 52.35, "learning_rate": 0.00013088235294117647, "loss": 0.7614, "step": 2670 }, { "epoch": 52.55, "learning_rate": 0.00013137254901960784, "loss": 0.7626, "step": 2680 }, { "epoch": 52.75, "learning_rate": 0.0001318627450980392, "loss": 0.7621, "step": 2690 }, { "epoch": 52.94, "learning_rate": 0.00013235294117647058, "loss": 0.7629, "step": 2700 }, { "epoch": 53.0, "eval_loss": 0.7606103420257568, "eval_runtime": 2.2103, "eval_samples_per_second": 1031.088, "eval_steps_per_second": 4.072, "step": 2703 }, { "epoch": 53.14, "learning_rate": 0.00013284313725490194, "loss": 0.7619, "step": 2710 }, { "epoch": 53.33, "learning_rate": 0.0001333333333333333, "loss": 0.7705, "step": 2720 }, { "epoch": 53.53, "learning_rate": 0.0001338235294117647, "loss": 0.7831, "step": 2730 }, { "epoch": 53.73, "learning_rate": 0.00013431372549019608, "loss": 0.7841, "step": 2740 }, { "epoch": 53.92, "learning_rate": 0.00013480392156862744, "loss": 0.7819, "step": 2750 }, { "epoch": 54.0, "eval_loss": 0.7718145847320557, "eval_runtime": 2.0561, "eval_samples_per_second": 1108.406, "eval_steps_per_second": 4.377, "step": 2754 }, { "epoch": 54.12, "learning_rate": 0.0001352941176470588, "loss": 0.7737, "step": 2760 }, { "epoch": 54.31, "learning_rate": 0.00013578431372549018, "loss": 0.7763, "step": 2770 }, { "epoch": 54.51, "learning_rate": 0.00013627450980392155, "loss": 0.7791, "step": 2780 }, { "epoch": 54.71, "learning_rate": 0.00013676470588235292, "loss": 0.7741, "step": 2790 }, { "epoch": 54.9, "learning_rate": 0.00013725490196078428, "loss": 0.7802, "step": 2800 }, { "epoch": 55.0, "eval_loss": 0.7808622121810913, "eval_runtime": 2.1507, "eval_samples_per_second": 1059.636, "eval_steps_per_second": 4.185, "step": 2805 }, { "epoch": 55.1, "learning_rate": 0.00013774509803921568, "loss": 0.7777, "step": 2810 }, { "epoch": 55.29, "learning_rate": 0.00013823529411764705, "loss": 0.781, "step": 2820 }, { "epoch": 55.49, "learning_rate": 0.00013872549019607841, "loss": 0.7691, "step": 2830 }, { "epoch": 55.69, "learning_rate": 0.0001392156862745098, "loss": 0.7677, "step": 2840 }, { "epoch": 55.88, "learning_rate": 0.00013970588235294118, "loss": 0.7632, "step": 2850 }, { "epoch": 56.0, "eval_loss": 0.7576876878738403, "eval_runtime": 2.1386, "eval_samples_per_second": 1065.672, "eval_steps_per_second": 4.208, "step": 2856 }, { "epoch": 56.08, "learning_rate": 0.00014019607843137255, "loss": 0.7594, "step": 2860 }, { "epoch": 56.27, "learning_rate": 0.00014068627450980391, "loss": 0.7557, "step": 2870 }, { "epoch": 56.47, "learning_rate": 0.00014117647058823528, "loss": 0.7603, "step": 2880 }, { "epoch": 56.67, "learning_rate": 0.00014166666666666665, "loss": 0.7581, "step": 2890 }, { "epoch": 56.86, "learning_rate": 0.00014215686274509802, "loss": 0.7567, "step": 2900 }, { "epoch": 57.0, "eval_loss": 0.7653807401657104, "eval_runtime": 2.1852, "eval_samples_per_second": 1042.923, "eval_steps_per_second": 4.119, "step": 2907 }, { "epoch": 57.06, "learning_rate": 0.00014264705882352939, "loss": 0.7603, "step": 2910 }, { "epoch": 57.25, "learning_rate": 0.00014313725490196078, "loss": 0.7607, "step": 2920 }, { "epoch": 57.45, "learning_rate": 0.00014362745098039215, "loss": 0.759, "step": 2930 }, { "epoch": 57.65, "learning_rate": 0.00014411764705882352, "loss": 0.7567, "step": 2940 }, { "epoch": 57.84, "learning_rate": 0.00014460784313725488, "loss": 0.7564, "step": 2950 }, { "epoch": 58.0, "eval_loss": 0.7573947906494141, "eval_runtime": 2.2323, "eval_samples_per_second": 1020.928, "eval_steps_per_second": 4.032, "step": 2958 }, { "epoch": 58.04, "learning_rate": 0.00014509803921568625, "loss": 0.7568, "step": 2960 }, { "epoch": 58.24, "learning_rate": 0.00014558823529411762, "loss": 0.7602, "step": 2970 }, { "epoch": 58.43, "learning_rate": 0.00014607843137254902, "loss": 0.7537, "step": 2980 }, { "epoch": 58.63, "learning_rate": 0.00014656862745098038, "loss": 0.752, "step": 2990 }, { "epoch": 58.82, "learning_rate": 0.00014705882352941175, "loss": 0.7535, "step": 3000 }, { "epoch": 59.0, "eval_loss": 0.755523145198822, "eval_runtime": 2.1203, "eval_samples_per_second": 1074.859, "eval_steps_per_second": 4.245, "step": 3009 }, { "epoch": 59.02, "learning_rate": 0.00014754901960784312, "loss": 0.7549, "step": 3010 }, { "epoch": 59.22, "learning_rate": 0.00014803921568627451, "loss": 0.7576, "step": 3020 }, { "epoch": 59.41, "learning_rate": 0.00014852941176470588, "loss": 0.7449, "step": 3030 }, { "epoch": 59.61, "learning_rate": 0.00014901960784313725, "loss": 0.75, "step": 3040 }, { "epoch": 59.8, "learning_rate": 0.00014950980392156862, "loss": 0.7489, "step": 3050 }, { "epoch": 60.0, "learning_rate": 0.00015, "loss": 0.75, "step": 3060 }, { "epoch": 60.0, "eval_loss": 0.7484251856803894, "eval_runtime": 2.2273, "eval_samples_per_second": 1023.23, "eval_steps_per_second": 4.041, "step": 3060 }, { "epoch": 60.2, "learning_rate": 0.00014999998905083632, "loss": 0.7529, "step": 3070 }, { "epoch": 60.39, "learning_rate": 0.00014999995620334851, "loss": 0.7475, "step": 3080 }, { "epoch": 60.59, "learning_rate": 0.00014999990145754617, "loss": 0.7475, "step": 3090 }, { "epoch": 60.78, "learning_rate": 0.00014999982481344522, "loss": 0.7475, "step": 3100 }, { "epoch": 60.98, "learning_rate": 0.0001499997262710681, "loss": 0.7512, "step": 3110 }, { "epoch": 61.0, "eval_loss": 0.7487233877182007, "eval_runtime": 2.2297, "eval_samples_per_second": 1022.1, "eval_steps_per_second": 4.036, "step": 3111 }, { "epoch": 61.18, "learning_rate": 0.0001499996058304436, "loss": 0.7474, "step": 3120 }, { "epoch": 61.37, "learning_rate": 0.0001499994634916068, "loss": 0.7481, "step": 3130 }, { "epoch": 61.57, "learning_rate": 0.00014999929925459934, "loss": 0.7491, "step": 3140 }, { "epoch": 61.76, "learning_rate": 0.00014999911311946914, "loss": 0.757, "step": 3150 }, { "epoch": 61.96, "learning_rate": 0.00014999890508627056, "loss": 0.7493, "step": 3160 }, { "epoch": 62.0, "eval_loss": 0.7462049722671509, "eval_runtime": 2.0885, "eval_samples_per_second": 1091.196, "eval_steps_per_second": 4.309, "step": 3162 }, { "epoch": 62.16, "learning_rate": 0.00014999867515506433, "loss": 0.7456, "step": 3170 }, { "epoch": 62.35, "learning_rate": 0.00014999842332591757, "loss": 0.747, "step": 3180 }, { "epoch": 62.55, "learning_rate": 0.00014999814959890383, "loss": 0.741, "step": 3190 }, { "epoch": 62.75, "learning_rate": 0.00014999785397410304, "loss": 0.7395, "step": 3200 }, { "epoch": 62.94, "learning_rate": 0.0001499975364516015, "loss": 0.742, "step": 3210 }, { "epoch": 63.0, "eval_loss": 0.7449880838394165, "eval_runtime": 2.2315, "eval_samples_per_second": 1021.27, "eval_steps_per_second": 4.033, "step": 3213 }, { "epoch": 63.14, "learning_rate": 0.00014999719703149192, "loss": 0.7464, "step": 3220 }, { "epoch": 63.33, "learning_rate": 0.0001499968357138734, "loss": 0.7437, "step": 3230 }, { "epoch": 63.53, "learning_rate": 0.00014999645249885146, "loss": 0.7401, "step": 3240 }, { "epoch": 63.73, "learning_rate": 0.00014999604738653798, "loss": 0.7441, "step": 3250 }, { "epoch": 63.92, "learning_rate": 0.0001499956203770512, "loss": 0.7469, "step": 3260 }, { "epoch": 64.0, "eval_loss": 0.7464487552642822, "eval_runtime": 2.1124, "eval_samples_per_second": 1078.89, "eval_steps_per_second": 4.261, "step": 3264 }, { "epoch": 64.12, "learning_rate": 0.00014999517147051586, "loss": 0.744, "step": 3270 }, { "epoch": 64.31, "learning_rate": 0.00014999470066706303, "loss": 0.7427, "step": 3280 }, { "epoch": 64.51, "learning_rate": 0.0001499942079668301, "loss": 0.7379, "step": 3290 }, { "epoch": 64.71, "learning_rate": 0.00014999369336996101, "loss": 0.7426, "step": 3300 }, { "epoch": 64.9, "learning_rate": 0.00014999315687660596, "loss": 0.7449, "step": 3310 }, { "epoch": 65.0, "eval_loss": 0.7393355369567871, "eval_runtime": 2.0866, "eval_samples_per_second": 1092.226, "eval_steps_per_second": 4.313, "step": 3315 }, { "epoch": 65.1, "learning_rate": 0.0001499925984869216, "loss": 0.7412, "step": 3320 }, { "epoch": 65.29, "learning_rate": 0.00014999201820107102, "loss": 0.7426, "step": 3330 }, { "epoch": 65.49, "learning_rate": 0.0001499914160192236, "loss": 0.7328, "step": 3340 }, { "epoch": 65.69, "learning_rate": 0.00014999079194155516, "loss": 0.7346, "step": 3350 }, { "epoch": 65.88, "learning_rate": 0.0001499901459682479, "loss": 0.7321, "step": 3360 }, { "epoch": 66.0, "eval_loss": 0.7425104975700378, "eval_runtime": 2.1171, "eval_samples_per_second": 1076.448, "eval_steps_per_second": 4.251, "step": 3366 }, { "epoch": 66.08, "learning_rate": 0.0001499894780994905, "loss": 0.7411, "step": 3370 }, { "epoch": 66.27, "learning_rate": 0.0001499887883354779, "loss": 0.7411, "step": 3380 }, { "epoch": 66.47, "learning_rate": 0.00014998807667641157, "loss": 0.7395, "step": 3390 }, { "epoch": 66.67, "learning_rate": 0.0001499873431224992, "loss": 0.7384, "step": 3400 }, { "epoch": 66.86, "learning_rate": 0.00014998658767395503, "loss": 0.7411, "step": 3410 }, { "epoch": 67.0, "eval_loss": 0.7390904426574707, "eval_runtime": 2.1435, "eval_samples_per_second": 1063.218, "eval_steps_per_second": 4.199, "step": 3417 }, { "epoch": 67.06, "learning_rate": 0.00014998581033099962, "loss": 0.7368, "step": 3420 }, { "epoch": 67.25, "learning_rate": 0.00014998501109385993, "loss": 0.7379, "step": 3430 }, { "epoch": 67.45, "learning_rate": 0.00014998418996276934, "loss": 0.739, "step": 3440 }, { "epoch": 67.65, "learning_rate": 0.00014998334693796758, "loss": 0.7388, "step": 3450 }, { "epoch": 67.84, "learning_rate": 0.00014998248201970082, "loss": 0.7394, "step": 3460 }, { "epoch": 68.0, "eval_loss": 0.7413014769554138, "eval_runtime": 2.1617, "eval_samples_per_second": 1054.246, "eval_steps_per_second": 4.163, "step": 3468 }, { "epoch": 68.04, "learning_rate": 0.00014998159520822156, "loss": 0.7424, "step": 3470 }, { "epoch": 68.24, "learning_rate": 0.00014998068650378876, "loss": 0.7376, "step": 3480 }, { "epoch": 68.43, "learning_rate": 0.00014997975590666775, "loss": 0.734, "step": 3490 }, { "epoch": 68.63, "learning_rate": 0.0001499788034171302, "loss": 0.7281, "step": 3500 }, { "epoch": 68.82, "learning_rate": 0.00014997782903545428, "loss": 0.7301, "step": 3510 }, { "epoch": 69.0, "eval_loss": 0.7344282865524292, "eval_runtime": 2.1969, "eval_samples_per_second": 1037.378, "eval_steps_per_second": 4.097, "step": 3519 }, { "epoch": 69.02, "learning_rate": 0.0001499768327619244, "loss": 0.7302, "step": 3520 }, { "epoch": 69.22, "learning_rate": 0.00014997581459683156, "loss": 0.7292, "step": 3530 }, { "epoch": 69.41, "learning_rate": 0.00014997477454047295, "loss": 0.7285, "step": 3540 }, { "epoch": 69.61, "learning_rate": 0.00014997371259315226, "loss": 0.7297, "step": 3550 }, { "epoch": 69.8, "learning_rate": 0.00014997262875517955, "loss": 0.7245, "step": 3560 }, { "epoch": 70.0, "learning_rate": 0.00014997152302687134, "loss": 0.7208, "step": 3570 }, { "epoch": 70.0, "eval_loss": 0.7255963683128357, "eval_runtime": 2.129, "eval_samples_per_second": 1070.465, "eval_steps_per_second": 4.227, "step": 3570 }, { "epoch": 70.2, "learning_rate": 0.00014997039540855041, "loss": 0.7311, "step": 3580 }, { "epoch": 70.39, "learning_rate": 0.00014996924590054603, "loss": 0.7263, "step": 3590 }, { "epoch": 70.59, "learning_rate": 0.00014996807450319381, "loss": 0.7231, "step": 3600 }, { "epoch": 70.78, "learning_rate": 0.00014996688121683582, "loss": 0.7232, "step": 3610 }, { "epoch": 70.98, "learning_rate": 0.00014996566604182042, "loss": 0.7211, "step": 3620 }, { "epoch": 71.0, "eval_loss": 0.7225197553634644, "eval_runtime": 2.2345, "eval_samples_per_second": 1019.927, "eval_steps_per_second": 4.028, "step": 3621 }, { "epoch": 71.18, "learning_rate": 0.00014996442897850245, "loss": 0.7261, "step": 3630 }, { "epoch": 71.37, "learning_rate": 0.00014996317002724305, "loss": 0.7247, "step": 3640 }, { "epoch": 71.57, "learning_rate": 0.00014996188918840986, "loss": 0.7322, "step": 3650 }, { "epoch": 71.76, "learning_rate": 0.00014996058646237682, "loss": 0.7333, "step": 3660 }, { "epoch": 71.96, "learning_rate": 0.00014995926184952434, "loss": 0.7273, "step": 3670 }, { "epoch": 72.0, "eval_loss": 0.726405918598175, "eval_runtime": 2.1479, "eval_samples_per_second": 1061.037, "eval_steps_per_second": 4.19, "step": 3672 }, { "epoch": 72.16, "learning_rate": 0.00014995791535023917, "loss": 0.7277, "step": 3680 }, { "epoch": 72.35, "learning_rate": 0.0001499565469649144, "loss": 0.7324, "step": 3690 }, { "epoch": 72.55, "learning_rate": 0.00014995515669394963, "loss": 0.7308, "step": 3700 }, { "epoch": 72.75, "learning_rate": 0.00014995374453775077, "loss": 0.7262, "step": 3710 }, { "epoch": 72.94, "learning_rate": 0.00014995231049673012, "loss": 0.7267, "step": 3720 }, { "epoch": 73.0, "eval_loss": 0.7220990061759949, "eval_runtime": 2.1293, "eval_samples_per_second": 1070.303, "eval_steps_per_second": 4.227, "step": 3723 }, { "epoch": 73.14, "learning_rate": 0.00014995085457130645, "loss": 0.7224, "step": 3730 }, { "epoch": 73.33, "learning_rate": 0.00014994937676190477, "loss": 0.7255, "step": 3740 }, { "epoch": 73.53, "learning_rate": 0.0001499478770689566, "loss": 0.7224, "step": 3750 }, { "epoch": 73.73, "learning_rate": 0.00014994635549289985, "loss": 0.7241, "step": 3760 }, { "epoch": 73.92, "learning_rate": 0.00014994481203417875, "loss": 0.7222, "step": 3770 }, { "epoch": 74.0, "eval_loss": 0.7255922555923462, "eval_runtime": 2.0896, "eval_samples_per_second": 1090.633, "eval_steps_per_second": 4.307, "step": 3774 }, { "epoch": 74.12, "learning_rate": 0.00014994324669324397, "loss": 0.7213, "step": 3780 }, { "epoch": 74.31, "learning_rate": 0.00014994165947055255, "loss": 0.7197, "step": 3790 }, { "epoch": 74.51, "learning_rate": 0.00014994005036656795, "loss": 0.7233, "step": 3800 }, { "epoch": 74.71, "learning_rate": 0.00014993841938175994, "loss": 0.7224, "step": 3810 }, { "epoch": 74.9, "learning_rate": 0.00014993676651660478, "loss": 0.7175, "step": 3820 }, { "epoch": 75.0, "eval_loss": 0.7201787829399109, "eval_runtime": 2.0903, "eval_samples_per_second": 1090.287, "eval_steps_per_second": 4.306, "step": 3825 }, { "epoch": 75.1, "learning_rate": 0.00014993509177158503, "loss": 0.7198, "step": 3830 }, { "epoch": 75.29, "learning_rate": 0.00014993339514718972, "loss": 0.7277, "step": 3840 }, { "epoch": 75.49, "learning_rate": 0.00014993167664391417, "loss": 0.7224, "step": 3850 }, { "epoch": 75.69, "learning_rate": 0.00014992993626226022, "loss": 0.721, "step": 3860 }, { "epoch": 75.88, "learning_rate": 0.00014992817400273593, "loss": 0.7174, "step": 3870 }, { "epoch": 76.0, "eval_loss": 0.714850127696991, "eval_runtime": 2.0699, "eval_samples_per_second": 1101.043, "eval_steps_per_second": 4.348, "step": 3876 }, { "epoch": 76.08, "learning_rate": 0.00014992638986585592, "loss": 0.717, "step": 3880 }, { "epoch": 76.27, "learning_rate": 0.0001499245838521411, "loss": 0.7163, "step": 3890 }, { "epoch": 76.47, "learning_rate": 0.00014992275596211878, "loss": 0.7175, "step": 3900 }, { "epoch": 76.67, "learning_rate": 0.00014992090619632265, "loss": 0.7098, "step": 3910 }, { "epoch": 76.86, "learning_rate": 0.00014991903455529278, "loss": 0.7143, "step": 3920 }, { "epoch": 77.0, "eval_loss": 0.7127418518066406, "eval_runtime": 2.101, "eval_samples_per_second": 1084.704, "eval_steps_per_second": 4.284, "step": 3927 }, { "epoch": 77.06, "learning_rate": 0.00014991714103957572, "loss": 0.7136, "step": 3930 }, { "epoch": 77.25, "learning_rate": 0.00014991522564972428, "loss": 0.7134, "step": 3940 }, { "epoch": 77.45, "learning_rate": 0.0001499132883862977, "loss": 0.713, "step": 3950 }, { "epoch": 77.65, "learning_rate": 0.00014991132924986164, "loss": 0.7119, "step": 3960 }, { "epoch": 77.84, "learning_rate": 0.00014990934824098815, "loss": 0.7106, "step": 3970 }, { "epoch": 78.0, "eval_loss": 0.7061274647712708, "eval_runtime": 2.0952, "eval_samples_per_second": 1087.724, "eval_steps_per_second": 4.296, "step": 3978 }, { "epoch": 78.04, "learning_rate": 0.0001499073453602556, "loss": 0.7079, "step": 3980 }, { "epoch": 78.24, "learning_rate": 0.00014990532060824878, "loss": 0.7104, "step": 3990 }, { "epoch": 78.43, "learning_rate": 0.00014990327398555894, "loss": 0.7088, "step": 4000 }, { "epoch": 78.63, "learning_rate": 0.00014990120549278357, "loss": 0.7098, "step": 4010 }, { "epoch": 78.82, "learning_rate": 0.00014989911513052666, "loss": 0.7188, "step": 4020 }, { "epoch": 79.0, "eval_loss": 0.7153338193893433, "eval_runtime": 2.0939, "eval_samples_per_second": 1088.393, "eval_steps_per_second": 4.298, "step": 4029 }, { "epoch": 79.02, "learning_rate": 0.00014989700289939854, "loss": 0.7183, "step": 4030 }, { "epoch": 79.22, "learning_rate": 0.00014989486880001595, "loss": 0.7099, "step": 4040 }, { "epoch": 79.41, "learning_rate": 0.00014989271283300198, "loss": 0.7068, "step": 4050 }, { "epoch": 79.61, "learning_rate": 0.00014989053499898613, "loss": 0.707, "step": 4060 }, { "epoch": 79.8, "learning_rate": 0.0001498883352986043, "loss": 0.7039, "step": 4070 }, { "epoch": 80.0, "learning_rate": 0.0001498861137324987, "loss": 0.7103, "step": 4080 }, { "epoch": 80.0, "eval_loss": 0.7086203694343567, "eval_runtime": 2.1491, "eval_samples_per_second": 1060.423, "eval_steps_per_second": 4.188, "step": 4080 }, { "epoch": 80.2, "learning_rate": 0.00014988387030131803, "loss": 0.7066, "step": 4090 }, { "epoch": 80.39, "learning_rate": 0.0001498816050057173, "loss": 0.705, "step": 4100 }, { "epoch": 80.59, "learning_rate": 0.00014987931784635796, "loss": 0.7077, "step": 4110 }, { "epoch": 80.78, "learning_rate": 0.00014987700882390775, "loss": 0.7073, "step": 4120 }, { "epoch": 80.98, "learning_rate": 0.0001498746779390409, "loss": 0.7055, "step": 4130 }, { "epoch": 81.0, "eval_loss": 0.709750235080719, "eval_runtime": 2.1143, "eval_samples_per_second": 1077.874, "eval_steps_per_second": 4.257, "step": 4131 }, { "epoch": 81.18, "learning_rate": 0.00014987232519243792, "loss": 0.7106, "step": 4140 }, { "epoch": 81.37, "learning_rate": 0.00014986995058478584, "loss": 0.7064, "step": 4150 }, { "epoch": 81.57, "learning_rate": 0.00014986755411677794, "loss": 0.7061, "step": 4160 }, { "epoch": 81.76, "learning_rate": 0.00014986513578911395, "loss": 0.7081, "step": 4170 }, { "epoch": 81.96, "learning_rate": 0.00014986269560249995, "loss": 0.7026, "step": 4180 }, { "epoch": 82.0, "eval_loss": 0.7075186967849731, "eval_runtime": 2.1061, "eval_samples_per_second": 1082.105, "eval_steps_per_second": 4.273, "step": 4182 }, { "epoch": 82.16, "learning_rate": 0.00014986023355764846, "loss": 0.7069, "step": 4190 }, { "epoch": 82.35, "learning_rate": 0.0001498577496552783, "loss": 0.7038, "step": 4200 }, { "epoch": 82.55, "learning_rate": 0.0001498552438961147, "loss": 0.7068, "step": 4210 }, { "epoch": 82.75, "learning_rate": 0.00014985271628088938, "loss": 0.7073, "step": 4220 }, { "epoch": 82.94, "learning_rate": 0.00014985016681034024, "loss": 0.7191, "step": 4230 }, { "epoch": 83.0, "eval_loss": 0.7127190828323364, "eval_runtime": 2.1082, "eval_samples_per_second": 1081.004, "eval_steps_per_second": 4.269, "step": 4233 }, { "epoch": 83.14, "learning_rate": 0.00014984759548521172, "loss": 0.7117, "step": 4240 }, { "epoch": 83.33, "learning_rate": 0.0001498450023062546, "loss": 0.7083, "step": 4250 }, { "epoch": 83.53, "learning_rate": 0.00014984238727422597, "loss": 0.7064, "step": 4260 }, { "epoch": 83.73, "learning_rate": 0.00014983975038988944, "loss": 0.7066, "step": 4270 }, { "epoch": 83.92, "learning_rate": 0.00014983709165401489, "loss": 0.7027, "step": 4280 }, { "epoch": 84.0, "eval_loss": 0.7171905040740967, "eval_runtime": 2.2202, "eval_samples_per_second": 1026.505, "eval_steps_per_second": 4.054, "step": 4284 }, { "epoch": 84.12, "learning_rate": 0.00014983441106737857, "loss": 0.7064, "step": 4290 }, { "epoch": 84.31, "learning_rate": 0.0001498317086307632, "loss": 0.7106, "step": 4300 }, { "epoch": 84.51, "learning_rate": 0.00014982898434495783, "loss": 0.7057, "step": 4310 }, { "epoch": 84.71, "learning_rate": 0.0001498262382107579, "loss": 0.7022, "step": 4320 }, { "epoch": 84.9, "learning_rate": 0.00014982347022896516, "loss": 0.6981, "step": 4330 }, { "epoch": 85.0, "eval_loss": 0.7069874405860901, "eval_runtime": 2.0965, "eval_samples_per_second": 1087.073, "eval_steps_per_second": 4.293, "step": 4335 }, { "epoch": 85.1, "learning_rate": 0.00014982068040038786, "loss": 0.7042, "step": 4340 }, { "epoch": 85.29, "learning_rate": 0.00014981786872584056, "loss": 0.7022, "step": 4350 }, { "epoch": 85.49, "learning_rate": 0.00014981503520614415, "loss": 0.6987, "step": 4360 }, { "epoch": 85.69, "learning_rate": 0.000149812179842126, "loss": 0.699, "step": 4370 }, { "epoch": 85.88, "learning_rate": 0.00014980930263461985, "loss": 0.7064, "step": 4380 }, { "epoch": 86.0, "eval_loss": 0.7029294371604919, "eval_runtime": 2.1186, "eval_samples_per_second": 1075.695, "eval_steps_per_second": 4.248, "step": 4386 }, { "epoch": 86.08, "learning_rate": 0.00014980640358446573, "loss": 0.6995, "step": 4390 }, { "epoch": 86.27, "learning_rate": 0.0001498034826925101, "loss": 0.7056, "step": 4400 }, { "epoch": 86.47, "learning_rate": 0.0001498005399596058, "loss": 0.7014, "step": 4410 }, { "epoch": 86.67, "learning_rate": 0.00014979757538661204, "loss": 0.6983, "step": 4420 }, { "epoch": 86.86, "learning_rate": 0.00014979458897439442, "loss": 0.6943, "step": 4430 }, { "epoch": 87.0, "eval_loss": 0.7045770883560181, "eval_runtime": 2.1001, "eval_samples_per_second": 1085.211, "eval_steps_per_second": 4.286, "step": 4437 }, { "epoch": 87.06, "learning_rate": 0.0001497915807238249, "loss": 0.6975, "step": 4440 }, { "epoch": 87.25, "learning_rate": 0.00014978855063578183, "loss": 0.7033, "step": 4450 }, { "epoch": 87.45, "learning_rate": 0.00014978549871114992, "loss": 0.6967, "step": 4460 }, { "epoch": 87.65, "learning_rate": 0.00014978242495082024, "loss": 0.7053, "step": 4470 }, { "epoch": 87.84, "learning_rate": 0.00014977932935569032, "loss": 0.7025, "step": 4480 }, { "epoch": 88.0, "eval_loss": 0.7035739421844482, "eval_runtime": 2.0997, "eval_samples_per_second": 1085.417, "eval_steps_per_second": 4.286, "step": 4488 }, { "epoch": 88.04, "learning_rate": 0.00014977621192666395, "loss": 0.7019, "step": 4490 }, { "epoch": 88.24, "learning_rate": 0.00014977307266465139, "loss": 0.7036, "step": 4500 }, { "epoch": 88.43, "learning_rate": 0.00014976991157056917, "loss": 0.7015, "step": 4510 }, { "epoch": 88.63, "learning_rate": 0.00014976672864534034, "loss": 0.6985, "step": 4520 }, { "epoch": 88.82, "learning_rate": 0.0001497635238898942, "loss": 0.6959, "step": 4530 }, { "epoch": 89.0, "eval_loss": 0.7093929648399353, "eval_runtime": 2.2076, "eval_samples_per_second": 1032.357, "eval_steps_per_second": 4.077, "step": 4539 }, { "epoch": 89.02, "learning_rate": 0.00014976029730516646, "loss": 0.7044, "step": 4540 }, { "epoch": 89.22, "learning_rate": 0.00014975704889209923, "loss": 0.7057, "step": 4550 }, { "epoch": 89.41, "learning_rate": 0.00014975377865164097, "loss": 0.7022, "step": 4560 }, { "epoch": 89.61, "learning_rate": 0.00014975048658474652, "loss": 0.695, "step": 4570 }, { "epoch": 89.8, "learning_rate": 0.00014974717269237707, "loss": 0.6957, "step": 4580 }, { "epoch": 90.0, "learning_rate": 0.00014974383697550022, "loss": 0.6988, "step": 4590 }, { "epoch": 90.0, "eval_loss": 0.691733717918396, "eval_runtime": 2.1923, "eval_samples_per_second": 1039.543, "eval_steps_per_second": 4.105, "step": 4590 }, { "epoch": 90.2, "learning_rate": 0.00014974047943508995, "loss": 0.6922, "step": 4600 }, { "epoch": 90.39, "learning_rate": 0.00014973710007212652, "loss": 0.6949, "step": 4610 }, { "epoch": 90.59, "learning_rate": 0.00014973369888759668, "loss": 0.693, "step": 4620 }, { "epoch": 90.78, "learning_rate": 0.0001497302758824935, "loss": 0.6976, "step": 4630 }, { "epoch": 90.98, "learning_rate": 0.00014972683105781638, "loss": 0.6912, "step": 4640 }, { "epoch": 91.0, "eval_loss": 0.6925643682479858, "eval_runtime": 2.18, "eval_samples_per_second": 1045.407, "eval_steps_per_second": 4.128, "step": 4641 }, { "epoch": 91.18, "learning_rate": 0.00014972336441457118, "loss": 0.6917, "step": 4650 }, { "epoch": 91.37, "learning_rate": 0.00014971987595377008, "loss": 0.6871, "step": 4660 }, { "epoch": 91.57, "learning_rate": 0.00014971636567643161, "loss": 0.6927, "step": 4670 }, { "epoch": 91.76, "learning_rate": 0.0001497128335835807, "loss": 0.6945, "step": 4680 }, { "epoch": 91.96, "learning_rate": 0.00014970927967624864, "loss": 0.689, "step": 4690 }, { "epoch": 92.0, "eval_loss": 0.6880647540092468, "eval_runtime": 2.0762, "eval_samples_per_second": 1097.663, "eval_steps_per_second": 4.335, "step": 4692 }, { "epoch": 92.16, "learning_rate": 0.00014970570395547307, "loss": 0.6964, "step": 4700 }, { "epoch": 92.35, "learning_rate": 0.00014970210642229807, "loss": 0.6949, "step": 4710 }, { "epoch": 92.55, "learning_rate": 0.000149698487077774, "loss": 0.69, "step": 4720 }, { "epoch": 92.75, "learning_rate": 0.00014969484592295765, "loss": 0.6878, "step": 4730 }, { "epoch": 92.94, "learning_rate": 0.00014969118295891215, "loss": 0.687, "step": 4740 }, { "epoch": 93.0, "eval_loss": 0.6865754127502441, "eval_runtime": 2.2287, "eval_samples_per_second": 1022.574, "eval_steps_per_second": 4.038, "step": 4743 }, { "epoch": 93.14, "learning_rate": 0.000149687498186707, "loss": 0.689, "step": 4750 }, { "epoch": 93.33, "learning_rate": 0.00014968379160741805, "loss": 0.6867, "step": 4760 }, { "epoch": 93.53, "learning_rate": 0.00014968006322212758, "loss": 0.6896, "step": 4770 }, { "epoch": 93.73, "learning_rate": 0.00014967631303192417, "loss": 0.6887, "step": 4780 }, { "epoch": 93.92, "learning_rate": 0.0001496725410379028, "loss": 0.6867, "step": 4790 }, { "epoch": 94.0, "eval_loss": 0.6873466372489929, "eval_runtime": 2.114, "eval_samples_per_second": 1078.052, "eval_steps_per_second": 4.257, "step": 4794 }, { "epoch": 94.12, "learning_rate": 0.0001496687472411648, "loss": 0.6861, "step": 4800 }, { "epoch": 94.31, "learning_rate": 0.0001496649316428179, "loss": 0.6911, "step": 4810 }, { "epoch": 94.51, "learning_rate": 0.00014966109424397614, "loss": 0.6865, "step": 4820 }, { "epoch": 94.71, "learning_rate": 0.00014965723504575996, "loss": 0.6789, "step": 4830 }, { "epoch": 94.9, "learning_rate": 0.00014965335404929617, "loss": 0.6832, "step": 4840 }, { "epoch": 95.0, "eval_loss": 0.6820151805877686, "eval_runtime": 2.1799, "eval_samples_per_second": 1045.479, "eval_steps_per_second": 4.129, "step": 4845 }, { "epoch": 95.1, "learning_rate": 0.00014964945125571792, "loss": 0.6821, "step": 4850 }, { "epoch": 95.29, "learning_rate": 0.00014964552666616476, "loss": 0.6875, "step": 4860 }, { "epoch": 95.49, "learning_rate": 0.00014964158028178256, "loss": 0.7012, "step": 4870 }, { "epoch": 95.69, "learning_rate": 0.00014963761210372357, "loss": 0.6934, "step": 4880 }, { "epoch": 95.88, "learning_rate": 0.00014963362213314647, "loss": 0.6863, "step": 4890 }, { "epoch": 96.0, "eval_loss": 0.6808879375457764, "eval_runtime": 2.1682, "eval_samples_per_second": 1051.107, "eval_steps_per_second": 4.151, "step": 4896 }, { "epoch": 96.08, "learning_rate": 0.00014962961037121616, "loss": 0.6835, "step": 4900 }, { "epoch": 96.27, "learning_rate": 0.00014962557681910406, "loss": 0.6891, "step": 4910 }, { "epoch": 96.47, "learning_rate": 0.00014962152147798778, "loss": 0.6867, "step": 4920 }, { "epoch": 96.67, "learning_rate": 0.0001496174443490515, "loss": 0.678, "step": 4930 }, { "epoch": 96.86, "learning_rate": 0.0001496133454334856, "loss": 0.6908, "step": 4940 }, { "epoch": 97.0, "eval_loss": 0.6791673898696899, "eval_runtime": 2.094, "eval_samples_per_second": 1088.337, "eval_steps_per_second": 4.298, "step": 4947 }, { "epoch": 97.06, "learning_rate": 0.0001496092247324869, "loss": 0.6815, "step": 4950 }, { "epoch": 97.25, "learning_rate": 0.00014960508224725846, "loss": 0.6837, "step": 4960 }, { "epoch": 97.45, "learning_rate": 0.00014960091797900987, "loss": 0.6784, "step": 4970 }, { "epoch": 97.65, "learning_rate": 0.00014959673192895703, "loss": 0.6837, "step": 4980 }, { "epoch": 97.84, "learning_rate": 0.0001495925240983221, "loss": 0.6891, "step": 4990 }, { "epoch": 98.0, "eval_loss": 0.67962646484375, "eval_runtime": 2.1402, "eval_samples_per_second": 1064.861, "eval_steps_per_second": 4.205, "step": 4998 }, { "epoch": 98.04, "learning_rate": 0.0001495882944883337, "loss": 0.6784, "step": 5000 }, { "epoch": 98.24, "learning_rate": 0.00014958404310022683, "loss": 0.68, "step": 5010 }, { "epoch": 98.43, "learning_rate": 0.00014957976993524276, "loss": 0.6756, "step": 5020 }, { "epoch": 98.63, "learning_rate": 0.00014957547499462912, "loss": 0.6841, "step": 5030 }, { "epoch": 98.82, "learning_rate": 0.00014957115827964, "loss": 0.6803, "step": 5040 }, { "epoch": 99.0, "eval_loss": 0.6792941689491272, "eval_runtime": 2.1861, "eval_samples_per_second": 1042.487, "eval_steps_per_second": 4.117, "step": 5049 }, { "epoch": 99.02, "learning_rate": 0.00014956681979153577, "loss": 0.682, "step": 5050 }, { "epoch": 99.22, "learning_rate": 0.00014956245953158314, "loss": 0.6795, "step": 5060 }, { "epoch": 99.41, "learning_rate": 0.00014955807750105524, "loss": 0.6753, "step": 5070 }, { "epoch": 99.61, "learning_rate": 0.00014955367370123152, "loss": 0.6752, "step": 5080 }, { "epoch": 99.8, "learning_rate": 0.00014954924813339777, "loss": 0.676, "step": 5090 }, { "epoch": 100.0, "learning_rate": 0.0001495448007988462, "loss": 0.6755, "step": 5100 }, { "epoch": 100.0, "eval_loss": 0.6737886071205139, "eval_runtime": 2.1396, "eval_samples_per_second": 1065.176, "eval_steps_per_second": 4.206, "step": 5100 }, { "epoch": 100.2, "learning_rate": 0.0001495403316988753, "loss": 0.6741, "step": 5110 }, { "epoch": 100.39, "learning_rate": 0.00014953584083478993, "loss": 0.6756, "step": 5120 }, { "epoch": 100.59, "learning_rate": 0.0001495313282079014, "loss": 0.6759, "step": 5130 }, { "epoch": 100.78, "learning_rate": 0.00014952679381952718, "loss": 0.6725, "step": 5140 }, { "epoch": 100.98, "learning_rate": 0.0001495222376709913, "loss": 0.6735, "step": 5150 }, { "epoch": 101.0, "eval_loss": 0.6750496029853821, "eval_runtime": 2.2168, "eval_samples_per_second": 1028.06, "eval_steps_per_second": 4.06, "step": 5151 }, { "epoch": 101.18, "learning_rate": 0.00014951765976362405, "loss": 0.6753, "step": 5160 }, { "epoch": 101.37, "learning_rate": 0.00014951306009876203, "loss": 0.6732, "step": 5170 }, { "epoch": 101.57, "learning_rate": 0.00014950843867774828, "loss": 0.6717, "step": 5180 }, { "epoch": 101.76, "learning_rate": 0.00014950379550193212, "loss": 0.6704, "step": 5190 }, { "epoch": 101.96, "learning_rate": 0.00014949913057266928, "loss": 0.6727, "step": 5200 }, { "epoch": 102.0, "eval_loss": 0.672935962677002, "eval_runtime": 2.1734, "eval_samples_per_second": 1048.602, "eval_steps_per_second": 4.141, "step": 5202 }, { "epoch": 102.16, "learning_rate": 0.0001494944438913218, "loss": 0.6736, "step": 5210 }, { "epoch": 102.35, "learning_rate": 0.00014948973545925807, "loss": 0.6732, "step": 5220 }, { "epoch": 102.55, "learning_rate": 0.0001494850052778529, "loss": 0.6703, "step": 5230 }, { "epoch": 102.75, "learning_rate": 0.00014948025334848736, "loss": 0.6721, "step": 5240 }, { "epoch": 102.94, "learning_rate": 0.0001494754796725489, "loss": 0.6695, "step": 5250 }, { "epoch": 103.0, "eval_loss": 0.6733797192573547, "eval_runtime": 2.1812, "eval_samples_per_second": 1044.839, "eval_steps_per_second": 4.126, "step": 5253 }, { "epoch": 103.14, "learning_rate": 0.00014947068425143136, "loss": 0.6728, "step": 5260 }, { "epoch": 103.33, "learning_rate": 0.00014946586708653486, "loss": 0.6684, "step": 5270 }, { "epoch": 103.53, "learning_rate": 0.0001494610281792659, "loss": 0.6702, "step": 5280 }, { "epoch": 103.73, "learning_rate": 0.0001494561675310374, "loss": 0.6705, "step": 5290 }, { "epoch": 103.92, "learning_rate": 0.0001494512851432685, "loss": 0.6678, "step": 5300 }, { "epoch": 104.0, "eval_loss": 0.6701691746711731, "eval_runtime": 2.1386, "eval_samples_per_second": 1065.675, "eval_steps_per_second": 4.208, "step": 5304 }, { "epoch": 104.12, "learning_rate": 0.00014944638101738474, "loss": 0.6684, "step": 5310 }, { "epoch": 104.31, "learning_rate": 0.00014944145515481805, "loss": 0.6648, "step": 5320 }, { "epoch": 104.51, "learning_rate": 0.00014943650755700667, "loss": 0.6702, "step": 5330 }, { "epoch": 104.71, "learning_rate": 0.0001494315382253952, "loss": 0.671, "step": 5340 }, { "epoch": 104.9, "learning_rate": 0.0001494265471614345, "loss": 0.671, "step": 5350 }, { "epoch": 105.0, "eval_loss": 0.6720392107963562, "eval_runtime": 2.1432, "eval_samples_per_second": 1063.36, "eval_steps_per_second": 4.199, "step": 5355 }, { "epoch": 105.1, "learning_rate": 0.0001494215343665819, "loss": 0.6686, "step": 5360 }, { "epoch": 105.29, "learning_rate": 0.00014941649984230107, "loss": 0.6669, "step": 5370 }, { "epoch": 105.49, "learning_rate": 0.00014941144359006194, "loss": 0.6654, "step": 5380 }, { "epoch": 105.69, "learning_rate": 0.00014940636561134078, "loss": 0.6645, "step": 5390 }, { "epoch": 105.88, "learning_rate": 0.0001494012659076203, "loss": 0.6654, "step": 5400 }, { "epoch": 106.0, "eval_loss": 0.6686482429504395, "eval_runtime": 2.244, "eval_samples_per_second": 1015.59, "eval_steps_per_second": 4.011, "step": 5406 }, { "epoch": 106.08, "learning_rate": 0.00014939614448038948, "loss": 0.6674, "step": 5410 }, { "epoch": 106.27, "learning_rate": 0.00014939100133114368, "loss": 0.6675, "step": 5420 }, { "epoch": 106.47, "learning_rate": 0.00014938583646138457, "loss": 0.6667, "step": 5430 }, { "epoch": 106.67, "learning_rate": 0.00014938064987262016, "loss": 0.6673, "step": 5440 }, { "epoch": 106.86, "learning_rate": 0.00014937544156636484, "loss": 0.669, "step": 5450 }, { "epoch": 107.0, "eval_loss": 0.6682608723640442, "eval_runtime": 2.2531, "eval_samples_per_second": 1011.487, "eval_steps_per_second": 3.994, "step": 5457 }, { "epoch": 107.06, "learning_rate": 0.00014937021154413932, "loss": 0.6636, "step": 5460 }, { "epoch": 107.25, "learning_rate": 0.00014936495980747065, "loss": 0.6646, "step": 5470 }, { "epoch": 107.45, "learning_rate": 0.00014935968635789224, "loss": 0.6677, "step": 5480 }, { "epoch": 107.65, "learning_rate": 0.00014935439119694377, "loss": 0.6651, "step": 5490 }, { "epoch": 107.84, "learning_rate": 0.00014934907432617134, "loss": 0.6628, "step": 5500 }, { "epoch": 108.0, "eval_loss": 0.6639227867126465, "eval_runtime": 2.2629, "eval_samples_per_second": 1007.134, "eval_steps_per_second": 3.977, "step": 5508 }, { "epoch": 108.04, "learning_rate": 0.00014934373574712734, "loss": 0.6611, "step": 5510 }, { "epoch": 108.24, "learning_rate": 0.00014933837546137054, "loss": 0.6629, "step": 5520 }, { "epoch": 108.43, "learning_rate": 0.000149332993470466, "loss": 0.6639, "step": 5530 }, { "epoch": 108.63, "learning_rate": 0.00014932758977598514, "loss": 0.6703, "step": 5540 }, { "epoch": 108.82, "learning_rate": 0.0001493221643795058, "loss": 0.6655, "step": 5550 }, { "epoch": 109.0, "eval_loss": 0.6662523150444031, "eval_runtime": 2.2626, "eval_samples_per_second": 1007.256, "eval_steps_per_second": 3.978, "step": 5559 }, { "epoch": 109.02, "learning_rate": 0.00014931671728261195, "loss": 0.6633, "step": 5560 }, { "epoch": 109.22, "learning_rate": 0.00014931124848689407, "loss": 0.6672, "step": 5570 }, { "epoch": 109.41, "learning_rate": 0.00014930575799394893, "loss": 0.6585, "step": 5580 }, { "epoch": 109.61, "learning_rate": 0.00014930024580537968, "loss": 0.6637, "step": 5590 }, { "epoch": 109.8, "learning_rate": 0.00014929471192279568, "loss": 0.6653, "step": 5600 }, { "epoch": 110.0, "learning_rate": 0.00014928915634781272, "loss": 0.6637, "step": 5610 }, { "epoch": 110.0, "eval_loss": 0.6650734543800354, "eval_runtime": 2.1716, "eval_samples_per_second": 1049.462, "eval_steps_per_second": 4.144, "step": 5610 }, { "epoch": 110.2, "learning_rate": 0.00014928357908205295, "loss": 0.6677, "step": 5620 }, { "epoch": 110.39, "learning_rate": 0.00014927798012714477, "loss": 0.665, "step": 5630 }, { "epoch": 110.59, "learning_rate": 0.00014927235948472293, "loss": 0.6645, "step": 5640 }, { "epoch": 110.78, "learning_rate": 0.00014926671715642854, "loss": 0.6612, "step": 5650 }, { "epoch": 110.98, "learning_rate": 0.0001492610531439091, "loss": 0.6643, "step": 5660 }, { "epoch": 111.0, "eval_loss": 0.6638761162757874, "eval_runtime": 2.1014, "eval_samples_per_second": 1084.505, "eval_steps_per_second": 4.283, "step": 5661 }, { "epoch": 111.18, "learning_rate": 0.00014925536744881827, "loss": 0.6655, "step": 5670 }, { "epoch": 111.37, "learning_rate": 0.00014924966007281624, "loss": 0.6648, "step": 5680 }, { "epoch": 111.57, "learning_rate": 0.00014924393101756938, "loss": 0.662, "step": 5690 }, { "epoch": 111.76, "learning_rate": 0.00014923818028475045, "loss": 0.6596, "step": 5700 }, { "epoch": 111.96, "learning_rate": 0.0001492324078760386, "loss": 0.6607, "step": 5710 }, { "epoch": 112.0, "eval_loss": 0.656067967414856, "eval_runtime": 2.1149, "eval_samples_per_second": 1077.6, "eval_steps_per_second": 4.256, "step": 5712 }, { "epoch": 112.16, "learning_rate": 0.00014922661379311914, "loss": 0.6633, "step": 5720 }, { "epoch": 112.35, "learning_rate": 0.00014922079803768388, "loss": 0.6635, "step": 5730 }, { "epoch": 112.55, "learning_rate": 0.0001492149606114309, "loss": 0.6634, "step": 5740 }, { "epoch": 112.75, "learning_rate": 0.00014920910151606454, "loss": 0.6635, "step": 5750 }, { "epoch": 112.94, "learning_rate": 0.00014920322075329557, "loss": 0.6598, "step": 5760 }, { "epoch": 113.0, "eval_loss": 0.6590579748153687, "eval_runtime": 2.1383, "eval_samples_per_second": 1065.808, "eval_steps_per_second": 4.209, "step": 5763 }, { "epoch": 113.14, "learning_rate": 0.00014919731832484104, "loss": 0.6608, "step": 5770 }, { "epoch": 113.33, "learning_rate": 0.0001491913942324243, "loss": 0.6601, "step": 5780 }, { "epoch": 113.53, "learning_rate": 0.00014918544847777513, "loss": 0.6647, "step": 5790 }, { "epoch": 113.73, "learning_rate": 0.00014917948106262947, "loss": 0.6605, "step": 5800 }, { "epoch": 113.92, "learning_rate": 0.00014917349198872968, "loss": 0.6589, "step": 5810 }, { "epoch": 114.0, "eval_loss": 0.6609504818916321, "eval_runtime": 2.1322, "eval_samples_per_second": 1068.831, "eval_steps_per_second": 4.221, "step": 5814 }, { "epoch": 114.12, "learning_rate": 0.0001491674812578245, "loss": 0.6534, "step": 5820 }, { "epoch": 114.31, "learning_rate": 0.00014916144887166884, "loss": 0.6593, "step": 5830 }, { "epoch": 114.51, "learning_rate": 0.0001491553948320241, "loss": 0.6607, "step": 5840 }, { "epoch": 114.71, "learning_rate": 0.0001491493191406579, "loss": 0.6595, "step": 5850 }, { "epoch": 114.9, "learning_rate": 0.00014914322179934418, "loss": 0.6566, "step": 5860 }, { "epoch": 115.0, "eval_loss": 0.6566076874732971, "eval_runtime": 2.1529, "eval_samples_per_second": 1058.561, "eval_steps_per_second": 4.18, "step": 5865 }, { "epoch": 115.1, "learning_rate": 0.00014913710280986328, "loss": 0.6598, "step": 5870 }, { "epoch": 115.29, "learning_rate": 0.00014913096217400175, "loss": 0.6645, "step": 5880 }, { "epoch": 115.49, "learning_rate": 0.00014912479989355254, "loss": 0.6665, "step": 5890 }, { "epoch": 115.69, "learning_rate": 0.00014911861597031493, "loss": 0.6694, "step": 5900 }, { "epoch": 115.88, "learning_rate": 0.00014911241040609444, "loss": 0.6706, "step": 5910 }, { "epoch": 116.0, "eval_loss": 0.6748928427696228, "eval_runtime": 2.1111, "eval_samples_per_second": 1079.51, "eval_steps_per_second": 4.263, "step": 5916 }, { "epoch": 116.08, "learning_rate": 0.000149106183202703, "loss": 0.6744, "step": 5920 }, { "epoch": 116.27, "learning_rate": 0.0001490999343619588, "loss": 0.665, "step": 5930 }, { "epoch": 116.47, "learning_rate": 0.0001490936638856863, "loss": 0.6654, "step": 5940 }, { "epoch": 116.67, "learning_rate": 0.00014908737177571644, "loss": 0.6701, "step": 5950 }, { "epoch": 116.86, "learning_rate": 0.00014908105803388634, "loss": 0.6688, "step": 5960 }, { "epoch": 117.0, "eval_loss": 0.667015016078949, "eval_runtime": 2.2347, "eval_samples_per_second": 1019.841, "eval_steps_per_second": 4.027, "step": 5967 }, { "epoch": 117.06, "learning_rate": 0.00014907472266203944, "loss": 0.6653, "step": 5970 }, { "epoch": 117.25, "learning_rate": 0.00014906836566202554, "loss": 0.6631, "step": 5980 }, { "epoch": 117.45, "learning_rate": 0.00014906198703570076, "loss": 0.6656, "step": 5990 }, { "epoch": 117.65, "learning_rate": 0.0001490555867849275, "loss": 0.6625, "step": 6000 }, { "epoch": 117.84, "learning_rate": 0.00014904916491157452, "loss": 0.6657, "step": 6010 }, { "epoch": 118.0, "eval_loss": 0.6599269509315491, "eval_runtime": 2.1838, "eval_samples_per_second": 1043.574, "eval_steps_per_second": 4.121, "step": 6018 }, { "epoch": 118.04, "learning_rate": 0.00014904272141751684, "loss": 0.661, "step": 6020 }, { "epoch": 118.24, "learning_rate": 0.00014903625630463581, "loss": 0.658, "step": 6030 }, { "epoch": 118.43, "learning_rate": 0.0001490297695748191, "loss": 0.6598, "step": 6040 }, { "epoch": 118.63, "learning_rate": 0.0001490232612299607, "loss": 0.6664, "step": 6050 }, { "epoch": 118.82, "learning_rate": 0.00014901673127196092, "loss": 0.6611, "step": 6060 }, { "epoch": 119.0, "eval_loss": 0.6566824913024902, "eval_runtime": 2.1748, "eval_samples_per_second": 1047.897, "eval_steps_per_second": 4.138, "step": 6069 }, { "epoch": 119.02, "learning_rate": 0.00014901017970272634, "loss": 0.6568, "step": 6070 }, { "epoch": 119.22, "learning_rate": 0.00014900360652416987, "loss": 0.6577, "step": 6080 }, { "epoch": 119.41, "learning_rate": 0.00014899701173821071, "loss": 0.6559, "step": 6090 }, { "epoch": 119.61, "learning_rate": 0.00014899039534677444, "loss": 0.6552, "step": 6100 }, { "epoch": 119.8, "learning_rate": 0.00014898375735179287, "loss": 0.6548, "step": 6110 }, { "epoch": 120.0, "learning_rate": 0.00014897709775520417, "loss": 0.6528, "step": 6120 }, { "epoch": 120.0, "eval_loss": 0.6591194868087769, "eval_runtime": 2.2152, "eval_samples_per_second": 1028.818, "eval_steps_per_second": 4.063, "step": 6120 }, { "epoch": 120.2, "learning_rate": 0.00014897041655895276, "loss": 0.6594, "step": 6130 }, { "epoch": 120.39, "learning_rate": 0.0001489637137649894, "loss": 0.6593, "step": 6140 }, { "epoch": 120.59, "learning_rate": 0.0001489569893752712, "loss": 0.6523, "step": 6150 }, { "epoch": 120.78, "learning_rate": 0.00014895024339176149, "loss": 0.657, "step": 6160 }, { "epoch": 120.98, "learning_rate": 0.00014894347581642994, "loss": 0.652, "step": 6170 }, { "epoch": 121.0, "eval_loss": 0.6566243171691895, "eval_runtime": 2.1086, "eval_samples_per_second": 1080.825, "eval_steps_per_second": 4.268, "step": 6171 }, { "epoch": 121.18, "learning_rate": 0.00014893668665125258, "loss": 0.6498, "step": 6180 }, { "epoch": 121.37, "learning_rate": 0.00014892987589821164, "loss": 0.652, "step": 6190 }, { "epoch": 121.57, "learning_rate": 0.00014892304355929576, "loss": 0.6503, "step": 6200 }, { "epoch": 121.76, "learning_rate": 0.00014891618963649978, "loss": 0.6514, "step": 6210 }, { "epoch": 121.96, "learning_rate": 0.00014890931413182493, "loss": 0.6488, "step": 6220 }, { "epoch": 122.0, "eval_loss": 0.6527683734893799, "eval_runtime": 2.162, "eval_samples_per_second": 1054.109, "eval_steps_per_second": 4.163, "step": 6222 }, { "epoch": 122.16, "learning_rate": 0.0001489024170472787, "loss": 0.6491, "step": 6230 }, { "epoch": 122.35, "learning_rate": 0.00014889549838487486, "loss": 0.6536, "step": 6240 }, { "epoch": 122.55, "learning_rate": 0.00014888855814663355, "loss": 0.6554, "step": 6250 }, { "epoch": 122.75, "learning_rate": 0.0001488815963345811, "loss": 0.6561, "step": 6260 }, { "epoch": 122.94, "learning_rate": 0.00014887461295075025, "loss": 0.6538, "step": 6270 }, { "epoch": 123.0, "eval_loss": 0.6558259129524231, "eval_runtime": 2.1735, "eval_samples_per_second": 1048.543, "eval_steps_per_second": 4.141, "step": 6273 }, { "epoch": 123.14, "learning_rate": 0.00014886760799717998, "loss": 0.6462, "step": 6280 }, { "epoch": 123.33, "learning_rate": 0.0001488605814759156, "loss": 0.6557, "step": 6290 }, { "epoch": 123.53, "learning_rate": 0.00014885353338900867, "loss": 0.6523, "step": 6300 }, { "epoch": 123.73, "learning_rate": 0.00014884646373851707, "loss": 0.6496, "step": 6310 }, { "epoch": 123.92, "learning_rate": 0.000148839372526505, "loss": 0.6457, "step": 6320 }, { "epoch": 124.0, "eval_loss": 0.6509066820144653, "eval_runtime": 2.2241, "eval_samples_per_second": 1024.675, "eval_steps_per_second": 4.047, "step": 6324 }, { "epoch": 124.12, "learning_rate": 0.00014883225975504294, "loss": 0.6514, "step": 6330 }, { "epoch": 124.31, "learning_rate": 0.00014882512542620764, "loss": 0.648, "step": 6340 }, { "epoch": 124.51, "learning_rate": 0.0001488179695420822, "loss": 0.6467, "step": 6350 }, { "epoch": 124.71, "learning_rate": 0.00014881079210475593, "loss": 0.6431, "step": 6360 }, { "epoch": 124.9, "learning_rate": 0.00014880359311632453, "loss": 0.643, "step": 6370 }, { "epoch": 125.0, "eval_loss": 0.6462063789367676, "eval_runtime": 2.2405, "eval_samples_per_second": 1017.192, "eval_steps_per_second": 4.017, "step": 6375 }, { "epoch": 125.1, "learning_rate": 0.0001487963725788899, "loss": 0.6473, "step": 6380 }, { "epoch": 125.29, "learning_rate": 0.00014878913049456028, "loss": 0.649, "step": 6390 }, { "epoch": 125.49, "learning_rate": 0.00014878186686545026, "loss": 0.6446, "step": 6400 }, { "epoch": 125.69, "learning_rate": 0.00014877458169368056, "loss": 0.6455, "step": 6410 }, { "epoch": 125.88, "learning_rate": 0.00014876727498137836, "loss": 0.6433, "step": 6420 }, { "epoch": 126.0, "eval_loss": 0.645854651927948, "eval_runtime": 2.0792, "eval_samples_per_second": 1096.098, "eval_steps_per_second": 4.329, "step": 6426 }, { "epoch": 126.08, "learning_rate": 0.00014875994673067703, "loss": 0.6448, "step": 6430 }, { "epoch": 126.27, "learning_rate": 0.0001487525969437163, "loss": 0.6438, "step": 6440 }, { "epoch": 126.47, "learning_rate": 0.00014874522562264206, "loss": 0.6462, "step": 6450 }, { "epoch": 126.67, "learning_rate": 0.0001487378327696066, "loss": 0.6419, "step": 6460 }, { "epoch": 126.86, "learning_rate": 0.00014873041838676853, "loss": 0.6451, "step": 6470 }, { "epoch": 127.0, "eval_loss": 0.6453904509544373, "eval_runtime": 2.1225, "eval_samples_per_second": 1073.748, "eval_steps_per_second": 4.24, "step": 6477 }, { "epoch": 127.06, "learning_rate": 0.00014872298247629263, "loss": 0.6439, "step": 6480 }, { "epoch": 127.25, "learning_rate": 0.00014871552504035, "loss": 0.6429, "step": 6490 }, { "epoch": 127.45, "learning_rate": 0.00014870804608111812, "loss": 0.6456, "step": 6500 }, { "epoch": 127.65, "learning_rate": 0.00014870054560078057, "loss": 0.646, "step": 6510 }, { "epoch": 127.84, "learning_rate": 0.00014869302360152745, "loss": 0.6413, "step": 6520 }, { "epoch": 128.0, "eval_loss": 0.6441250443458557, "eval_runtime": 2.2111, "eval_samples_per_second": 1030.7, "eval_steps_per_second": 4.07, "step": 6528 }, { "epoch": 128.04, "learning_rate": 0.00014868548008555493, "loss": 0.6426, "step": 6530 }, { "epoch": 128.24, "learning_rate": 0.00014867791505506557, "loss": 0.6458, "step": 6540 }, { "epoch": 128.43, "learning_rate": 0.0001486703285122682, "loss": 0.6405, "step": 6550 }, { "epoch": 128.63, "learning_rate": 0.00014866272045937787, "loss": 0.6432, "step": 6560 }, { "epoch": 128.82, "learning_rate": 0.00014865509089861603, "loss": 0.6407, "step": 6570 }, { "epoch": 129.0, "eval_loss": 0.6409056782722473, "eval_runtime": 2.2046, "eval_samples_per_second": 1033.764, "eval_steps_per_second": 4.082, "step": 6579 }, { "epoch": 129.02, "learning_rate": 0.00014864743983221033, "loss": 0.6424, "step": 6580 }, { "epoch": 129.22, "learning_rate": 0.00014863976726239468, "loss": 0.6441, "step": 6590 }, { "epoch": 129.41, "learning_rate": 0.00014863207319140934, "loss": 0.6386, "step": 6600 }, { "epoch": 129.61, "learning_rate": 0.00014862435762150075, "loss": 0.6421, "step": 6610 }, { "epoch": 129.8, "learning_rate": 0.00014861662055492173, "loss": 0.6409, "step": 6620 }, { "epoch": 130.0, "learning_rate": 0.00014860886199393134, "loss": 0.6381, "step": 6630 }, { "epoch": 130.0, "eval_loss": 0.6422334909439087, "eval_runtime": 2.2457, "eval_samples_per_second": 1014.81, "eval_steps_per_second": 4.008, "step": 6630 }, { "epoch": 130.2, "learning_rate": 0.00014860108194079486, "loss": 0.6419, "step": 6640 }, { "epoch": 130.39, "learning_rate": 0.0001485932803977839, "loss": 0.6369, "step": 6650 }, { "epoch": 130.59, "learning_rate": 0.0001485854573671764, "loss": 0.6383, "step": 6660 }, { "epoch": 130.78, "learning_rate": 0.00014857761285125642, "loss": 0.6396, "step": 6670 }, { "epoch": 130.98, "learning_rate": 0.00014856974685231446, "loss": 0.6408, "step": 6680 }, { "epoch": 131.0, "eval_loss": 0.6432426571846008, "eval_runtime": 2.2309, "eval_samples_per_second": 1021.546, "eval_steps_per_second": 4.034, "step": 6681 }, { "epoch": 131.18, "learning_rate": 0.00014856185937264717, "loss": 0.6373, "step": 6690 }, { "epoch": 131.37, "learning_rate": 0.00014855395041455752, "loss": 0.6412, "step": 6700 }, { "epoch": 131.57, "learning_rate": 0.00014854601998035477, "loss": 0.6391, "step": 6710 }, { "epoch": 131.76, "learning_rate": 0.00014853806807235443, "loss": 0.6364, "step": 6720 }, { "epoch": 131.96, "learning_rate": 0.00014853009469287826, "loss": 0.6404, "step": 6730 }, { "epoch": 132.0, "eval_loss": 0.640821099281311, "eval_runtime": 2.0959, "eval_samples_per_second": 1087.358, "eval_steps_per_second": 4.294, "step": 6732 }, { "epoch": 132.16, "learning_rate": 0.00014852209984425434, "loss": 0.6456, "step": 6740 }, { "epoch": 132.35, "learning_rate": 0.00014851408352881693, "loss": 0.6469, "step": 6750 }, { "epoch": 132.55, "learning_rate": 0.0001485060457489067, "loss": 0.6374, "step": 6760 }, { "epoch": 132.75, "learning_rate": 0.0001484979865068704, "loss": 0.6421, "step": 6770 }, { "epoch": 132.94, "learning_rate": 0.00014848990580506124, "loss": 0.6412, "step": 6780 }, { "epoch": 133.0, "eval_loss": 0.635369598865509, "eval_runtime": 2.1408, "eval_samples_per_second": 1064.58, "eval_steps_per_second": 4.204, "step": 6783 }, { "epoch": 133.14, "learning_rate": 0.00014848180364583857, "loss": 0.6387, "step": 6790 }, { "epoch": 133.33, "learning_rate": 0.00014847368003156804, "loss": 0.6349, "step": 6800 }, { "epoch": 133.53, "learning_rate": 0.00014846553496462153, "loss": 0.638, "step": 6810 }, { "epoch": 133.73, "learning_rate": 0.00014845736844737727, "loss": 0.6346, "step": 6820 }, { "epoch": 133.92, "learning_rate": 0.0001484491804822197, "loss": 0.6348, "step": 6830 }, { "epoch": 134.0, "eval_loss": 0.6349772810935974, "eval_runtime": 2.174, "eval_samples_per_second": 1048.301, "eval_steps_per_second": 4.14, "step": 6834 }, { "epoch": 134.12, "learning_rate": 0.0001484409710715395, "loss": 0.6331, "step": 6840 }, { "epoch": 134.31, "learning_rate": 0.00014843274021773364, "loss": 0.638, "step": 6850 }, { "epoch": 134.51, "learning_rate": 0.00014842448792320534, "loss": 0.638, "step": 6860 }, { "epoch": 134.71, "learning_rate": 0.00014841621419036408, "loss": 0.6346, "step": 6870 }, { "epoch": 134.9, "learning_rate": 0.00014840791902162562, "loss": 0.6307, "step": 6880 }, { "epoch": 135.0, "eval_loss": 0.6389310956001282, "eval_runtime": 2.1053, "eval_samples_per_second": 1082.49, "eval_steps_per_second": 4.275, "step": 6885 }, { "epoch": 135.1, "learning_rate": 0.00014839960241941198, "loss": 0.6345, "step": 6890 }, { "epoch": 135.29, "learning_rate": 0.00014839126438615137, "loss": 0.6352, "step": 6900 }, { "epoch": 135.49, "learning_rate": 0.00014838290492427838, "loss": 0.6297, "step": 6910 }, { "epoch": 135.69, "learning_rate": 0.00014837452403623373, "loss": 0.6367, "step": 6920 }, { "epoch": 135.88, "learning_rate": 0.00014836612172446447, "loss": 0.639, "step": 6930 }, { "epoch": 136.0, "eval_loss": 0.6417258381843567, "eval_runtime": 2.2192, "eval_samples_per_second": 1026.924, "eval_steps_per_second": 4.055, "step": 6936 }, { "epoch": 136.08, "learning_rate": 0.00014835769799142389, "loss": 0.6377, "step": 6940 }, { "epoch": 136.27, "learning_rate": 0.00014834925283957153, "loss": 0.6389, "step": 6950 }, { "epoch": 136.47, "learning_rate": 0.0001483407862713732, "loss": 0.6385, "step": 6960 }, { "epoch": 136.67, "learning_rate": 0.00014833229828930094, "loss": 0.6331, "step": 6970 }, { "epoch": 136.86, "learning_rate": 0.00014832378889583303, "loss": 0.6319, "step": 6980 }, { "epoch": 137.0, "eval_loss": 0.6353015899658203, "eval_runtime": 2.2601, "eval_samples_per_second": 1008.349, "eval_steps_per_second": 3.982, "step": 6987 }, { "epoch": 137.06, "learning_rate": 0.00014831525809345404, "loss": 0.6331, "step": 6990 }, { "epoch": 137.25, "learning_rate": 0.0001483067058846548, "loss": 0.634, "step": 7000 }, { "epoch": 137.45, "learning_rate": 0.00014829813227193233, "loss": 0.6321, "step": 7010 }, { "epoch": 137.65, "learning_rate": 0.00014828953725778995, "loss": 0.6313, "step": 7020 }, { "epoch": 137.84, "learning_rate": 0.0001482809208447372, "loss": 0.6306, "step": 7030 }, { "epoch": 138.0, "eval_loss": 0.6385012269020081, "eval_runtime": 2.1538, "eval_samples_per_second": 1058.12, "eval_steps_per_second": 4.179, "step": 7038 }, { "epoch": 138.04, "learning_rate": 0.00014827228303528986, "loss": 0.6387, "step": 7040 }, { "epoch": 138.24, "learning_rate": 0.00014826362383197004, "loss": 0.6332, "step": 7050 }, { "epoch": 138.43, "learning_rate": 0.00014825494323730598, "loss": 0.6369, "step": 7060 }, { "epoch": 138.63, "learning_rate": 0.00014824624125383225, "loss": 0.6319, "step": 7070 }, { "epoch": 138.82, "learning_rate": 0.0001482375178840896, "loss": 0.6307, "step": 7080 }, { "epoch": 139.0, "eval_loss": 0.6411579251289368, "eval_runtime": 2.2349, "eval_samples_per_second": 1019.731, "eval_steps_per_second": 4.027, "step": 7089 }, { "epoch": 139.02, "learning_rate": 0.00014822877313062512, "loss": 0.6336, "step": 7090 }, { "epoch": 139.22, "learning_rate": 0.00014822000699599204, "loss": 0.6396, "step": 7100 }, { "epoch": 139.41, "learning_rate": 0.00014821121948274985, "loss": 0.6287, "step": 7110 }, { "epoch": 139.61, "learning_rate": 0.00014820241059346437, "loss": 0.6303, "step": 7120 }, { "epoch": 139.8, "learning_rate": 0.00014819358033070756, "loss": 0.6306, "step": 7130 }, { "epoch": 140.0, "learning_rate": 0.00014818472869705765, "loss": 0.6343, "step": 7140 }, { "epoch": 140.0, "eval_loss": 0.6308197379112244, "eval_runtime": 2.1192, "eval_samples_per_second": 1075.425, "eval_steps_per_second": 4.247, "step": 7140 }, { "epoch": 140.2, "learning_rate": 0.00014817585569509918, "loss": 0.6315, "step": 7150 }, { "epoch": 140.39, "learning_rate": 0.0001481669613274228, "loss": 0.6305, "step": 7160 }, { "epoch": 140.59, "learning_rate": 0.00014815804559662547, "loss": 0.6292, "step": 7170 }, { "epoch": 140.78, "learning_rate": 0.00014814910850531046, "loss": 0.6275, "step": 7180 }, { "epoch": 140.98, "learning_rate": 0.0001481401500560871, "loss": 0.6289, "step": 7190 }, { "epoch": 141.0, "eval_loss": 0.6336754560470581, "eval_runtime": 2.185, "eval_samples_per_second": 1043.027, "eval_steps_per_second": 4.119, "step": 7191 }, { "epoch": 141.18, "learning_rate": 0.00014813117025157114, "loss": 0.6284, "step": 7200 }, { "epoch": 141.37, "learning_rate": 0.00014812216909438443, "loss": 0.6286, "step": 7210 }, { "epoch": 141.57, "learning_rate": 0.00014811314658715516, "loss": 0.6321, "step": 7220 }, { "epoch": 141.76, "learning_rate": 0.00014810410273251762, "loss": 0.6281, "step": 7230 }, { "epoch": 141.96, "learning_rate": 0.00014809503753311252, "loss": 0.6298, "step": 7240 }, { "epoch": 142.0, "eval_loss": 0.6342359781265259, "eval_runtime": 2.2299, "eval_samples_per_second": 1022.019, "eval_steps_per_second": 4.036, "step": 7242 }, { "epoch": 142.16, "learning_rate": 0.0001480859509915866, "loss": 0.6277, "step": 7250 }, { "epoch": 142.35, "learning_rate": 0.00014807684311059297, "loss": 0.6304, "step": 7260 }, { "epoch": 142.55, "learning_rate": 0.0001480677138927909, "loss": 0.6329, "step": 7270 }, { "epoch": 142.75, "learning_rate": 0.000148058563340846, "loss": 0.6289, "step": 7280 }, { "epoch": 142.94, "learning_rate": 0.00014804939145742993, "loss": 0.6284, "step": 7290 }, { "epoch": 143.0, "eval_loss": 0.6286506056785583, "eval_runtime": 2.2504, "eval_samples_per_second": 1012.705, "eval_steps_per_second": 3.999, "step": 7293 }, { "epoch": 143.14, "learning_rate": 0.00014804019824522072, "loss": 0.6306, "step": 7300 }, { "epoch": 143.33, "learning_rate": 0.00014803098370690256, "loss": 0.6258, "step": 7310 }, { "epoch": 143.53, "learning_rate": 0.00014802174784516592, "loss": 0.6242, "step": 7320 }, { "epoch": 143.73, "learning_rate": 0.00014801249066270745, "loss": 0.6251, "step": 7330 }, { "epoch": 143.92, "learning_rate": 0.00014800321216223004, "loss": 0.624, "step": 7340 }, { "epoch": 144.0, "eval_loss": 0.6305137872695923, "eval_runtime": 2.1855, "eval_samples_per_second": 1042.771, "eval_steps_per_second": 4.118, "step": 7344 }, { "epoch": 144.12, "learning_rate": 0.00014799391234644282, "loss": 0.6305, "step": 7350 }, { "epoch": 144.31, "learning_rate": 0.00014798459121806112, "loss": 0.6235, "step": 7360 }, { "epoch": 144.51, "learning_rate": 0.0001479752487798065, "loss": 0.6233, "step": 7370 }, { "epoch": 144.71, "learning_rate": 0.00014796588503440675, "loss": 0.6252, "step": 7380 }, { "epoch": 144.9, "learning_rate": 0.00014795649998459583, "loss": 0.6266, "step": 7390 }, { "epoch": 145.0, "eval_loss": 0.6337530612945557, "eval_runtime": 2.1245, "eval_samples_per_second": 1072.699, "eval_steps_per_second": 4.236, "step": 7395 }, { "epoch": 145.1, "learning_rate": 0.000147947093633114, "loss": 0.6315, "step": 7400 }, { "epoch": 145.29, "learning_rate": 0.00014793766598270773, "loss": 0.6313, "step": 7410 }, { "epoch": 145.49, "learning_rate": 0.00014792821703612968, "loss": 0.6256, "step": 7420 }, { "epoch": 145.69, "learning_rate": 0.0001479187467961387, "loss": 0.6237, "step": 7430 }, { "epoch": 145.88, "learning_rate": 0.00014790925526549987, "loss": 0.6253, "step": 7440 }, { "epoch": 146.0, "eval_loss": 0.6281046271324158, "eval_runtime": 2.2095, "eval_samples_per_second": 1031.434, "eval_steps_per_second": 4.073, "step": 7446 }, { "epoch": 146.08, "learning_rate": 0.0001478997424469846, "loss": 0.6263, "step": 7450 }, { "epoch": 146.27, "learning_rate": 0.0001478902083433703, "loss": 0.625, "step": 7460 }, { "epoch": 146.47, "learning_rate": 0.0001478806529574408, "loss": 0.6256, "step": 7470 }, { "epoch": 146.67, "learning_rate": 0.00014787107629198601, "loss": 0.6251, "step": 7480 }, { "epoch": 146.86, "learning_rate": 0.00014786147834980218, "loss": 0.6204, "step": 7490 }, { "epoch": 147.0, "eval_loss": 0.624106764793396, "eval_runtime": 2.317, "eval_samples_per_second": 983.579, "eval_steps_per_second": 3.884, "step": 7497 }, { "epoch": 147.06, "learning_rate": 0.00014785185913369157, "loss": 0.621, "step": 7500 }, { "epoch": 147.25, "learning_rate": 0.00014784221864646289, "loss": 0.6257, "step": 7510 }, { "epoch": 147.45, "learning_rate": 0.0001478325568909309, "loss": 0.6257, "step": 7520 }, { "epoch": 147.65, "learning_rate": 0.0001478228738699166, "loss": 0.6245, "step": 7530 }, { "epoch": 147.84, "learning_rate": 0.00014781316958624726, "loss": 0.6232, "step": 7540 }, { "epoch": 148.0, "eval_loss": 0.6222416758537292, "eval_runtime": 2.1817, "eval_samples_per_second": 1044.619, "eval_steps_per_second": 4.125, "step": 7548 }, { "epoch": 148.04, "learning_rate": 0.00014780344404275627, "loss": 0.6219, "step": 7550 }, { "epoch": 148.24, "learning_rate": 0.00014779369724228332, "loss": 0.6228, "step": 7560 }, { "epoch": 148.43, "learning_rate": 0.00014778392918767422, "loss": 0.626, "step": 7570 }, { "epoch": 148.63, "learning_rate": 0.00014777413988178103, "loss": 0.6252, "step": 7580 }, { "epoch": 148.82, "learning_rate": 0.00014776432932746202, "loss": 0.6213, "step": 7590 }, { "epoch": 149.0, "eval_loss": 0.620071530342102, "eval_runtime": 2.1112, "eval_samples_per_second": 1079.463, "eval_steps_per_second": 4.263, "step": 7599 }, { "epoch": 149.02, "learning_rate": 0.00014775449752758166, "loss": 0.6174, "step": 7600 }, { "epoch": 149.22, "learning_rate": 0.00014774464448501058, "loss": 0.6262, "step": 7610 }, { "epoch": 149.41, "learning_rate": 0.0001477347702026257, "loss": 0.6249, "step": 7620 }, { "epoch": 149.61, "learning_rate": 0.00014772487468331006, "loss": 0.6219, "step": 7630 }, { "epoch": 149.8, "learning_rate": 0.00014771495792995293, "loss": 0.6219, "step": 7640 }, { "epoch": 150.0, "learning_rate": 0.00014770501994544976, "loss": 0.6225, "step": 7650 }, { "epoch": 150.0, "eval_loss": 0.6237109303474426, "eval_runtime": 2.0993, "eval_samples_per_second": 1085.616, "eval_steps_per_second": 4.287, "step": 7650 }, { "epoch": 150.2, "learning_rate": 0.00014769506073270228, "loss": 0.6222, "step": 7660 }, { "epoch": 150.39, "learning_rate": 0.0001476850802946183, "loss": 0.626, "step": 7670 }, { "epoch": 150.59, "learning_rate": 0.00014767507863411194, "loss": 0.622, "step": 7680 }, { "epoch": 150.78, "learning_rate": 0.0001476650557541034, "loss": 0.6205, "step": 7690 }, { "epoch": 150.98, "learning_rate": 0.00014765501165751917, "loss": 0.6228, "step": 7700 }, { "epoch": 151.0, "eval_loss": 0.6192641258239746, "eval_runtime": 2.0691, "eval_samples_per_second": 1101.464, "eval_steps_per_second": 4.35, "step": 7701 }, { "epoch": 151.18, "learning_rate": 0.0001476449463472919, "loss": 0.6194, "step": 7710 }, { "epoch": 151.37, "learning_rate": 0.00014763485982636045, "loss": 0.6193, "step": 7720 }, { "epoch": 151.57, "learning_rate": 0.00014762475209766985, "loss": 0.6174, "step": 7730 }, { "epoch": 151.76, "learning_rate": 0.00014761462316417132, "loss": 0.6191, "step": 7740 }, { "epoch": 151.96, "learning_rate": 0.00014760447302882227, "loss": 0.6191, "step": 7750 }, { "epoch": 152.0, "eval_loss": 0.6199597120285034, "eval_runtime": 2.2126, "eval_samples_per_second": 1029.988, "eval_steps_per_second": 4.068, "step": 7752 }, { "epoch": 152.16, "learning_rate": 0.00014759430169458636, "loss": 0.6251, "step": 7760 }, { "epoch": 152.35, "learning_rate": 0.00014758410916443333, "loss": 0.6229, "step": 7770 }, { "epoch": 152.55, "learning_rate": 0.00014757389544133926, "loss": 0.6175, "step": 7780 }, { "epoch": 152.75, "learning_rate": 0.00014756366052828622, "loss": 0.6195, "step": 7790 }, { "epoch": 152.94, "learning_rate": 0.00014755340442826266, "loss": 0.6198, "step": 7800 }, { "epoch": 153.0, "eval_loss": 0.6229197978973389, "eval_runtime": 2.1172, "eval_samples_per_second": 1076.425, "eval_steps_per_second": 4.251, "step": 7803 }, { "epoch": 153.14, "learning_rate": 0.0001475431271442631, "loss": 0.6181, "step": 7810 }, { "epoch": 153.33, "learning_rate": 0.0001475328286792883, "loss": 0.6236, "step": 7820 }, { "epoch": 153.53, "learning_rate": 0.00014752250903634514, "loss": 0.621, "step": 7830 }, { "epoch": 153.73, "learning_rate": 0.00014751216821844677, "loss": 0.6167, "step": 7840 }, { "epoch": 153.92, "learning_rate": 0.00014750180622861243, "loss": 0.6183, "step": 7850 }, { "epoch": 154.0, "eval_loss": 0.6212825179100037, "eval_runtime": 2.2545, "eval_samples_per_second": 1010.847, "eval_steps_per_second": 3.992, "step": 7854 }, { "epoch": 154.12, "learning_rate": 0.00014749142306986763, "loss": 0.6227, "step": 7860 }, { "epoch": 154.31, "learning_rate": 0.00014748101874524402, "loss": 0.62, "step": 7870 }, { "epoch": 154.51, "learning_rate": 0.00014747059325777943, "loss": 0.6211, "step": 7880 }, { "epoch": 154.71, "learning_rate": 0.00014746014661051784, "loss": 0.6164, "step": 7890 }, { "epoch": 154.9, "learning_rate": 0.00014744967880650947, "loss": 0.6181, "step": 7900 }, { "epoch": 155.0, "eval_loss": 0.6212865114212036, "eval_runtime": 2.1579, "eval_samples_per_second": 1056.104, "eval_steps_per_second": 4.171, "step": 7905 }, { "epoch": 155.1, "learning_rate": 0.00014743918984881066, "loss": 0.6198, "step": 7910 }, { "epoch": 155.29, "learning_rate": 0.000147428679740484, "loss": 0.6242, "step": 7920 }, { "epoch": 155.49, "learning_rate": 0.00014741814848459812, "loss": 0.6169, "step": 7930 }, { "epoch": 155.69, "learning_rate": 0.000147407596084228, "loss": 0.6203, "step": 7940 }, { "epoch": 155.88, "learning_rate": 0.00014739702254245465, "loss": 0.6168, "step": 7950 }, { "epoch": 156.0, "eval_loss": 0.6163668036460876, "eval_runtime": 2.143, "eval_samples_per_second": 1063.445, "eval_steps_per_second": 4.2, "step": 7956 }, { "epoch": 156.08, "learning_rate": 0.00014738642786236533, "loss": 0.6208, "step": 7960 }, { "epoch": 156.27, "learning_rate": 0.00014737581204705345, "loss": 0.62, "step": 7970 }, { "epoch": 156.47, "learning_rate": 0.0001473651750996186, "loss": 0.6165, "step": 7980 }, { "epoch": 156.67, "learning_rate": 0.0001473545170231665, "loss": 0.6187, "step": 7990 }, { "epoch": 156.86, "learning_rate": 0.00014734383782080912, "loss": 0.6156, "step": 8000 }, { "epoch": 157.0, "eval_loss": 0.6160290241241455, "eval_runtime": 2.1391, "eval_samples_per_second": 1065.42, "eval_steps_per_second": 4.207, "step": 8007 }, { "epoch": 157.06, "learning_rate": 0.0001473331374956645, "loss": 0.619, "step": 8010 }, { "epoch": 157.25, "learning_rate": 0.00014732241605085693, "loss": 0.6117, "step": 8020 }, { "epoch": 157.45, "learning_rate": 0.0001473116734895168, "loss": 0.615, "step": 8030 }, { "epoch": 157.65, "learning_rate": 0.00014730090981478075, "loss": 0.6147, "step": 8040 }, { "epoch": 157.84, "learning_rate": 0.00014729012502979146, "loss": 0.6125, "step": 8050 }, { "epoch": 158.0, "eval_loss": 0.6153013110160828, "eval_runtime": 2.256, "eval_samples_per_second": 1010.206, "eval_steps_per_second": 3.989, "step": 8058 }, { "epoch": 158.04, "learning_rate": 0.00014727931913769793, "loss": 0.6186, "step": 8060 }, { "epoch": 158.24, "learning_rate": 0.00014726849214165516, "loss": 0.6138, "step": 8070 }, { "epoch": 158.43, "learning_rate": 0.00014725764404482445, "loss": 0.6207, "step": 8080 }, { "epoch": 158.63, "learning_rate": 0.00014724677485037317, "loss": 0.6155, "step": 8090 }, { "epoch": 158.82, "learning_rate": 0.0001472358845614749, "loss": 0.6126, "step": 8100 }, { "epoch": 159.0, "eval_loss": 0.6150580048561096, "eval_runtime": 2.1863, "eval_samples_per_second": 1042.382, "eval_steps_per_second": 4.116, "step": 8109 }, { "epoch": 159.02, "learning_rate": 0.00014722497318130935, "loss": 0.6151, "step": 8110 }, { "epoch": 159.22, "learning_rate": 0.0001472140407130624, "loss": 0.61, "step": 8120 }, { "epoch": 159.41, "learning_rate": 0.00014720308715992613, "loss": 0.6128, "step": 8130 }, { "epoch": 159.61, "learning_rate": 0.00014719211252509865, "loss": 0.6123, "step": 8140 }, { "epoch": 159.8, "learning_rate": 0.00014718111681178437, "loss": 0.6129, "step": 8150 }, { "epoch": 160.0, "learning_rate": 0.00014717010002319376, "loss": 0.6115, "step": 8160 }, { "epoch": 160.0, "eval_loss": 0.6163375377655029, "eval_runtime": 2.1353, "eval_samples_per_second": 1067.303, "eval_steps_per_second": 4.215, "step": 8160 }, { "epoch": 160.2, "learning_rate": 0.0001471590621625435, "loss": 0.6117, "step": 8170 }, { "epoch": 160.39, "learning_rate": 0.00014714800323305642, "loss": 0.6116, "step": 8180 }, { "epoch": 160.59, "learning_rate": 0.00014713692323796142, "loss": 0.6091, "step": 8190 }, { "epoch": 160.78, "learning_rate": 0.00014712582218049365, "loss": 0.6147, "step": 8200 }, { "epoch": 160.98, "learning_rate": 0.00014711470006389436, "loss": 0.611, "step": 8210 }, { "epoch": 161.0, "eval_loss": 0.6166603565216064, "eval_runtime": 2.1988, "eval_samples_per_second": 1036.453, "eval_steps_per_second": 4.093, "step": 8211 }, { "epoch": 161.18, "learning_rate": 0.000147103556891411, "loss": 0.6208, "step": 8220 }, { "epoch": 161.37, "learning_rate": 0.00014709239266629705, "loss": 0.6114, "step": 8230 }, { "epoch": 161.57, "learning_rate": 0.00014708120739181227, "loss": 0.612, "step": 8240 }, { "epoch": 161.76, "learning_rate": 0.0001470700010712225, "loss": 0.6107, "step": 8250 }, { "epoch": 161.96, "learning_rate": 0.00014705877370779975, "loss": 0.6099, "step": 8260 }, { "epoch": 162.0, "eval_loss": 0.608273446559906, "eval_runtime": 2.1536, "eval_samples_per_second": 1058.242, "eval_steps_per_second": 4.179, "step": 8262 }, { "epoch": 162.16, "learning_rate": 0.00014704752530482213, "loss": 0.6117, "step": 8270 }, { "epoch": 162.35, "learning_rate": 0.00014703625586557392, "loss": 0.6114, "step": 8280 }, { "epoch": 162.55, "learning_rate": 0.00014702496539334558, "loss": 0.6113, "step": 8290 }, { "epoch": 162.75, "learning_rate": 0.00014701365389143366, "loss": 0.6121, "step": 8300 }, { "epoch": 162.94, "learning_rate": 0.00014700232136314085, "loss": 0.6089, "step": 8310 }, { "epoch": 163.0, "eval_loss": 0.6103874444961548, "eval_runtime": 2.2403, "eval_samples_per_second": 1017.264, "eval_steps_per_second": 4.017, "step": 8313 }, { "epoch": 163.14, "learning_rate": 0.00014699096781177603, "loss": 0.6081, "step": 8320 }, { "epoch": 163.33, "learning_rate": 0.00014697959324065415, "loss": 0.6117, "step": 8330 }, { "epoch": 163.53, "learning_rate": 0.00014696819765309637, "loss": 0.6091, "step": 8340 }, { "epoch": 163.73, "learning_rate": 0.0001469567810524299, "loss": 0.6109, "step": 8350 }, { "epoch": 163.92, "learning_rate": 0.00014694534344198814, "loss": 0.6091, "step": 8360 }, { "epoch": 164.0, "eval_loss": 0.6139717698097229, "eval_runtime": 2.2739, "eval_samples_per_second": 1002.243, "eval_steps_per_second": 3.958, "step": 8364 }, { "epoch": 164.12, "learning_rate": 0.00014693388482511067, "loss": 0.6104, "step": 8370 }, { "epoch": 164.31, "learning_rate": 0.00014692240520514308, "loss": 0.6059, "step": 8380 }, { "epoch": 164.51, "learning_rate": 0.0001469109045854372, "loss": 0.6079, "step": 8390 }, { "epoch": 164.71, "learning_rate": 0.00014689938296935095, "loss": 0.6097, "step": 8400 }, { "epoch": 164.9, "learning_rate": 0.0001468878403602484, "loss": 0.6105, "step": 8410 }, { "epoch": 165.0, "eval_loss": 0.6122100949287415, "eval_runtime": 2.0978, "eval_samples_per_second": 1086.385, "eval_steps_per_second": 4.29, "step": 8415 }, { "epoch": 165.1, "learning_rate": 0.0001468762767614997, "loss": 0.6108, "step": 8420 }, { "epoch": 165.29, "learning_rate": 0.0001468646921764812, "loss": 0.6138, "step": 8430 }, { "epoch": 165.49, "learning_rate": 0.0001468530866085753, "loss": 0.6102, "step": 8440 }, { "epoch": 165.69, "learning_rate": 0.0001468414600611706, "loss": 0.6078, "step": 8450 }, { "epoch": 165.88, "learning_rate": 0.0001468298125376618, "loss": 0.61, "step": 8460 }, { "epoch": 166.0, "eval_loss": 0.6105751395225525, "eval_runtime": 2.2305, "eval_samples_per_second": 1021.739, "eval_steps_per_second": 4.035, "step": 8466 }, { "epoch": 166.08, "learning_rate": 0.0001468181440414497, "loss": 0.608, "step": 8470 }, { "epoch": 166.27, "learning_rate": 0.0001468064545759412, "loss": 0.6077, "step": 8480 }, { "epoch": 166.47, "learning_rate": 0.00014679474414454942, "loss": 0.6073, "step": 8490 }, { "epoch": 166.67, "learning_rate": 0.00014678301275069353, "loss": 0.6081, "step": 8500 }, { "epoch": 166.86, "learning_rate": 0.00014677126039779886, "loss": 0.6104, "step": 8510 }, { "epoch": 167.0, "eval_loss": 0.6062180399894714, "eval_runtime": 2.2377, "eval_samples_per_second": 1018.474, "eval_steps_per_second": 4.022, "step": 8517 }, { "epoch": 167.06, "learning_rate": 0.0001467594870892968, "loss": 0.61, "step": 8520 }, { "epoch": 167.25, "learning_rate": 0.00014674769282862487, "loss": 0.6098, "step": 8530 }, { "epoch": 167.45, "learning_rate": 0.0001467358776192268, "loss": 0.6099, "step": 8540 }, { "epoch": 167.65, "learning_rate": 0.0001467240414645523, "loss": 0.6036, "step": 8550 }, { "epoch": 167.84, "learning_rate": 0.00014671218436805732, "loss": 0.6067, "step": 8560 }, { "epoch": 168.0, "eval_loss": 0.6094751358032227, "eval_runtime": 2.1511, "eval_samples_per_second": 1059.437, "eval_steps_per_second": 4.184, "step": 8568 }, { "epoch": 168.04, "learning_rate": 0.00014670030633320383, "loss": 0.6032, "step": 8570 }, { "epoch": 168.24, "learning_rate": 0.00014668840736345993, "loss": 0.607, "step": 8580 }, { "epoch": 168.43, "learning_rate": 0.00014667648746229993, "loss": 0.6061, "step": 8590 }, { "epoch": 168.63, "learning_rate": 0.0001466645466332041, "loss": 0.608, "step": 8600 }, { "epoch": 168.82, "learning_rate": 0.00014665258487965896, "loss": 0.6056, "step": 8610 }, { "epoch": 169.0, "eval_loss": 0.606715202331543, "eval_runtime": 2.1396, "eval_samples_per_second": 1065.129, "eval_steps_per_second": 4.206, "step": 8619 }, { "epoch": 169.02, "learning_rate": 0.00014664060220515704, "loss": 0.6008, "step": 8620 }, { "epoch": 169.22, "learning_rate": 0.00014662859861319698, "loss": 0.6066, "step": 8630 }, { "epoch": 169.41, "learning_rate": 0.00014661657410728365, "loss": 0.604, "step": 8640 }, { "epoch": 169.61, "learning_rate": 0.00014660452869092786, "loss": 0.6058, "step": 8650 }, { "epoch": 169.8, "learning_rate": 0.0001465924623676466, "loss": 0.6037, "step": 8660 }, { "epoch": 170.0, "learning_rate": 0.00014658037514096305, "loss": 0.607, "step": 8670 }, { "epoch": 170.0, "eval_loss": 0.6091219782829285, "eval_runtime": 2.2153, "eval_samples_per_second": 1028.74, "eval_steps_per_second": 4.063, "step": 8670 }, { "epoch": 170.2, "learning_rate": 0.00014656826701440632, "loss": 0.6046, "step": 8680 }, { "epoch": 170.39, "learning_rate": 0.00014655613799151177, "loss": 0.6135, "step": 8690 }, { "epoch": 170.59, "learning_rate": 0.00014654398807582074, "loss": 0.6052, "step": 8700 }, { "epoch": 170.78, "learning_rate": 0.00014653181727088084, "loss": 0.6088, "step": 8710 }, { "epoch": 170.98, "learning_rate": 0.00014651962558024558, "loss": 0.6032, "step": 8720 }, { "epoch": 171.0, "eval_loss": 0.6041246652603149, "eval_runtime": 2.1575, "eval_samples_per_second": 1056.316, "eval_steps_per_second": 4.171, "step": 8721 }, { "epoch": 171.18, "learning_rate": 0.00014650741300747467, "loss": 0.6048, "step": 8730 }, { "epoch": 171.37, "learning_rate": 0.00014649517955613397, "loss": 0.6034, "step": 8740 }, { "epoch": 171.57, "learning_rate": 0.0001464829252297953, "loss": 0.6044, "step": 8750 }, { "epoch": 171.76, "learning_rate": 0.00014647065003203674, "loss": 0.6014, "step": 8760 }, { "epoch": 171.96, "learning_rate": 0.00014645835396644228, "loss": 0.6038, "step": 8770 }, { "epoch": 172.0, "eval_loss": 0.610372006893158, "eval_runtime": 2.2748, "eval_samples_per_second": 1001.839, "eval_steps_per_second": 3.956, "step": 8772 }, { "epoch": 172.16, "learning_rate": 0.00014644603703660214, "loss": 0.6019, "step": 8780 }, { "epoch": 172.35, "learning_rate": 0.0001464336992461126, "loss": 0.6056, "step": 8790 }, { "epoch": 172.55, "learning_rate": 0.000146421340598576, "loss": 0.6053, "step": 8800 }, { "epoch": 172.75, "learning_rate": 0.0001464089610976008, "loss": 0.6066, "step": 8810 }, { "epoch": 172.94, "learning_rate": 0.0001463965607468015, "loss": 0.605, "step": 8820 }, { "epoch": 173.0, "eval_loss": 0.606823742389679, "eval_runtime": 2.2188, "eval_samples_per_second": 1027.155, "eval_steps_per_second": 4.056, "step": 8823 }, { "epoch": 173.14, "learning_rate": 0.0001463841395497988, "loss": 0.6055, "step": 8830 }, { "epoch": 173.33, "learning_rate": 0.00014637169751021938, "loss": 0.602, "step": 8840 }, { "epoch": 173.53, "learning_rate": 0.000146359234631696, "loss": 0.6031, "step": 8850 }, { "epoch": 173.73, "learning_rate": 0.00014634675091786757, "loss": 0.6029, "step": 8860 }, { "epoch": 173.92, "learning_rate": 0.0001463342463723791, "loss": 0.6036, "step": 8870 }, { "epoch": 174.0, "eval_loss": 0.6004641056060791, "eval_runtime": 2.1823, "eval_samples_per_second": 1044.294, "eval_steps_per_second": 4.124, "step": 8874 }, { "epoch": 174.12, "learning_rate": 0.00014632172099888157, "loss": 0.6014, "step": 8880 }, { "epoch": 174.31, "learning_rate": 0.00014630917480103214, "loss": 0.5998, "step": 8890 }, { "epoch": 174.51, "learning_rate": 0.000146296607782494, "loss": 0.6006, "step": 8900 }, { "epoch": 174.71, "learning_rate": 0.0001462840199469365, "loss": 0.5982, "step": 8910 }, { "epoch": 174.9, "learning_rate": 0.00014627141129803492, "loss": 0.6035, "step": 8920 }, { "epoch": 175.0, "eval_loss": 0.605520486831665, "eval_runtime": 2.0997, "eval_samples_per_second": 1085.379, "eval_steps_per_second": 4.286, "step": 8925 }, { "epoch": 175.1, "learning_rate": 0.00014625878183947076, "loss": 0.6024, "step": 8930 }, { "epoch": 175.29, "learning_rate": 0.00014624613157493153, "loss": 0.6016, "step": 8940 }, { "epoch": 175.49, "learning_rate": 0.00014623346050811085, "loss": 0.6036, "step": 8950 }, { "epoch": 175.69, "learning_rate": 0.0001462207686427083, "loss": 0.6034, "step": 8960 }, { "epoch": 175.88, "learning_rate": 0.0001462080559824297, "loss": 0.6026, "step": 8970 }, { "epoch": 176.0, "eval_loss": 0.6013907194137573, "eval_runtime": 2.2617, "eval_samples_per_second": 1007.639, "eval_steps_per_second": 3.979, "step": 8976 }, { "epoch": 176.08, "learning_rate": 0.0001461953225309869, "loss": 0.5985, "step": 8980 }, { "epoch": 176.27, "learning_rate": 0.0001461825682920977, "loss": 0.6, "step": 8990 }, { "epoch": 176.47, "learning_rate": 0.00014616979326948607, "loss": 0.6012, "step": 9000 }, { "epoch": 176.67, "learning_rate": 0.00014615699746688206, "loss": 0.6005, "step": 9010 }, { "epoch": 176.86, "learning_rate": 0.00014614418088802173, "loss": 0.6012, "step": 9020 }, { "epoch": 177.0, "eval_loss": 0.6028780341148376, "eval_runtime": 2.2208, "eval_samples_per_second": 1026.22, "eval_steps_per_second": 4.053, "step": 9027 }, { "epoch": 177.06, "learning_rate": 0.0001461313435366473, "loss": 0.6033, "step": 9030 }, { "epoch": 177.25, "learning_rate": 0.00014611848541650686, "loss": 0.6005, "step": 9040 }, { "epoch": 177.45, "learning_rate": 0.00014610560653135482, "loss": 0.5972, "step": 9050 }, { "epoch": 177.65, "learning_rate": 0.0001460927068849515, "loss": 0.5979, "step": 9060 }, { "epoch": 177.84, "learning_rate": 0.00014607978648106327, "loss": 0.5945, "step": 9070 }, { "epoch": 178.0, "eval_loss": 0.5966967344284058, "eval_runtime": 2.2013, "eval_samples_per_second": 1035.296, "eval_steps_per_second": 4.088, "step": 9078 }, { "epoch": 178.04, "learning_rate": 0.0001460668453234626, "loss": 0.5976, "step": 9080 }, { "epoch": 178.24, "learning_rate": 0.00014605388341592805, "loss": 0.597, "step": 9090 }, { "epoch": 178.43, "learning_rate": 0.00014604090076224423, "loss": 0.5998, "step": 9100 }, { "epoch": 178.63, "learning_rate": 0.0001460278973662017, "loss": 0.5972, "step": 9110 }, { "epoch": 178.82, "learning_rate": 0.00014601487323159728, "loss": 0.6011, "step": 9120 }, { "epoch": 179.0, "eval_loss": 0.5920745730400085, "eval_runtime": 2.1356, "eval_samples_per_second": 1067.143, "eval_steps_per_second": 4.214, "step": 9129 }, { "epoch": 179.02, "learning_rate": 0.0001460018283622336, "loss": 0.6004, "step": 9130 }, { "epoch": 179.22, "learning_rate": 0.00014598876276191957, "loss": 0.597, "step": 9140 }, { "epoch": 179.41, "learning_rate": 0.00014597567643447, "loss": 0.5967, "step": 9150 }, { "epoch": 179.61, "learning_rate": 0.00014596256938370584, "loss": 0.5986, "step": 9160 }, { "epoch": 179.8, "learning_rate": 0.00014594944161345404, "loss": 0.5954, "step": 9170 }, { "epoch": 180.0, "learning_rate": 0.00014593629312754756, "loss": 0.5929, "step": 9180 }, { "epoch": 180.0, "eval_loss": 0.5991100668907166, "eval_runtime": 2.2402, "eval_samples_per_second": 1017.3, "eval_steps_per_second": 4.017, "step": 9180 }, { "epoch": 180.2, "learning_rate": 0.00014592312392982558, "loss": 0.5966, "step": 9190 }, { "epoch": 180.39, "learning_rate": 0.00014590993402413313, "loss": 0.5993, "step": 9200 }, { "epoch": 180.59, "learning_rate": 0.0001458967234143214, "loss": 0.6039, "step": 9210 }, { "epoch": 180.78, "learning_rate": 0.00014588349210424757, "loss": 0.5962, "step": 9220 }, { "epoch": 180.98, "learning_rate": 0.00014587024009777492, "loss": 0.5981, "step": 9230 }, { "epoch": 181.0, "eval_loss": 0.5953816175460815, "eval_runtime": 2.1845, "eval_samples_per_second": 1043.242, "eval_steps_per_second": 4.12, "step": 9231 }, { "epoch": 181.18, "learning_rate": 0.0001458569673987727, "loss": 0.5913, "step": 9240 }, { "epoch": 181.37, "learning_rate": 0.00014584367401111628, "loss": 0.5993, "step": 9250 }, { "epoch": 181.57, "learning_rate": 0.00014583035993868701, "loss": 0.5983, "step": 9260 }, { "epoch": 181.76, "learning_rate": 0.0001458170251853723, "loss": 0.5953, "step": 9270 }, { "epoch": 181.96, "learning_rate": 0.00014580366975506563, "loss": 0.6011, "step": 9280 }, { "epoch": 182.0, "eval_loss": 0.6006762385368347, "eval_runtime": 2.2479, "eval_samples_per_second": 1013.832, "eval_steps_per_second": 4.004, "step": 9282 }, { "epoch": 182.16, "learning_rate": 0.00014579029365166646, "loss": 0.6002, "step": 9290 }, { "epoch": 182.35, "learning_rate": 0.0001457768968790803, "loss": 0.596, "step": 9300 }, { "epoch": 182.55, "learning_rate": 0.00014576347944121872, "loss": 0.5964, "step": 9310 }, { "epoch": 182.75, "learning_rate": 0.00014575004134199937, "loss": 0.5929, "step": 9320 }, { "epoch": 182.94, "learning_rate": 0.00014573658258534578, "loss": 0.5977, "step": 9330 }, { "epoch": 183.0, "eval_loss": 0.6013053059577942, "eval_runtime": 2.1277, "eval_samples_per_second": 1071.122, "eval_steps_per_second": 4.23, "step": 9333 }, { "epoch": 183.14, "learning_rate": 0.0001457231031751877, "loss": 0.5967, "step": 9340 }, { "epoch": 183.33, "learning_rate": 0.00014570960311546073, "loss": 0.5993, "step": 9350 }, { "epoch": 183.53, "learning_rate": 0.00014569608241010663, "loss": 0.5945, "step": 9360 }, { "epoch": 183.73, "learning_rate": 0.00014568254106307318, "loss": 0.5928, "step": 9370 }, { "epoch": 183.92, "learning_rate": 0.00014566897907831408, "loss": 0.5947, "step": 9380 }, { "epoch": 184.0, "eval_loss": 0.602336049079895, "eval_runtime": 2.2459, "eval_samples_per_second": 1014.742, "eval_steps_per_second": 4.007, "step": 9384 }, { "epoch": 184.12, "learning_rate": 0.00014565539645978917, "loss": 0.5934, "step": 9390 }, { "epoch": 184.31, "learning_rate": 0.00014564179321146427, "loss": 0.5975, "step": 9400 }, { "epoch": 184.51, "learning_rate": 0.0001456281693373112, "loss": 0.5974, "step": 9410 }, { "epoch": 184.71, "learning_rate": 0.00014561452484130786, "loss": 0.5978, "step": 9420 }, { "epoch": 184.9, "learning_rate": 0.0001456008597274381, "loss": 0.59, "step": 9430 }, { "epoch": 185.0, "eval_loss": 0.5967941284179688, "eval_runtime": 2.1353, "eval_samples_per_second": 1067.295, "eval_steps_per_second": 4.215, "step": 9435 }, { "epoch": 185.1, "learning_rate": 0.00014558717399969188, "loss": 0.5959, "step": 9440 }, { "epoch": 185.29, "learning_rate": 0.00014557346766206508, "loss": 0.5968, "step": 9450 }, { "epoch": 185.49, "learning_rate": 0.00014555974071855967, "loss": 0.5948, "step": 9460 }, { "epoch": 185.69, "learning_rate": 0.0001455459931731836, "loss": 0.5959, "step": 9470 }, { "epoch": 185.88, "learning_rate": 0.00014553222502995087, "loss": 0.5924, "step": 9480 }, { "epoch": 186.0, "eval_loss": 0.5987167954444885, "eval_runtime": 2.2188, "eval_samples_per_second": 1027.115, "eval_steps_per_second": 4.056, "step": 9486 }, { "epoch": 186.08, "learning_rate": 0.00014551843629288143, "loss": 0.5948, "step": 9490 }, { "epoch": 186.27, "learning_rate": 0.00014550462696600133, "loss": 0.5946, "step": 9500 }, { "epoch": 186.47, "learning_rate": 0.00014549079705334253, "loss": 0.5933, "step": 9510 }, { "epoch": 186.67, "learning_rate": 0.0001454769465589431, "loss": 0.5963, "step": 9520 }, { "epoch": 186.86, "learning_rate": 0.00014546307548684708, "loss": 0.5906, "step": 9530 }, { "epoch": 187.0, "eval_loss": 0.5914626121520996, "eval_runtime": 2.1056, "eval_samples_per_second": 1082.37, "eval_steps_per_second": 4.274, "step": 9537 }, { "epoch": 187.06, "learning_rate": 0.0001454491838411045, "loss": 0.5908, "step": 9540 }, { "epoch": 187.25, "learning_rate": 0.00014543527162577137, "loss": 0.5945, "step": 9550 }, { "epoch": 187.45, "learning_rate": 0.00014542133884490983, "loss": 0.5961, "step": 9560 }, { "epoch": 187.65, "learning_rate": 0.00014540738550258787, "loss": 0.5961, "step": 9570 }, { "epoch": 187.84, "learning_rate": 0.00014539341160287958, "loss": 0.5928, "step": 9580 }, { "epoch": 188.0, "eval_loss": 0.5877456665039062, "eval_runtime": 2.2344, "eval_samples_per_second": 1019.947, "eval_steps_per_second": 4.028, "step": 9588 }, { "epoch": 188.04, "learning_rate": 0.00014537941714986503, "loss": 0.5913, "step": 9590 }, { "epoch": 188.24, "learning_rate": 0.0001453654021476303, "loss": 0.5938, "step": 9600 }, { "epoch": 188.43, "learning_rate": 0.00014535136660026742, "loss": 0.5895, "step": 9610 }, { "epoch": 188.63, "learning_rate": 0.00014533731051187448, "loss": 0.5888, "step": 9620 }, { "epoch": 188.82, "learning_rate": 0.00014532323388655557, "loss": 0.5849, "step": 9630 }, { "epoch": 189.0, "eval_loss": 0.5910800695419312, "eval_runtime": 2.1237, "eval_samples_per_second": 1073.117, "eval_steps_per_second": 4.238, "step": 9639 }, { "epoch": 189.02, "learning_rate": 0.0001453091367284207, "loss": 0.5896, "step": 9640 }, { "epoch": 189.22, "learning_rate": 0.00014529501904158597, "loss": 0.5961, "step": 9650 }, { "epoch": 189.41, "learning_rate": 0.00014528088083017339, "loss": 0.5835, "step": 9660 }, { "epoch": 189.61, "learning_rate": 0.00014526672209831104, "loss": 0.5849, "step": 9670 }, { "epoch": 189.8, "learning_rate": 0.00014525254285013294, "loss": 0.5893, "step": 9680 }, { "epoch": 190.0, "learning_rate": 0.0001452383430897791, "loss": 0.5913, "step": 9690 }, { "epoch": 190.0, "eval_loss": 0.5954359769821167, "eval_runtime": 2.2008, "eval_samples_per_second": 1035.542, "eval_steps_per_second": 4.089, "step": 9690 }, { "epoch": 190.2, "learning_rate": 0.00014522412282139555, "loss": 0.5899, "step": 9700 }, { "epoch": 190.39, "learning_rate": 0.00014520988204913426, "loss": 0.5912, "step": 9710 }, { "epoch": 190.59, "learning_rate": 0.0001451956207771533, "loss": 0.5932, "step": 9720 }, { "epoch": 190.78, "learning_rate": 0.00014518133900961653, "loss": 0.5892, "step": 9730 }, { "epoch": 190.98, "learning_rate": 0.00014516703675069401, "loss": 0.5863, "step": 9740 }, { "epoch": 191.0, "eval_loss": 0.5905748009681702, "eval_runtime": 2.1806, "eval_samples_per_second": 1045.123, "eval_steps_per_second": 4.127, "step": 9741 }, { "epoch": 191.18, "learning_rate": 0.00014515271400456162, "loss": 0.5901, "step": 9750 }, { "epoch": 191.37, "learning_rate": 0.0001451383707754013, "loss": 0.5891, "step": 9760 }, { "epoch": 191.57, "learning_rate": 0.00014512400706740095, "loss": 0.5917, "step": 9770 }, { "epoch": 191.76, "learning_rate": 0.00014510962288475445, "loss": 0.5881, "step": 9780 }, { "epoch": 191.96, "learning_rate": 0.00014509521823166164, "loss": 0.588, "step": 9790 }, { "epoch": 192.0, "eval_loss": 0.5942443013191223, "eval_runtime": 2.2206, "eval_samples_per_second": 1026.291, "eval_steps_per_second": 4.053, "step": 9792 }, { "epoch": 192.16, "learning_rate": 0.00014508079311232843, "loss": 0.5859, "step": 9800 }, { "epoch": 192.35, "learning_rate": 0.00014506634753096655, "loss": 0.5939, "step": 9810 }, { "epoch": 192.55, "learning_rate": 0.00014505188149179383, "loss": 0.5893, "step": 9820 }, { "epoch": 192.75, "learning_rate": 0.00014503739499903404, "loss": 0.5914, "step": 9830 }, { "epoch": 192.94, "learning_rate": 0.00014502288805691687, "loss": 0.5906, "step": 9840 }, { "epoch": 193.0, "eval_loss": 0.5924288034439087, "eval_runtime": 2.1443, "eval_samples_per_second": 1062.813, "eval_steps_per_second": 4.197, "step": 9843 }, { "epoch": 193.14, "learning_rate": 0.00014500836066967806, "loss": 0.5851, "step": 9850 }, { "epoch": 193.33, "learning_rate": 0.00014499381284155928, "loss": 0.5909, "step": 9860 }, { "epoch": 193.53, "learning_rate": 0.00014497924457680813, "loss": 0.5898, "step": 9870 }, { "epoch": 193.73, "learning_rate": 0.00014496465587967832, "loss": 0.591, "step": 9880 }, { "epoch": 193.92, "learning_rate": 0.0001449500467544293, "loss": 0.5927, "step": 9890 }, { "epoch": 194.0, "eval_loss": 0.5911122560501099, "eval_runtime": 2.1852, "eval_samples_per_second": 1042.912, "eval_steps_per_second": 4.119, "step": 9894 }, { "epoch": 194.12, "learning_rate": 0.00014493541720532666, "loss": 0.588, "step": 9900 }, { "epoch": 194.31, "learning_rate": 0.00014492076723664194, "loss": 0.5881, "step": 9910 }, { "epoch": 194.51, "learning_rate": 0.00014490609685265256, "loss": 0.5875, "step": 9920 }, { "epoch": 194.71, "learning_rate": 0.00014489140605764196, "loss": 0.5895, "step": 9930 }, { "epoch": 194.9, "learning_rate": 0.00014487669485589952, "loss": 0.5857, "step": 9940 }, { "epoch": 195.0, "eval_loss": 0.5852168202400208, "eval_runtime": 2.3015, "eval_samples_per_second": 990.24, "eval_steps_per_second": 3.911, "step": 9945 }, { "epoch": 195.1, "learning_rate": 0.00014486196325172058, "loss": 0.582, "step": 9950 }, { "epoch": 195.29, "learning_rate": 0.00014484721124940643, "loss": 0.5834, "step": 9960 }, { "epoch": 195.49, "learning_rate": 0.00014483243885326437, "loss": 0.5891, "step": 9970 }, { "epoch": 195.69, "learning_rate": 0.00014481764606760755, "loss": 0.589, "step": 9980 }, { "epoch": 195.88, "learning_rate": 0.00014480283289675517, "loss": 0.5859, "step": 9990 }, { "epoch": 196.0, "eval_loss": 0.5909630060195923, "eval_runtime": 2.2494, "eval_samples_per_second": 1013.138, "eval_steps_per_second": 4.001, "step": 9996 }, { "epoch": 196.08, "learning_rate": 0.00014478799934503233, "loss": 0.5859, "step": 10000 }, { "epoch": 196.27, "learning_rate": 0.00014477314541677016, "loss": 0.5885, "step": 10010 }, { "epoch": 196.47, "learning_rate": 0.00014475827111630555, "loss": 0.5799, "step": 10020 }, { "epoch": 196.67, "learning_rate": 0.00014474337644798159, "loss": 0.583, "step": 10030 }, { "epoch": 196.86, "learning_rate": 0.00014472846141614707, "loss": 0.5775, "step": 10040 }, { "epoch": 197.0, "eval_loss": 0.5853325128555298, "eval_runtime": 2.1195, "eval_samples_per_second": 1075.278, "eval_steps_per_second": 4.246, "step": 10047 }, { "epoch": 197.06, "learning_rate": 0.00014471352602515695, "loss": 0.5791, "step": 10050 }, { "epoch": 197.25, "learning_rate": 0.000144698570279372, "loss": 0.582, "step": 10060 }, { "epoch": 197.45, "learning_rate": 0.00014468359418315894, "loss": 0.5823, "step": 10070 }, { "epoch": 197.65, "learning_rate": 0.0001446685977408905, "loss": 0.5861, "step": 10080 }, { "epoch": 197.84, "learning_rate": 0.00014465358095694527, "loss": 0.586, "step": 10090 }, { "epoch": 198.0, "eval_loss": 0.5876543521881104, "eval_runtime": 2.0896, "eval_samples_per_second": 1090.616, "eval_steps_per_second": 4.307, "step": 10098 }, { "epoch": 198.04, "learning_rate": 0.00014463854383570782, "loss": 0.5862, "step": 10100 }, { "epoch": 198.24, "learning_rate": 0.00014462348638156865, "loss": 0.5902, "step": 10110 }, { "epoch": 198.43, "learning_rate": 0.00014460840859892424, "loss": 0.5845, "step": 10120 }, { "epoch": 198.63, "learning_rate": 0.0001445933104921769, "loss": 0.5818, "step": 10130 }, { "epoch": 198.82, "learning_rate": 0.000144578192065735, "loss": 0.5853, "step": 10140 }, { "epoch": 199.0, "eval_loss": 0.5847514867782593, "eval_runtime": 2.1429, "eval_samples_per_second": 1063.513, "eval_steps_per_second": 4.2, "step": 10149 }, { "epoch": 199.02, "learning_rate": 0.00014456305332401277, "loss": 0.5841, "step": 10150 }, { "epoch": 199.22, "learning_rate": 0.00014454789427143034, "loss": 0.5864, "step": 10160 }, { "epoch": 199.41, "learning_rate": 0.0001445327149124139, "loss": 0.582, "step": 10170 }, { "epoch": 199.61, "learning_rate": 0.00014451751525139538, "loss": 0.5867, "step": 10180 }, { "epoch": 199.8, "learning_rate": 0.00014450229529281285, "loss": 0.5813, "step": 10190 }, { "epoch": 200.0, "learning_rate": 0.0001444870550411101, "loss": 0.5824, "step": 10200 }, { "epoch": 200.0, "eval_loss": 0.5854251384735107, "eval_runtime": 2.1252, "eval_samples_per_second": 1072.384, "eval_steps_per_second": 4.235, "step": 10200 }, { "epoch": 200.2, "learning_rate": 0.00014447179450073703, "loss": 0.5841, "step": 10210 }, { "epoch": 200.39, "learning_rate": 0.00014445651367614933, "loss": 0.5785, "step": 10220 }, { "epoch": 200.59, "learning_rate": 0.00014444121257180866, "loss": 0.5868, "step": 10230 }, { "epoch": 200.78, "learning_rate": 0.0001444258911921826, "loss": 0.5842, "step": 10240 }, { "epoch": 200.98, "learning_rate": 0.0001444105495417447, "loss": 0.5797, "step": 10250 }, { "epoch": 201.0, "eval_loss": 0.5834174752235413, "eval_runtime": 2.1546, "eval_samples_per_second": 1057.713, "eval_steps_per_second": 4.177, "step": 10251 }, { "epoch": 201.18, "learning_rate": 0.00014439518762497428, "loss": 0.5814, "step": 10260 }, { "epoch": 201.37, "learning_rate": 0.00014437980544635675, "loss": 0.576, "step": 10270 }, { "epoch": 201.57, "learning_rate": 0.00014436440301038335, "loss": 0.5806, "step": 10280 }, { "epoch": 201.76, "learning_rate": 0.00014434898032155127, "loss": 0.5845, "step": 10290 }, { "epoch": 201.96, "learning_rate": 0.00014433353738436359, "loss": 0.5857, "step": 10300 }, { "epoch": 202.0, "eval_loss": 0.5791680216789246, "eval_runtime": 2.098, "eval_samples_per_second": 1086.25, "eval_steps_per_second": 4.29, "step": 10302 }, { "epoch": 202.16, "learning_rate": 0.00014431807420332921, "loss": 0.5806, "step": 10310 }, { "epoch": 202.35, "learning_rate": 0.00014430259078296317, "loss": 0.581, "step": 10320 }, { "epoch": 202.55, "learning_rate": 0.00014428708712778618, "loss": 0.579, "step": 10330 }, { "epoch": 202.75, "learning_rate": 0.000144271563242325, "loss": 0.5746, "step": 10340 }, { "epoch": 202.94, "learning_rate": 0.0001442560191311123, "loss": 0.5863, "step": 10350 }, { "epoch": 203.0, "eval_loss": 0.5824128985404968, "eval_runtime": 2.1448, "eval_samples_per_second": 1062.581, "eval_steps_per_second": 4.196, "step": 10353 }, { "epoch": 203.14, "learning_rate": 0.00014424045479868655, "loss": 0.5802, "step": 10360 }, { "epoch": 203.33, "learning_rate": 0.0001442248702495922, "loss": 0.5826, "step": 10370 }, { "epoch": 203.53, "learning_rate": 0.00014420926548837959, "loss": 0.5813, "step": 10380 }, { "epoch": 203.73, "learning_rate": 0.000144193640519605, "loss": 0.5836, "step": 10390 }, { "epoch": 203.92, "learning_rate": 0.00014417799534783055, "loss": 0.5826, "step": 10400 }, { "epoch": 204.0, "eval_loss": 0.5838117003440857, "eval_runtime": 2.1558, "eval_samples_per_second": 1057.159, "eval_steps_per_second": 4.175, "step": 10404 }, { "epoch": 204.12, "learning_rate": 0.00014416232997762428, "loss": 0.5853, "step": 10410 }, { "epoch": 204.31, "learning_rate": 0.00014414664441356008, "loss": 0.5776, "step": 10420 }, { "epoch": 204.51, "learning_rate": 0.00014413093866021788, "loss": 0.581, "step": 10430 }, { "epoch": 204.71, "learning_rate": 0.00014411521272218335, "loss": 0.5787, "step": 10440 }, { "epoch": 204.9, "learning_rate": 0.0001440994666040481, "loss": 0.579, "step": 10450 }, { "epoch": 205.0, "eval_loss": 0.5807640552520752, "eval_runtime": 2.1947, "eval_samples_per_second": 1038.42, "eval_steps_per_second": 4.101, "step": 10455 }, { "epoch": 205.1, "learning_rate": 0.0001440837003104097, "loss": 0.5808, "step": 10460 }, { "epoch": 205.29, "learning_rate": 0.0001440679138458715, "loss": 0.5786, "step": 10470 }, { "epoch": 205.49, "learning_rate": 0.00014405210721504284, "loss": 0.5807, "step": 10480 }, { "epoch": 205.69, "learning_rate": 0.00014403628042253887, "loss": 0.5794, "step": 10490 }, { "epoch": 205.88, "learning_rate": 0.0001440204334729807, "loss": 0.5758, "step": 10500 }, { "epoch": 206.0, "eval_loss": 0.58095782995224, "eval_runtime": 2.09, "eval_samples_per_second": 1090.42, "eval_steps_per_second": 4.306, "step": 10506 }, { "epoch": 206.08, "learning_rate": 0.00014400456637099525, "loss": 0.5798, "step": 10510 }, { "epoch": 206.27, "learning_rate": 0.00014398867912121538, "loss": 0.5726, "step": 10520 }, { "epoch": 206.47, "learning_rate": 0.00014397277172827977, "loss": 0.5783, "step": 10530 }, { "epoch": 206.67, "learning_rate": 0.00014395684419683306, "loss": 0.5765, "step": 10540 }, { "epoch": 206.86, "learning_rate": 0.00014394089653152576, "loss": 0.5798, "step": 10550 }, { "epoch": 207.0, "eval_loss": 0.5782448649406433, "eval_runtime": 2.1817, "eval_samples_per_second": 1044.609, "eval_steps_per_second": 4.125, "step": 10557 }, { "epoch": 207.06, "learning_rate": 0.0001439249287370142, "loss": 0.5779, "step": 10560 }, { "epoch": 207.25, "learning_rate": 0.0001439089408179606, "loss": 0.5839, "step": 10570 }, { "epoch": 207.45, "learning_rate": 0.00014389293277903312, "loss": 0.5778, "step": 10580 }, { "epoch": 207.65, "learning_rate": 0.00014387690462490572, "loss": 0.574, "step": 10590 }, { "epoch": 207.84, "learning_rate": 0.00014386085636025828, "loss": 0.576, "step": 10600 }, { "epoch": 208.0, "eval_loss": 0.5818247199058533, "eval_runtime": 2.2327, "eval_samples_per_second": 1020.751, "eval_steps_per_second": 4.031, "step": 10608 }, { "epoch": 208.04, "learning_rate": 0.00014384478798977655, "loss": 0.5776, "step": 10610 }, { "epoch": 208.24, "learning_rate": 0.00014382869951815207, "loss": 0.5749, "step": 10620 }, { "epoch": 208.43, "learning_rate": 0.00014381259095008238, "loss": 0.581, "step": 10630 }, { "epoch": 208.63, "learning_rate": 0.0001437964622902708, "loss": 0.5839, "step": 10640 }, { "epoch": 208.82, "learning_rate": 0.00014378031354342656, "loss": 0.5717, "step": 10650 }, { "epoch": 209.0, "eval_loss": 0.5826478600502014, "eval_runtime": 2.1857, "eval_samples_per_second": 1042.691, "eval_steps_per_second": 4.118, "step": 10659 }, { "epoch": 209.02, "learning_rate": 0.00014376414471426472, "loss": 0.5762, "step": 10660 }, { "epoch": 209.22, "learning_rate": 0.00014374795580750617, "loss": 0.5745, "step": 10670 }, { "epoch": 209.41, "learning_rate": 0.0001437317468278778, "loss": 0.5766, "step": 10680 }, { "epoch": 209.61, "learning_rate": 0.00014371551778011218, "loss": 0.5757, "step": 10690 }, { "epoch": 209.8, "learning_rate": 0.0001436992686689479, "loss": 0.5771, "step": 10700 }, { "epoch": 210.0, "learning_rate": 0.00014368299949912928, "loss": 0.5774, "step": 10710 }, { "epoch": 210.0, "eval_loss": 0.5800108313560486, "eval_runtime": 2.1291, "eval_samples_per_second": 1070.401, "eval_steps_per_second": 4.227, "step": 10710 }, { "epoch": 210.2, "learning_rate": 0.0001436667102754066, "loss": 0.5743, "step": 10720 }, { "epoch": 210.39, "learning_rate": 0.00014365040100253597, "loss": 0.5747, "step": 10730 }, { "epoch": 210.59, "learning_rate": 0.00014363407168527928, "loss": 0.5781, "step": 10740 }, { "epoch": 210.78, "learning_rate": 0.00014361772232840433, "loss": 0.5735, "step": 10750 }, { "epoch": 210.98, "learning_rate": 0.0001436013529366848, "loss": 0.5724, "step": 10760 }, { "epoch": 211.0, "eval_loss": 0.5812757611274719, "eval_runtime": 2.1877, "eval_samples_per_second": 1041.728, "eval_steps_per_second": 4.114, "step": 10761 }, { "epoch": 211.18, "learning_rate": 0.00014358496351490015, "loss": 0.5782, "step": 10770 }, { "epoch": 211.37, "learning_rate": 0.00014356855406783578, "loss": 0.5756, "step": 10780 }, { "epoch": 211.57, "learning_rate": 0.00014355212460028283, "loss": 0.5755, "step": 10790 }, { "epoch": 211.76, "learning_rate": 0.00014353567511703836, "loss": 0.5736, "step": 10800 }, { "epoch": 211.96, "learning_rate": 0.00014351920562290525, "loss": 0.5706, "step": 10810 }, { "epoch": 212.0, "eval_loss": 0.575522243976593, "eval_runtime": 2.1831, "eval_samples_per_second": 1043.938, "eval_steps_per_second": 4.123, "step": 10812 }, { "epoch": 212.16, "learning_rate": 0.00014350271612269223, "loss": 0.5736, "step": 10820 }, { "epoch": 212.35, "learning_rate": 0.00014348620662121386, "loss": 0.5775, "step": 10830 }, { "epoch": 212.55, "learning_rate": 0.00014346967712329053, "loss": 0.573, "step": 10840 }, { "epoch": 212.75, "learning_rate": 0.00014345312763374852, "loss": 0.5774, "step": 10850 }, { "epoch": 212.94, "learning_rate": 0.00014343655815741987, "loss": 0.5737, "step": 10860 }, { "epoch": 213.0, "eval_loss": 0.5787567496299744, "eval_runtime": 2.1398, "eval_samples_per_second": 1065.048, "eval_steps_per_second": 4.206, "step": 10863 }, { "epoch": 213.14, "learning_rate": 0.00014341996869914254, "loss": 0.5767, "step": 10870 }, { "epoch": 213.33, "learning_rate": 0.00014340335926376027, "loss": 0.5779, "step": 10880 }, { "epoch": 213.53, "learning_rate": 0.00014338672985612263, "loss": 0.5761, "step": 10890 }, { "epoch": 213.73, "learning_rate": 0.00014337008048108504, "loss": 0.5742, "step": 10900 }, { "epoch": 213.92, "learning_rate": 0.00014335341114350876, "loss": 0.5791, "step": 10910 }, { "epoch": 214.0, "eval_loss": 0.576858639717102, "eval_runtime": 2.1295, "eval_samples_per_second": 1070.195, "eval_steps_per_second": 4.226, "step": 10914 }, { "epoch": 214.12, "learning_rate": 0.00014333672184826086, "loss": 0.5749, "step": 10920 }, { "epoch": 214.31, "learning_rate": 0.00014332001260021422, "loss": 0.574, "step": 10930 }, { "epoch": 214.51, "learning_rate": 0.00014330328340424759, "loss": 0.5756, "step": 10940 }, { "epoch": 214.71, "learning_rate": 0.0001432865342652455, "loss": 0.5759, "step": 10950 }, { "epoch": 214.9, "learning_rate": 0.00014326976518809836, "loss": 0.5712, "step": 10960 }, { "epoch": 215.0, "eval_loss": 0.576650083065033, "eval_runtime": 2.2593, "eval_samples_per_second": 1008.739, "eval_steps_per_second": 3.984, "step": 10965 }, { "epoch": 215.1, "learning_rate": 0.00014325297617770238, "loss": 0.5725, "step": 10970 }, { "epoch": 215.29, "learning_rate": 0.00014323616723895953, "loss": 0.5747, "step": 10980 }, { "epoch": 215.49, "learning_rate": 0.00014321933837677762, "loss": 0.5712, "step": 10990 }, { "epoch": 215.69, "learning_rate": 0.00014320248959607038, "loss": 0.5758, "step": 11000 }, { "epoch": 215.88, "learning_rate": 0.00014318562090175722, "loss": 0.567, "step": 11010 }, { "epoch": 216.0, "eval_loss": 0.5790178179740906, "eval_runtime": 2.1116, "eval_samples_per_second": 1079.269, "eval_steps_per_second": 4.262, "step": 11016 }, { "epoch": 216.08, "learning_rate": 0.00014316873229876345, "loss": 0.5795, "step": 11020 }, { "epoch": 216.27, "learning_rate": 0.00014315182379202017, "loss": 0.5833, "step": 11030 }, { "epoch": 216.47, "learning_rate": 0.00014313489538646425, "loss": 0.579, "step": 11040 }, { "epoch": 216.67, "learning_rate": 0.00014311794708703847, "loss": 0.5771, "step": 11050 }, { "epoch": 216.86, "learning_rate": 0.00014310097889869128, "loss": 0.5671, "step": 11060 }, { "epoch": 217.0, "eval_loss": 0.5734152793884277, "eval_runtime": 2.2121, "eval_samples_per_second": 1030.244, "eval_steps_per_second": 4.069, "step": 11067 }, { "epoch": 217.06, "learning_rate": 0.0001430839908263771, "loss": 0.5713, "step": 11070 }, { "epoch": 217.25, "learning_rate": 0.00014306698287505596, "loss": 0.576, "step": 11080 }, { "epoch": 217.45, "learning_rate": 0.00014304995504969392, "loss": 0.5701, "step": 11090 }, { "epoch": 217.65, "learning_rate": 0.00014303290735526262, "loss": 0.5727, "step": 11100 }, { "epoch": 217.84, "learning_rate": 0.00014301583979673966, "loss": 0.5733, "step": 11110 }, { "epoch": 218.0, "eval_loss": 0.5721628665924072, "eval_runtime": 2.1562, "eval_samples_per_second": 1056.94, "eval_steps_per_second": 4.174, "step": 11118 }, { "epoch": 218.04, "learning_rate": 0.0001429987523791084, "loss": 0.5717, "step": 11120 }, { "epoch": 218.24, "learning_rate": 0.00014298164510735795, "loss": 0.5692, "step": 11130 }, { "epoch": 218.43, "learning_rate": 0.00014296451798648328, "loss": 0.5677, "step": 11140 }, { "epoch": 218.63, "learning_rate": 0.0001429473710214851, "loss": 0.5708, "step": 11150 }, { "epoch": 218.82, "learning_rate": 0.00014293020421736997, "loss": 0.5673, "step": 11160 }, { "epoch": 219.0, "eval_loss": 0.5806319117546082, "eval_runtime": 2.2602, "eval_samples_per_second": 1008.303, "eval_steps_per_second": 3.982, "step": 11169 }, { "epoch": 219.02, "learning_rate": 0.0001429130175791502, "loss": 0.5765, "step": 11170 }, { "epoch": 219.22, "learning_rate": 0.00014289581111184388, "loss": 0.5706, "step": 11180 }, { "epoch": 219.41, "learning_rate": 0.00014287858482047493, "loss": 0.5729, "step": 11190 }, { "epoch": 219.61, "learning_rate": 0.0001428613387100731, "loss": 0.5661, "step": 11200 }, { "epoch": 219.8, "learning_rate": 0.0001428440727856738, "loss": 0.5713, "step": 11210 }, { "epoch": 220.0, "learning_rate": 0.00014282678705231832, "loss": 0.5713, "step": 11220 }, { "epoch": 220.0, "eval_loss": 0.5764245986938477, "eval_runtime": 2.235, "eval_samples_per_second": 1019.686, "eval_steps_per_second": 4.027, "step": 11220 }, { "epoch": 220.2, "learning_rate": 0.00014280948151505367, "loss": 0.5748, "step": 11230 }, { "epoch": 220.39, "learning_rate": 0.00014279215617893275, "loss": 0.5672, "step": 11240 }, { "epoch": 220.59, "learning_rate": 0.00014277481104901413, "loss": 0.5696, "step": 11250 }, { "epoch": 220.78, "learning_rate": 0.00014275744613036223, "loss": 0.5736, "step": 11260 }, { "epoch": 220.98, "learning_rate": 0.00014274006142804714, "loss": 0.5669, "step": 11270 }, { "epoch": 221.0, "eval_loss": 0.5693748593330383, "eval_runtime": 2.174, "eval_samples_per_second": 1048.299, "eval_steps_per_second": 4.14, "step": 11271 }, { "epoch": 221.18, "learning_rate": 0.00014272265694714492, "loss": 0.5725, "step": 11280 }, { "epoch": 221.37, "learning_rate": 0.0001427052326927372, "loss": 0.5697, "step": 11290 }, { "epoch": 221.57, "learning_rate": 0.0001426877886699115, "loss": 0.5718, "step": 11300 }, { "epoch": 221.76, "learning_rate": 0.00014267032488376113, "loss": 0.5724, "step": 11310 }, { "epoch": 221.96, "learning_rate": 0.00014265284133938507, "loss": 0.5669, "step": 11320 }, { "epoch": 222.0, "eval_loss": 0.5748663544654846, "eval_runtime": 2.1119, "eval_samples_per_second": 1079.109, "eval_steps_per_second": 4.262, "step": 11322 }, { "epoch": 222.16, "learning_rate": 0.00014263533804188813, "loss": 0.5645, "step": 11330 }, { "epoch": 222.35, "learning_rate": 0.00014261781499638092, "loss": 0.5696, "step": 11340 }, { "epoch": 222.55, "learning_rate": 0.00014260027220797976, "loss": 0.5726, "step": 11350 }, { "epoch": 222.75, "learning_rate": 0.00014258270968180674, "loss": 0.5702, "step": 11360 }, { "epoch": 222.94, "learning_rate": 0.0001425651274229897, "loss": 0.5665, "step": 11370 }, { "epoch": 223.0, "eval_loss": 0.573235273361206, "eval_runtime": 2.1302, "eval_samples_per_second": 1069.829, "eval_steps_per_second": 4.225, "step": 11373 }, { "epoch": 223.14, "learning_rate": 0.00014254752543666234, "loss": 0.5678, "step": 11380 }, { "epoch": 223.33, "learning_rate": 0.000142529903727964, "loss": 0.5682, "step": 11390 }, { "epoch": 223.53, "learning_rate": 0.00014251226230203984, "loss": 0.5727, "step": 11400 }, { "epoch": 223.73, "learning_rate": 0.00014249460116404073, "loss": 0.5643, "step": 11410 }, { "epoch": 223.92, "learning_rate": 0.0001424769203191234, "loss": 0.5676, "step": 11420 }, { "epoch": 224.0, "eval_loss": 0.5675996541976929, "eval_runtime": 2.2583, "eval_samples_per_second": 1009.18, "eval_steps_per_second": 3.985, "step": 11424 }, { "epoch": 224.12, "learning_rate": 0.00014245921977245018, "loss": 0.5733, "step": 11430 }, { "epoch": 224.31, "learning_rate": 0.00014244149952918927, "loss": 0.5716, "step": 11440 }, { "epoch": 224.51, "learning_rate": 0.00014242375959451462, "loss": 0.5697, "step": 11450 }, { "epoch": 224.71, "learning_rate": 0.00014240599997360583, "loss": 0.5662, "step": 11460 }, { "epoch": 224.9, "learning_rate": 0.00014238822067164837, "loss": 0.5621, "step": 11470 }, { "epoch": 225.0, "eval_loss": 0.5676630735397339, "eval_runtime": 2.1447, "eval_samples_per_second": 1062.606, "eval_steps_per_second": 4.196, "step": 11475 }, { "epoch": 225.1, "learning_rate": 0.00014237042169383337, "loss": 0.5671, "step": 11480 }, { "epoch": 225.29, "learning_rate": 0.00014235260304535776, "loss": 0.5671, "step": 11490 }, { "epoch": 225.49, "learning_rate": 0.00014233476473142414, "loss": 0.5673, "step": 11500 }, { "epoch": 225.69, "learning_rate": 0.00014231690675724096, "loss": 0.5709, "step": 11510 }, { "epoch": 225.88, "learning_rate": 0.0001422990291280223, "loss": 0.5623, "step": 11520 }, { "epoch": 226.0, "eval_loss": 0.5714594125747681, "eval_runtime": 2.2157, "eval_samples_per_second": 1028.587, "eval_steps_per_second": 4.062, "step": 11526 }, { "epoch": 226.08, "learning_rate": 0.00014228113184898804, "loss": 0.5665, "step": 11530 }, { "epoch": 226.27, "learning_rate": 0.0001422632149253638, "loss": 0.5607, "step": 11540 }, { "epoch": 226.47, "learning_rate": 0.00014224527836238093, "loss": 0.57, "step": 11550 }, { "epoch": 226.67, "learning_rate": 0.0001422273221652765, "loss": 0.5676, "step": 11560 }, { "epoch": 226.86, "learning_rate": 0.0001422093463392933, "loss": 0.5695, "step": 11570 }, { "epoch": 227.0, "eval_loss": 0.5675697326660156, "eval_runtime": 2.2269, "eval_samples_per_second": 1023.399, "eval_steps_per_second": 4.042, "step": 11577 }, { "epoch": 227.06, "learning_rate": 0.00014219135088967987, "loss": 0.5649, "step": 11580 }, { "epoch": 227.25, "learning_rate": 0.00014217333582169052, "loss": 0.5683, "step": 11590 }, { "epoch": 227.45, "learning_rate": 0.00014215530114058522, "loss": 0.5651, "step": 11600 }, { "epoch": 227.65, "learning_rate": 0.00014213724685162968, "loss": 0.5641, "step": 11610 }, { "epoch": 227.84, "learning_rate": 0.00014211917296009534, "loss": 0.5657, "step": 11620 }, { "epoch": 228.0, "eval_loss": 0.5667091608047485, "eval_runtime": 2.1477, "eval_samples_per_second": 1061.131, "eval_steps_per_second": 4.191, "step": 11628 }, { "epoch": 228.04, "learning_rate": 0.00014210107947125943, "loss": 0.5616, "step": 11630 }, { "epoch": 228.24, "learning_rate": 0.00014208296639040482, "loss": 0.5638, "step": 11640 }, { "epoch": 228.43, "learning_rate": 0.0001420648337228201, "loss": 0.5682, "step": 11650 }, { "epoch": 228.63, "learning_rate": 0.00014204668147379962, "loss": 0.5694, "step": 11660 }, { "epoch": 228.82, "learning_rate": 0.00014202850964864348, "loss": 0.565, "step": 11670 }, { "epoch": 229.0, "eval_loss": 0.5644382238388062, "eval_runtime": 2.1257, "eval_samples_per_second": 1072.111, "eval_steps_per_second": 4.234, "step": 11679 }, { "epoch": 229.02, "learning_rate": 0.00014201031825265736, "loss": 0.5655, "step": 11680 }, { "epoch": 229.22, "learning_rate": 0.0001419921072911528, "loss": 0.5682, "step": 11690 }, { "epoch": 229.41, "learning_rate": 0.00014197387676944697, "loss": 0.5641, "step": 11700 }, { "epoch": 229.61, "learning_rate": 0.00014195562669286278, "loss": 0.5655, "step": 11710 }, { "epoch": 229.8, "learning_rate": 0.00014193735706672888, "loss": 0.5598, "step": 11720 }, { "epoch": 230.0, "learning_rate": 0.00014191906789637955, "loss": 0.5617, "step": 11730 }, { "epoch": 230.0, "eval_loss": 0.5650487542152405, "eval_runtime": 2.122, "eval_samples_per_second": 1074.007, "eval_steps_per_second": 4.241, "step": 11730 }, { "epoch": 230.2, "learning_rate": 0.00014190075918715483, "loss": 0.5645, "step": 11740 }, { "epoch": 230.39, "learning_rate": 0.00014188243094440047, "loss": 0.5655, "step": 11750 }, { "epoch": 230.59, "learning_rate": 0.00014186408317346788, "loss": 0.563, "step": 11760 }, { "epoch": 230.78, "learning_rate": 0.00014184571587971424, "loss": 0.564, "step": 11770 }, { "epoch": 230.98, "learning_rate": 0.00014182732906850234, "loss": 0.5587, "step": 11780 }, { "epoch": 231.0, "eval_loss": 0.5637187957763672, "eval_runtime": 2.1964, "eval_samples_per_second": 1037.624, "eval_steps_per_second": 4.098, "step": 11781 }, { "epoch": 231.18, "learning_rate": 0.00014180892274520075, "loss": 0.5592, "step": 11790 }, { "epoch": 231.37, "learning_rate": 0.0001417904969151837, "loss": 0.563, "step": 11800 }, { "epoch": 231.57, "learning_rate": 0.00014177205158383114, "loss": 0.5652, "step": 11810 }, { "epoch": 231.76, "learning_rate": 0.00014175358675652867, "loss": 0.5653, "step": 11820 }, { "epoch": 231.96, "learning_rate": 0.00014173510243866764, "loss": 0.5591, "step": 11830 }, { "epoch": 232.0, "eval_loss": 0.5652225017547607, "eval_runtime": 2.1694, "eval_samples_per_second": 1050.52, "eval_steps_per_second": 4.149, "step": 11832 }, { "epoch": 232.16, "learning_rate": 0.000141716598635645, "loss": 0.5669, "step": 11840 }, { "epoch": 232.35, "learning_rate": 0.0001416980753528635, "loss": 0.5572, "step": 11850 }, { "epoch": 232.55, "learning_rate": 0.0001416795325957315, "loss": 0.563, "step": 11860 }, { "epoch": 232.75, "learning_rate": 0.0001416609703696631, "loss": 0.5621, "step": 11870 }, { "epoch": 232.94, "learning_rate": 0.00014164238868007801, "loss": 0.5607, "step": 11880 }, { "epoch": 233.0, "eval_loss": 0.5647706985473633, "eval_runtime": 2.2045, "eval_samples_per_second": 1033.785, "eval_steps_per_second": 4.083, "step": 11883 }, { "epoch": 233.14, "learning_rate": 0.00014162378753240171, "loss": 0.5612, "step": 11890 }, { "epoch": 233.33, "learning_rate": 0.0001416051669320653, "loss": 0.5632, "step": 11900 }, { "epoch": 233.53, "learning_rate": 0.00014158652688450558, "loss": 0.568, "step": 11910 }, { "epoch": 233.73, "learning_rate": 0.00014156786739516505, "loss": 0.5595, "step": 11920 }, { "epoch": 233.92, "learning_rate": 0.00014154918846949184, "loss": 0.559, "step": 11930 }, { "epoch": 234.0, "eval_loss": 0.5681033730506897, "eval_runtime": 2.1383, "eval_samples_per_second": 1065.785, "eval_steps_per_second": 4.209, "step": 11934 }, { "epoch": 234.12, "learning_rate": 0.0001415304901129398, "loss": 0.5634, "step": 11940 }, { "epoch": 234.31, "learning_rate": 0.0001415117723309684, "loss": 0.5601, "step": 11950 }, { "epoch": 234.51, "learning_rate": 0.00014149303512904284, "loss": 0.5612, "step": 11960 }, { "epoch": 234.71, "learning_rate": 0.00014147427851263398, "loss": 0.5619, "step": 11970 }, { "epoch": 234.9, "learning_rate": 0.00014145550248721828, "loss": 0.5601, "step": 11980 }, { "epoch": 235.0, "eval_loss": 0.5636653304100037, "eval_runtime": 2.2295, "eval_samples_per_second": 1022.208, "eval_steps_per_second": 4.037, "step": 11985 }, { "epoch": 235.1, "learning_rate": 0.00014143670705827797, "loss": 0.5592, "step": 11990 }, { "epoch": 235.29, "learning_rate": 0.00014141789223130088, "loss": 0.5628, "step": 12000 }, { "epoch": 235.49, "learning_rate": 0.00014139905801178055, "loss": 0.5556, "step": 12010 }, { "epoch": 235.69, "learning_rate": 0.0001413802044052161, "loss": 0.5593, "step": 12020 }, { "epoch": 235.88, "learning_rate": 0.00014136133141711237, "loss": 0.5605, "step": 12030 }, { "epoch": 236.0, "eval_loss": 0.5697084069252014, "eval_runtime": 2.1042, "eval_samples_per_second": 1083.049, "eval_steps_per_second": 4.277, "step": 12036 }, { "epoch": 236.08, "learning_rate": 0.0001413424390529799, "loss": 0.5606, "step": 12040 }, { "epoch": 236.27, "learning_rate": 0.00014132352731833478, "loss": 0.5586, "step": 12050 }, { "epoch": 236.47, "learning_rate": 0.00014130459621869884, "loss": 0.5618, "step": 12060 }, { "epoch": 236.67, "learning_rate": 0.00014128564575959957, "loss": 0.5602, "step": 12070 }, { "epoch": 236.86, "learning_rate": 0.00014126667594657, "loss": 0.5555, "step": 12080 }, { "epoch": 237.0, "eval_loss": 0.5593078136444092, "eval_runtime": 2.1583, "eval_samples_per_second": 1055.931, "eval_steps_per_second": 4.17, "step": 12087 }, { "epoch": 237.06, "learning_rate": 0.000141247686785149, "loss": 0.5566, "step": 12090 }, { "epoch": 237.25, "learning_rate": 0.0001412286782808809, "loss": 0.5645, "step": 12100 }, { "epoch": 237.45, "learning_rate": 0.0001412096504393158, "loss": 0.5599, "step": 12110 }, { "epoch": 237.65, "learning_rate": 0.00014119060326600938, "loss": 0.5636, "step": 12120 }, { "epoch": 237.84, "learning_rate": 0.000141171536766523, "loss": 0.5602, "step": 12130 }, { "epoch": 238.0, "eval_loss": 0.5682786107063293, "eval_runtime": 2.2249, "eval_samples_per_second": 1024.331, "eval_steps_per_second": 4.045, "step": 12138 }, { "epoch": 238.04, "learning_rate": 0.00014115245094642364, "loss": 0.5597, "step": 12140 }, { "epoch": 238.24, "learning_rate": 0.00014113334581128395, "loss": 0.5586, "step": 12150 }, { "epoch": 238.43, "learning_rate": 0.00014111422136668222, "loss": 0.559, "step": 12160 }, { "epoch": 238.63, "learning_rate": 0.00014109507761820233, "loss": 0.5573, "step": 12170 }, { "epoch": 238.82, "learning_rate": 0.00014107591457143383, "loss": 0.5647, "step": 12180 }, { "epoch": 239.0, "eval_loss": 0.562912106513977, "eval_runtime": 2.2937, "eval_samples_per_second": 993.61, "eval_steps_per_second": 3.924, "step": 12189 }, { "epoch": 239.02, "learning_rate": 0.00014105673223197191, "loss": 0.5602, "step": 12190 }, { "epoch": 239.22, "learning_rate": 0.0001410375306054174, "loss": 0.5584, "step": 12200 }, { "epoch": 239.41, "learning_rate": 0.00014101830969737674, "loss": 0.5558, "step": 12210 }, { "epoch": 239.61, "learning_rate": 0.00014099906951346196, "loss": 0.5594, "step": 12220 }, { "epoch": 239.8, "learning_rate": 0.00014097981005929087, "loss": 0.5571, "step": 12230 }, { "epoch": 240.0, "learning_rate": 0.00014096053134048667, "loss": 0.5575, "step": 12240 }, { "epoch": 240.0, "eval_loss": 0.5610710978507996, "eval_runtime": 2.1604, "eval_samples_per_second": 1054.903, "eval_steps_per_second": 4.166, "step": 12240 }, { "epoch": 240.2, "learning_rate": 0.00014094123336267842, "loss": 0.5567, "step": 12250 }, { "epoch": 240.39, "learning_rate": 0.00014092191613150062, "loss": 0.5612, "step": 12260 }, { "epoch": 240.59, "learning_rate": 0.00014090257965259357, "loss": 0.5601, "step": 12270 }, { "epoch": 240.78, "learning_rate": 0.00014088322393160298, "loss": 0.5577, "step": 12280 }, { "epoch": 240.98, "learning_rate": 0.00014086384897418037, "loss": 0.5577, "step": 12290 }, { "epoch": 241.0, "eval_loss": 0.5588154792785645, "eval_runtime": 2.1478, "eval_samples_per_second": 1061.078, "eval_steps_per_second": 4.19, "step": 12291 }, { "epoch": 241.18, "learning_rate": 0.00014084445478598274, "loss": 0.5576, "step": 12300 }, { "epoch": 241.37, "learning_rate": 0.00014082504137267283, "loss": 0.5548, "step": 12310 }, { "epoch": 241.57, "learning_rate": 0.00014080560873991883, "loss": 0.5584, "step": 12320 }, { "epoch": 241.76, "learning_rate": 0.0001407861568933947, "loss": 0.556, "step": 12330 }, { "epoch": 241.96, "learning_rate": 0.00014076668583877993, "loss": 0.5514, "step": 12340 }, { "epoch": 242.0, "eval_loss": 0.5583884119987488, "eval_runtime": 2.1247, "eval_samples_per_second": 1072.62, "eval_steps_per_second": 4.236, "step": 12342 }, { "epoch": 242.16, "learning_rate": 0.00014074719558175968, "loss": 0.5522, "step": 12350 }, { "epoch": 242.35, "learning_rate": 0.0001407276861280246, "loss": 0.5528, "step": 12360 }, { "epoch": 242.55, "learning_rate": 0.000140708157483271, "loss": 0.5548, "step": 12370 }, { "epoch": 242.75, "learning_rate": 0.0001406886096532009, "loss": 0.5559, "step": 12380 }, { "epoch": 242.94, "learning_rate": 0.00014066904264352175, "loss": 0.5581, "step": 12390 }, { "epoch": 243.0, "eval_loss": 0.5565963983535767, "eval_runtime": 2.1477, "eval_samples_per_second": 1061.154, "eval_steps_per_second": 4.191, "step": 12393 }, { "epoch": 243.14, "learning_rate": 0.0001406494564599467, "loss": 0.5556, "step": 12400 }, { "epoch": 243.33, "learning_rate": 0.00014062985110819453, "loss": 0.5603, "step": 12410 }, { "epoch": 243.53, "learning_rate": 0.0001406102265939895, "loss": 0.5578, "step": 12420 }, { "epoch": 243.73, "learning_rate": 0.00014059058292306155, "loss": 0.5555, "step": 12430 }, { "epoch": 243.92, "learning_rate": 0.0001405709201011462, "loss": 0.555, "step": 12440 }, { "epoch": 244.0, "eval_loss": 0.5562523007392883, "eval_runtime": 2.1073, "eval_samples_per_second": 1081.465, "eval_steps_per_second": 4.271, "step": 12444 }, { "epoch": 244.12, "learning_rate": 0.00014055123813398455, "loss": 0.5546, "step": 12450 }, { "epoch": 244.31, "learning_rate": 0.00014053153702732333, "loss": 0.5548, "step": 12460 }, { "epoch": 244.51, "learning_rate": 0.00014051181678691475, "loss": 0.5543, "step": 12470 }, { "epoch": 244.71, "learning_rate": 0.00014049207741851676, "loss": 0.5547, "step": 12480 }, { "epoch": 244.9, "learning_rate": 0.00014047231892789274, "loss": 0.5571, "step": 12490 }, { "epoch": 245.0, "eval_loss": 0.5540693402290344, "eval_runtime": 2.2162, "eval_samples_per_second": 1028.327, "eval_steps_per_second": 4.061, "step": 12495 }, { "epoch": 245.1, "learning_rate": 0.0001404525413208118, "loss": 0.5528, "step": 12500 }, { "epoch": 245.29, "learning_rate": 0.0001404327446030485, "loss": 0.5579, "step": 12510 }, { "epoch": 245.49, "learning_rate": 0.00014041292878038308, "loss": 0.557, "step": 12520 }, { "epoch": 245.69, "learning_rate": 0.00014039309385860133, "loss": 0.5521, "step": 12530 }, { "epoch": 245.88, "learning_rate": 0.00014037323984349454, "loss": 0.5549, "step": 12540 }, { "epoch": 246.0, "eval_loss": 0.5541282296180725, "eval_runtime": 2.1726, "eval_samples_per_second": 1048.955, "eval_steps_per_second": 4.142, "step": 12546 }, { "epoch": 246.08, "learning_rate": 0.00014035336674085973, "loss": 0.5601, "step": 12550 }, { "epoch": 246.27, "learning_rate": 0.0001403334745564993, "loss": 0.5522, "step": 12560 }, { "epoch": 246.47, "learning_rate": 0.00014031356329622142, "loss": 0.5521, "step": 12570 }, { "epoch": 246.67, "learning_rate": 0.00014029363296583967, "loss": 0.5543, "step": 12580 }, { "epoch": 246.86, "learning_rate": 0.00014027368357117327, "loss": 0.5521, "step": 12590 }, { "epoch": 247.0, "eval_loss": 0.5520634651184082, "eval_runtime": 2.1385, "eval_samples_per_second": 1065.694, "eval_steps_per_second": 4.209, "step": 12597 }, { "epoch": 247.06, "learning_rate": 0.00014025371511804704, "loss": 0.5545, "step": 12600 }, { "epoch": 247.25, "learning_rate": 0.0001402337276122913, "loss": 0.5564, "step": 12610 }, { "epoch": 247.45, "learning_rate": 0.00014021372105974192, "loss": 0.5577, "step": 12620 }, { "epoch": 247.65, "learning_rate": 0.00014019369546624041, "loss": 0.5555, "step": 12630 }, { "epoch": 247.84, "learning_rate": 0.0001401736508376338, "loss": 0.55, "step": 12640 }, { "epoch": 248.0, "eval_loss": 0.5567444562911987, "eval_runtime": 2.1464, "eval_samples_per_second": 1061.759, "eval_steps_per_second": 4.193, "step": 12648 }, { "epoch": 248.04, "learning_rate": 0.00014015358717977462, "loss": 0.5494, "step": 12650 }, { "epoch": 248.24, "learning_rate": 0.00014013350449852108, "loss": 0.5543, "step": 12660 }, { "epoch": 248.43, "learning_rate": 0.00014011340279973685, "loss": 0.556, "step": 12670 }, { "epoch": 248.63, "learning_rate": 0.00014009328208929115, "loss": 0.5497, "step": 12680 }, { "epoch": 248.82, "learning_rate": 0.00014007314237305882, "loss": 0.5518, "step": 12690 }, { "epoch": 249.0, "eval_loss": 0.5559237599372864, "eval_runtime": 2.164, "eval_samples_per_second": 1053.148, "eval_steps_per_second": 4.159, "step": 12699 }, { "epoch": 249.02, "learning_rate": 0.00014005298365692018, "loss": 0.5504, "step": 12700 }, { "epoch": 249.22, "learning_rate": 0.00014003280594676113, "loss": 0.5543, "step": 12710 }, { "epoch": 249.41, "learning_rate": 0.00014001260924847314, "loss": 0.5519, "step": 12720 }, { "epoch": 249.61, "learning_rate": 0.00013999239356795315, "loss": 0.55, "step": 12730 }, { "epoch": 249.8, "learning_rate": 0.0001399721589111037, "loss": 0.5529, "step": 12740 }, { "epoch": 250.0, "learning_rate": 0.0001399519052838329, "loss": 0.5522, "step": 12750 }, { "epoch": 250.0, "eval_loss": 0.5535538792610168, "eval_runtime": 2.0888, "eval_samples_per_second": 1091.067, "eval_steps_per_second": 4.309, "step": 12750 }, { "epoch": 250.2, "learning_rate": 0.00013993163269205428, "loss": 0.555, "step": 12760 }, { "epoch": 250.39, "learning_rate": 0.00013991134114168708, "loss": 0.5497, "step": 12770 }, { "epoch": 250.59, "learning_rate": 0.0001398910306386559, "loss": 0.5566, "step": 12780 }, { "epoch": 250.78, "learning_rate": 0.000139870701188891, "loss": 0.5536, "step": 12790 }, { "epoch": 250.98, "learning_rate": 0.00013985035279832808, "loss": 0.5481, "step": 12800 }, { "epoch": 251.0, "eval_loss": 0.5503749251365662, "eval_runtime": 2.191, "eval_samples_per_second": 1040.186, "eval_steps_per_second": 4.108, "step": 12801 }, { "epoch": 251.18, "learning_rate": 0.00013982998547290847, "loss": 0.5522, "step": 12810 }, { "epoch": 251.37, "learning_rate": 0.00013980959921857893, "loss": 0.5494, "step": 12820 }, { "epoch": 251.57, "learning_rate": 0.00013978919404129185, "loss": 0.5526, "step": 12830 }, { "epoch": 251.76, "learning_rate": 0.00013976876994700502, "loss": 0.5524, "step": 12840 }, { "epoch": 251.96, "learning_rate": 0.00013974832694168188, "loss": 0.5516, "step": 12850 }, { "epoch": 252.0, "eval_loss": 0.5562964081764221, "eval_runtime": 2.1875, "eval_samples_per_second": 1041.847, "eval_steps_per_second": 4.114, "step": 12852 }, { "epoch": 252.16, "learning_rate": 0.00013972786503129125, "loss": 0.5528, "step": 12860 }, { "epoch": 252.35, "learning_rate": 0.00013970738422180765, "loss": 0.5506, "step": 12870 }, { "epoch": 252.55, "learning_rate": 0.00013968688451921094, "loss": 0.5546, "step": 12880 }, { "epoch": 252.75, "learning_rate": 0.00013966636592948662, "loss": 0.5459, "step": 12890 }, { "epoch": 252.94, "learning_rate": 0.00013964582845862566, "loss": 0.5524, "step": 12900 }, { "epoch": 253.0, "eval_loss": 0.5502599477767944, "eval_runtime": 2.2087, "eval_samples_per_second": 1031.838, "eval_steps_per_second": 4.075, "step": 12903 }, { "epoch": 253.14, "learning_rate": 0.0001396252721126245, "loss": 0.5477, "step": 12910 }, { "epoch": 253.33, "learning_rate": 0.0001396046968974852, "loss": 0.5459, "step": 12920 }, { "epoch": 253.53, "learning_rate": 0.00013958410281921522, "loss": 0.5495, "step": 12930 }, { "epoch": 253.73, "learning_rate": 0.00013956348988382756, "loss": 0.55, "step": 12940 }, { "epoch": 253.92, "learning_rate": 0.00013954285809734078, "loss": 0.5582, "step": 12950 }, { "epoch": 254.0, "eval_loss": 0.5519425272941589, "eval_runtime": 2.2043, "eval_samples_per_second": 1033.891, "eval_steps_per_second": 4.083, "step": 12954 }, { "epoch": 254.12, "learning_rate": 0.00013952220746577887, "loss": 0.5544, "step": 12960 }, { "epoch": 254.31, "learning_rate": 0.0001395015379951714, "loss": 0.5509, "step": 12970 }, { "epoch": 254.51, "learning_rate": 0.00013948084969155332, "loss": 0.554, "step": 12980 }, { "epoch": 254.71, "learning_rate": 0.00013946014256096523, "loss": 0.5538, "step": 12990 }, { "epoch": 254.9, "learning_rate": 0.0001394394166094531, "loss": 0.5514, "step": 13000 }, { "epoch": 255.0, "eval_loss": 0.5504211187362671, "eval_runtime": 2.1612, "eval_samples_per_second": 1054.513, "eval_steps_per_second": 4.164, "step": 13005 }, { "epoch": 255.1, "learning_rate": 0.00013941867184306841, "loss": 0.5475, "step": 13010 }, { "epoch": 255.29, "learning_rate": 0.00013939790826786826, "loss": 0.5481, "step": 13020 }, { "epoch": 255.49, "learning_rate": 0.00013937712588991513, "loss": 0.5661, "step": 13030 }, { "epoch": 255.69, "learning_rate": 0.00013935632471527692, "loss": 0.5604, "step": 13040 }, { "epoch": 255.88, "learning_rate": 0.00013933550475002724, "loss": 0.5498, "step": 13050 }, { "epoch": 256.0, "eval_loss": 0.5519892573356628, "eval_runtime": 2.1905, "eval_samples_per_second": 1040.423, "eval_steps_per_second": 4.109, "step": 13056 }, { "epoch": 256.08, "learning_rate": 0.00013931466600024497, "loss": 0.5487, "step": 13060 }, { "epoch": 256.27, "learning_rate": 0.00013929380847201462, "loss": 0.5514, "step": 13070 }, { "epoch": 256.47, "learning_rate": 0.00013927293217142603, "loss": 0.5507, "step": 13080 }, { "epoch": 256.67, "learning_rate": 0.0001392520371045747, "loss": 0.5525, "step": 13090 }, { "epoch": 256.86, "learning_rate": 0.0001392311232775615, "loss": 0.5481, "step": 13100 }, { "epoch": 257.0, "eval_loss": 0.5540376305580139, "eval_runtime": 2.2252, "eval_samples_per_second": 1024.175, "eval_steps_per_second": 4.045, "step": 13107 }, { "epoch": 257.06, "learning_rate": 0.0001392101906964928, "loss": 0.5545, "step": 13110 }, { "epoch": 257.25, "learning_rate": 0.00013918923936748044, "loss": 0.5499, "step": 13120 }, { "epoch": 257.45, "learning_rate": 0.00013916826929664171, "loss": 0.5492, "step": 13130 }, { "epoch": 257.65, "learning_rate": 0.0001391472804900995, "loss": 0.5522, "step": 13140 }, { "epoch": 257.84, "learning_rate": 0.00013912627295398195, "loss": 0.551, "step": 13150 }, { "epoch": 258.0, "eval_loss": 0.5503237843513489, "eval_runtime": 2.1524, "eval_samples_per_second": 1058.821, "eval_steps_per_second": 4.181, "step": 13158 }, { "epoch": 258.04, "learning_rate": 0.00013910524669442288, "loss": 0.5414, "step": 13160 }, { "epoch": 258.24, "learning_rate": 0.00013908420171756145, "loss": 0.5502, "step": 13170 }, { "epoch": 258.43, "learning_rate": 0.00013906313802954234, "loss": 0.5495, "step": 13180 }, { "epoch": 258.63, "learning_rate": 0.00013904205563651566, "loss": 0.5498, "step": 13190 }, { "epoch": 258.82, "learning_rate": 0.00013902095454463705, "loss": 0.5495, "step": 13200 }, { "epoch": 259.0, "eval_loss": 0.5490508079528809, "eval_runtime": 2.1616, "eval_samples_per_second": 1054.332, "eval_steps_per_second": 4.164, "step": 13209 }, { "epoch": 259.02, "learning_rate": 0.0001389998347600675, "loss": 0.5475, "step": 13210 }, { "epoch": 259.22, "learning_rate": 0.0001389786962889735, "loss": 0.5538, "step": 13220 }, { "epoch": 259.41, "learning_rate": 0.0001389575391375271, "loss": 0.5466, "step": 13230 }, { "epoch": 259.61, "learning_rate": 0.00013893636331190564, "loss": 0.542, "step": 13240 }, { "epoch": 259.8, "learning_rate": 0.00013891516881829198, "loss": 0.5466, "step": 13250 }, { "epoch": 260.0, "learning_rate": 0.0001388939556628745, "loss": 0.5483, "step": 13260 }, { "epoch": 260.0, "eval_loss": 0.5461385846138, "eval_runtime": 2.2603, "eval_samples_per_second": 1008.293, "eval_steps_per_second": 3.982, "step": 13260 }, { "epoch": 260.2, "learning_rate": 0.00013887272385184696, "loss": 0.5471, "step": 13270 }, { "epoch": 260.39, "learning_rate": 0.00013885147339140854, "loss": 0.5463, "step": 13280 }, { "epoch": 260.59, "learning_rate": 0.00013883020428776392, "loss": 0.5462, "step": 13290 }, { "epoch": 260.78, "learning_rate": 0.00013880891654712317, "loss": 0.542, "step": 13300 }, { "epoch": 260.98, "learning_rate": 0.0001387876101757019, "loss": 0.5468, "step": 13310 }, { "epoch": 261.0, "eval_loss": 0.5586115121841431, "eval_runtime": 2.2776, "eval_samples_per_second": 1000.623, "eval_steps_per_second": 3.952, "step": 13311 }, { "epoch": 261.18, "learning_rate": 0.00013876628517972106, "loss": 0.5529, "step": 13320 }, { "epoch": 261.37, "learning_rate": 0.00013874494156540707, "loss": 0.5473, "step": 13330 }, { "epoch": 261.57, "learning_rate": 0.00013872357933899176, "loss": 0.5472, "step": 13340 }, { "epoch": 261.76, "learning_rate": 0.0001387021985067125, "loss": 0.5433, "step": 13350 }, { "epoch": 261.96, "learning_rate": 0.00013868079907481196, "loss": 0.5454, "step": 13360 }, { "epoch": 262.0, "eval_loss": 0.5494788885116577, "eval_runtime": 2.2512, "eval_samples_per_second": 1012.37, "eval_steps_per_second": 3.998, "step": 13362 }, { "epoch": 262.16, "learning_rate": 0.0001386593810495383, "loss": 0.5498, "step": 13370 }, { "epoch": 262.35, "learning_rate": 0.0001386379444371451, "loss": 0.5504, "step": 13380 }, { "epoch": 262.55, "learning_rate": 0.00013861648924389143, "loss": 0.5442, "step": 13390 }, { "epoch": 262.75, "learning_rate": 0.00013859501547604166, "loss": 0.5405, "step": 13400 }, { "epoch": 262.94, "learning_rate": 0.00013857352313986567, "loss": 0.5447, "step": 13410 }, { "epoch": 263.0, "eval_loss": 0.5454888939857483, "eval_runtime": 2.1676, "eval_samples_per_second": 1051.378, "eval_steps_per_second": 4.152, "step": 13413 }, { "epoch": 263.14, "learning_rate": 0.00013855201224163876, "loss": 0.5415, "step": 13420 }, { "epoch": 263.33, "learning_rate": 0.00013853048278764164, "loss": 0.5447, "step": 13430 }, { "epoch": 263.53, "learning_rate": 0.0001385089347841604, "loss": 0.5452, "step": 13440 }, { "epoch": 263.73, "learning_rate": 0.00013848736823748658, "loss": 0.5429, "step": 13450 }, { "epoch": 263.92, "learning_rate": 0.00013846578315391715, "loss": 0.5475, "step": 13460 }, { "epoch": 264.0, "eval_loss": 0.5510943531990051, "eval_runtime": 2.2148, "eval_samples_per_second": 1028.991, "eval_steps_per_second": 4.064, "step": 13464 }, { "epoch": 264.12, "learning_rate": 0.00013844417953975445, "loss": 0.5442, "step": 13470 }, { "epoch": 264.31, "learning_rate": 0.0001384225574013063, "loss": 0.5478, "step": 13480 }, { "epoch": 264.51, "learning_rate": 0.0001384009167448858, "loss": 0.5402, "step": 13490 }, { "epoch": 264.71, "learning_rate": 0.00013837925757681163, "loss": 0.5466, "step": 13500 }, { "epoch": 264.9, "learning_rate": 0.00013835757990340774, "loss": 0.5439, "step": 13510 }, { "epoch": 265.0, "eval_loss": 0.5452569723129272, "eval_runtime": 2.2132, "eval_samples_per_second": 1029.718, "eval_steps_per_second": 4.066, "step": 13515 }, { "epoch": 265.1, "learning_rate": 0.0001383358837310035, "loss": 0.5433, "step": 13520 }, { "epoch": 265.29, "learning_rate": 0.00013831416906593376, "loss": 0.5426, "step": 13530 }, { "epoch": 265.49, "learning_rate": 0.0001382924359145387, "loss": 0.5445, "step": 13540 }, { "epoch": 265.69, "learning_rate": 0.0001382706842831639, "loss": 0.5431, "step": 13550 }, { "epoch": 265.88, "learning_rate": 0.00013824891417816036, "loss": 0.542, "step": 13560 }, { "epoch": 266.0, "eval_loss": 0.5477101802825928, "eval_runtime": 2.1889, "eval_samples_per_second": 1041.148, "eval_steps_per_second": 4.112, "step": 13566 }, { "epoch": 266.08, "learning_rate": 0.0001382271256058845, "loss": 0.5452, "step": 13570 }, { "epoch": 266.27, "learning_rate": 0.000138205318572698, "loss": 0.5451, "step": 13580 }, { "epoch": 266.47, "learning_rate": 0.00013818349308496812, "loss": 0.5472, "step": 13590 }, { "epoch": 266.67, "learning_rate": 0.0001381616491490674, "loss": 0.5468, "step": 13600 }, { "epoch": 266.86, "learning_rate": 0.00013813978677137379, "loss": 0.5437, "step": 13610 }, { "epoch": 267.0, "eval_loss": 0.5501764416694641, "eval_runtime": 2.2879, "eval_samples_per_second": 996.126, "eval_steps_per_second": 3.934, "step": 13617 }, { "epoch": 267.06, "learning_rate": 0.00013811790595827058, "loss": 0.5383, "step": 13620 }, { "epoch": 267.25, "learning_rate": 0.00013809600671614648, "loss": 0.5438, "step": 13630 }, { "epoch": 267.45, "learning_rate": 0.00013807408905139562, "loss": 0.5413, "step": 13640 }, { "epoch": 267.65, "learning_rate": 0.00013805215297041742, "loss": 0.5441, "step": 13650 }, { "epoch": 267.84, "learning_rate": 0.00013803019847961675, "loss": 0.5452, "step": 13660 }, { "epoch": 268.0, "eval_loss": 0.5432447195053101, "eval_runtime": 2.2193, "eval_samples_per_second": 1026.904, "eval_steps_per_second": 4.055, "step": 13668 }, { "epoch": 268.04, "learning_rate": 0.00013800822558540386, "loss": 0.544, "step": 13670 }, { "epoch": 268.24, "learning_rate": 0.0001379862342941943, "loss": 0.5393, "step": 13680 }, { "epoch": 268.43, "learning_rate": 0.00013796422461240907, "loss": 0.5409, "step": 13690 }, { "epoch": 268.63, "learning_rate": 0.0001379421965464745, "loss": 0.5421, "step": 13700 }, { "epoch": 268.82, "learning_rate": 0.00013792015010282227, "loss": 0.5397, "step": 13710 }, { "epoch": 269.0, "eval_loss": 0.5443356037139893, "eval_runtime": 2.1268, "eval_samples_per_second": 1071.567, "eval_steps_per_second": 4.232, "step": 13719 }, { "epoch": 269.02, "learning_rate": 0.00013789808528788945, "loss": 0.5451, "step": 13720 }, { "epoch": 269.22, "learning_rate": 0.00013787600210811852, "loss": 0.5417, "step": 13730 }, { "epoch": 269.41, "learning_rate": 0.0001378539005699572, "loss": 0.5466, "step": 13740 }, { "epoch": 269.61, "learning_rate": 0.00013783178067985875, "loss": 0.5429, "step": 13750 }, { "epoch": 269.8, "learning_rate": 0.0001378096424442816, "loss": 0.5416, "step": 13760 }, { "epoch": 270.0, "learning_rate": 0.00013778748586968962, "loss": 0.5424, "step": 13770 }, { "epoch": 270.0, "eval_loss": 0.5410163998603821, "eval_runtime": 2.2841, "eval_samples_per_second": 997.745, "eval_steps_per_second": 3.94, "step": 13770 }, { "epoch": 270.2, "learning_rate": 0.0001377653109625521, "loss": 0.5391, "step": 13780 }, { "epoch": 270.39, "learning_rate": 0.00013774311772934357, "loss": 0.5427, "step": 13790 }, { "epoch": 270.59, "learning_rate": 0.000137720906176544, "loss": 0.5395, "step": 13800 }, { "epoch": 270.78, "learning_rate": 0.00013769867631063858, "loss": 0.5391, "step": 13810 }, { "epoch": 270.98, "learning_rate": 0.00013767642813811802, "loss": 0.5391, "step": 13820 }, { "epoch": 271.0, "eval_loss": 0.5419728755950928, "eval_runtime": 2.1784, "eval_samples_per_second": 1046.187, "eval_steps_per_second": 4.131, "step": 13821 }, { "epoch": 271.18, "learning_rate": 0.00013765416166547825, "loss": 0.5435, "step": 13830 }, { "epoch": 271.37, "learning_rate": 0.00013763187689922062, "loss": 0.5401, "step": 13840 }, { "epoch": 271.57, "learning_rate": 0.00013760957384585174, "loss": 0.5355, "step": 13850 }, { "epoch": 271.76, "learning_rate": 0.00013758725251188366, "loss": 0.5373, "step": 13860 }, { "epoch": 271.96, "learning_rate": 0.00013756491290383365, "loss": 0.5368, "step": 13870 }, { "epoch": 272.0, "eval_loss": 0.5402165651321411, "eval_runtime": 2.2021, "eval_samples_per_second": 1034.911, "eval_steps_per_second": 4.087, "step": 13872 }, { "epoch": 272.16, "learning_rate": 0.0001375425550282244, "loss": 0.5368, "step": 13880 }, { "epoch": 272.35, "learning_rate": 0.00013752017889158394, "loss": 0.5368, "step": 13890 }, { "epoch": 272.55, "learning_rate": 0.00013749778450044558, "loss": 0.5407, "step": 13900 }, { "epoch": 272.75, "learning_rate": 0.00013747537186134797, "loss": 0.545, "step": 13910 }, { "epoch": 272.94, "learning_rate": 0.0001374529409808351, "loss": 0.5387, "step": 13920 }, { "epoch": 273.0, "eval_loss": 0.5400860905647278, "eval_runtime": 2.1991, "eval_samples_per_second": 1036.352, "eval_steps_per_second": 4.093, "step": 13923 }, { "epoch": 273.14, "learning_rate": 0.00013743049186545631, "loss": 0.5387, "step": 13930 }, { "epoch": 273.33, "learning_rate": 0.00013740802452176626, "loss": 0.5415, "step": 13940 }, { "epoch": 273.53, "learning_rate": 0.00013738553895632484, "loss": 0.5341, "step": 13950 }, { "epoch": 273.73, "learning_rate": 0.0001373630351756974, "loss": 0.5385, "step": 13960 }, { "epoch": 273.92, "learning_rate": 0.00013734051318645452, "loss": 0.5362, "step": 13970 }, { "epoch": 274.0, "eval_loss": 0.5413815975189209, "eval_runtime": 2.1276, "eval_samples_per_second": 1071.167, "eval_steps_per_second": 4.23, "step": 13974 }, { "epoch": 274.12, "learning_rate": 0.0001373179729951721, "loss": 0.5348, "step": 13980 }, { "epoch": 274.31, "learning_rate": 0.0001372954146084314, "loss": 0.5412, "step": 13990 }, { "epoch": 274.51, "learning_rate": 0.00013727283803281894, "loss": 0.5411, "step": 14000 }, { "epoch": 274.71, "learning_rate": 0.00013725024327492663, "loss": 0.537, "step": 14010 }, { "epoch": 274.9, "learning_rate": 0.00013722763034135156, "loss": 0.5374, "step": 14020 }, { "epoch": 275.0, "eval_loss": 0.5417589545249939, "eval_runtime": 2.2039, "eval_samples_per_second": 1034.085, "eval_steps_per_second": 4.084, "step": 14025 }, { "epoch": 275.1, "learning_rate": 0.00013720499923869627, "loss": 0.5404, "step": 14030 }, { "epoch": 275.29, "learning_rate": 0.0001371823499735685, "loss": 0.5364, "step": 14040 }, { "epoch": 275.49, "learning_rate": 0.00013715968255258136, "loss": 0.5415, "step": 14050 }, { "epoch": 275.69, "learning_rate": 0.0001371369969823532, "loss": 0.5358, "step": 14060 }, { "epoch": 275.88, "learning_rate": 0.0001371142932695077, "loss": 0.5375, "step": 14070 }, { "epoch": 276.0, "eval_loss": 0.5415284633636475, "eval_runtime": 2.3244, "eval_samples_per_second": 980.457, "eval_steps_per_second": 3.872, "step": 14076 }, { "epoch": 276.08, "learning_rate": 0.00013709157142067382, "loss": 0.5416, "step": 14080 }, { "epoch": 276.27, "learning_rate": 0.0001370688314424859, "loss": 0.5305, "step": 14090 }, { "epoch": 276.47, "learning_rate": 0.00013704607334158347, "loss": 0.534, "step": 14100 }, { "epoch": 276.67, "learning_rate": 0.00013702329712461135, "loss": 0.5341, "step": 14110 }, { "epoch": 276.86, "learning_rate": 0.00013700050279821975, "loss": 0.5427, "step": 14120 }, { "epoch": 277.0, "eval_loss": 0.5435522794723511, "eval_runtime": 2.2092, "eval_samples_per_second": 1031.595, "eval_steps_per_second": 4.074, "step": 14127 }, { "epoch": 277.06, "learning_rate": 0.0001369776903690641, "loss": 0.5404, "step": 14130 }, { "epoch": 277.25, "learning_rate": 0.00013695485984380505, "loss": 0.5369, "step": 14140 }, { "epoch": 277.45, "learning_rate": 0.0001369320112291087, "loss": 0.5361, "step": 14150 }, { "epoch": 277.65, "learning_rate": 0.00013690914453164625, "loss": 0.5347, "step": 14160 }, { "epoch": 277.84, "learning_rate": 0.0001368862597580943, "loss": 0.5382, "step": 14170 }, { "epoch": 278.0, "eval_loss": 0.5365801453590393, "eval_runtime": 2.2436, "eval_samples_per_second": 1015.78, "eval_steps_per_second": 4.011, "step": 14178 }, { "epoch": 278.04, "learning_rate": 0.00013686335691513474, "loss": 0.5329, "step": 14180 }, { "epoch": 278.24, "learning_rate": 0.0001368404360094546, "loss": 0.5299, "step": 14190 }, { "epoch": 278.43, "learning_rate": 0.00013681749704774637, "loss": 0.5368, "step": 14200 }, { "epoch": 278.63, "learning_rate": 0.00013679454003670766, "loss": 0.5429, "step": 14210 }, { "epoch": 278.82, "learning_rate": 0.00013677156498304136, "loss": 0.5341, "step": 14220 }, { "epoch": 279.0, "eval_loss": 0.5410821437835693, "eval_runtime": 2.2163, "eval_samples_per_second": 1028.301, "eval_steps_per_second": 4.061, "step": 14229 }, { "epoch": 279.02, "learning_rate": 0.00013674857189345578, "loss": 0.5361, "step": 14230 }, { "epoch": 279.22, "learning_rate": 0.0001367255607746643, "loss": 0.5367, "step": 14240 }, { "epoch": 279.41, "learning_rate": 0.00013670253163338572, "loss": 0.5382, "step": 14250 }, { "epoch": 279.61, "learning_rate": 0.00013667948447634398, "loss": 0.5391, "step": 14260 }, { "epoch": 279.8, "learning_rate": 0.00013665641931026837, "loss": 0.5336, "step": 14270 }, { "epoch": 280.0, "learning_rate": 0.00013663333614189336, "loss": 0.5348, "step": 14280 }, { "epoch": 280.0, "eval_loss": 0.5377461314201355, "eval_runtime": 2.1689, "eval_samples_per_second": 1050.764, "eval_steps_per_second": 4.15, "step": 14280 }, { "epoch": 280.2, "learning_rate": 0.00013661023497795878, "loss": 0.5394, "step": 14290 }, { "epoch": 280.39, "learning_rate": 0.00013658711582520964, "loss": 0.5338, "step": 14300 }, { "epoch": 280.59, "learning_rate": 0.0001365639786903962, "loss": 0.5384, "step": 14310 }, { "epoch": 280.78, "learning_rate": 0.00013654082358027398, "loss": 0.5359, "step": 14320 }, { "epoch": 280.98, "learning_rate": 0.00013651765050160376, "loss": 0.5339, "step": 14330 }, { "epoch": 281.0, "eval_loss": 0.5393053293228149, "eval_runtime": 2.1543, "eval_samples_per_second": 1057.88, "eval_steps_per_second": 4.178, "step": 14331 }, { "epoch": 281.18, "learning_rate": 0.0001364944594611516, "loss": 0.5356, "step": 14340 }, { "epoch": 281.37, "learning_rate": 0.0001364712504656887, "loss": 0.5362, "step": 14350 }, { "epoch": 281.57, "learning_rate": 0.00013644802352199165, "loss": 0.5325, "step": 14360 }, { "epoch": 281.76, "learning_rate": 0.0001364247786368421, "loss": 0.5318, "step": 14370 }, { "epoch": 281.96, "learning_rate": 0.00013640151581702716, "loss": 0.5359, "step": 14380 }, { "epoch": 282.0, "eval_loss": 0.5359378457069397, "eval_runtime": 2.2586, "eval_samples_per_second": 1009.032, "eval_steps_per_second": 3.985, "step": 14382 }, { "epoch": 282.16, "learning_rate": 0.00013637823506933893, "loss": 0.5393, "step": 14390 }, { "epoch": 282.35, "learning_rate": 0.00013635493640057496, "loss": 0.5329, "step": 14400 }, { "epoch": 282.55, "learning_rate": 0.0001363316198175379, "loss": 0.5317, "step": 14410 }, { "epoch": 282.75, "learning_rate": 0.00013630828532703568, "loss": 0.5279, "step": 14420 }, { "epoch": 282.94, "learning_rate": 0.00013628493293588143, "loss": 0.536, "step": 14430 }, { "epoch": 283.0, "eval_loss": 0.5368289947509766, "eval_runtime": 2.2197, "eval_samples_per_second": 1026.704, "eval_steps_per_second": 4.055, "step": 14433 }, { "epoch": 283.14, "learning_rate": 0.00013626156265089358, "loss": 0.5332, "step": 14440 }, { "epoch": 283.33, "learning_rate": 0.00013623817447889572, "loss": 0.5327, "step": 14450 }, { "epoch": 283.53, "learning_rate": 0.00013621476842671663, "loss": 0.5371, "step": 14460 }, { "epoch": 283.73, "learning_rate": 0.00013619134450119035, "loss": 0.5358, "step": 14470 }, { "epoch": 283.92, "learning_rate": 0.00013616790270915623, "loss": 0.5362, "step": 14480 }, { "epoch": 284.0, "eval_loss": 0.5383955240249634, "eval_runtime": 2.221, "eval_samples_per_second": 1026.116, "eval_steps_per_second": 4.052, "step": 14484 }, { "epoch": 284.12, "learning_rate": 0.00013614444305745866, "loss": 0.5362, "step": 14490 }, { "epoch": 284.31, "learning_rate": 0.00013612096555294737, "loss": 0.536, "step": 14500 }, { "epoch": 284.51, "learning_rate": 0.00013609747020247728, "loss": 0.5365, "step": 14510 }, { "epoch": 284.71, "learning_rate": 0.00013607395701290852, "loss": 0.5349, "step": 14520 }, { "epoch": 284.9, "learning_rate": 0.00013605042599110635, "loss": 0.532, "step": 14530 }, { "epoch": 285.0, "eval_loss": 0.5345928072929382, "eval_runtime": 2.2377, "eval_samples_per_second": 1018.459, "eval_steps_per_second": 4.022, "step": 14535 }, { "epoch": 285.1, "learning_rate": 0.00013602687714394138, "loss": 0.5355, "step": 14540 }, { "epoch": 285.29, "learning_rate": 0.00013600331047828928, "loss": 0.537, "step": 14550 }, { "epoch": 285.49, "learning_rate": 0.00013597972600103107, "loss": 0.5363, "step": 14560 }, { "epoch": 285.69, "learning_rate": 0.00013595612371905284, "loss": 0.536, "step": 14570 }, { "epoch": 285.88, "learning_rate": 0.0001359325036392459, "loss": 0.5298, "step": 14580 }, { "epoch": 286.0, "eval_loss": 0.5376359820365906, "eval_runtime": 2.2303, "eval_samples_per_second": 1021.823, "eval_steps_per_second": 4.035, "step": 14586 }, { "epoch": 286.08, "learning_rate": 0.00013590886576850684, "loss": 0.5338, "step": 14590 }, { "epoch": 286.27, "learning_rate": 0.0001358852101137374, "loss": 0.5338, "step": 14600 }, { "epoch": 286.47, "learning_rate": 0.00013586153668184445, "loss": 0.5345, "step": 14610 }, { "epoch": 286.67, "learning_rate": 0.00013583784547974015, "loss": 0.5332, "step": 14620 }, { "epoch": 286.86, "learning_rate": 0.0001358141365143418, "loss": 0.5352, "step": 14630 }, { "epoch": 287.0, "eval_loss": 0.5373082160949707, "eval_runtime": 2.3172, "eval_samples_per_second": 983.516, "eval_steps_per_second": 3.884, "step": 14637 }, { "epoch": 287.06, "learning_rate": 0.00013579040979257184, "loss": 0.5325, "step": 14640 }, { "epoch": 287.25, "learning_rate": 0.000135766665321358, "loss": 0.5268, "step": 14650 }, { "epoch": 287.45, "learning_rate": 0.0001357429031076331, "loss": 0.5323, "step": 14660 }, { "epoch": 287.65, "learning_rate": 0.0001357191231583352, "loss": 0.5298, "step": 14670 }, { "epoch": 287.84, "learning_rate": 0.0001356953254804075, "loss": 0.5344, "step": 14680 }, { "epoch": 288.0, "eval_loss": 0.5358995199203491, "eval_runtime": 2.3331, "eval_samples_per_second": 976.806, "eval_steps_per_second": 3.858, "step": 14688 }, { "epoch": 288.04, "learning_rate": 0.0001356715100807984, "loss": 0.5339, "step": 14690 }, { "epoch": 288.24, "learning_rate": 0.00013564767696646148, "loss": 0.5311, "step": 14700 }, { "epoch": 288.43, "learning_rate": 0.00013562382614435543, "loss": 0.5301, "step": 14710 }, { "epoch": 288.63, "learning_rate": 0.00013559995762144422, "loss": 0.5341, "step": 14720 }, { "epoch": 288.82, "learning_rate": 0.00013557607140469687, "loss": 0.5399, "step": 14730 }, { "epoch": 289.0, "eval_loss": 0.5426952838897705, "eval_runtime": 2.2284, "eval_samples_per_second": 1022.697, "eval_steps_per_second": 4.039, "step": 14739 }, { "epoch": 289.02, "learning_rate": 0.00013555216750108767, "loss": 0.5402, "step": 14740 }, { "epoch": 289.22, "learning_rate": 0.00013552824591759596, "loss": 0.5383, "step": 14750 }, { "epoch": 289.41, "learning_rate": 0.00013550430666120638, "loss": 0.5334, "step": 14760 }, { "epoch": 289.61, "learning_rate": 0.00013548034973890865, "loss": 0.5359, "step": 14770 }, { "epoch": 289.8, "learning_rate": 0.0001354563751576976, "loss": 0.5325, "step": 14780 }, { "epoch": 290.0, "learning_rate": 0.00013543238292457334, "loss": 0.5329, "step": 14790 }, { "epoch": 290.0, "eval_loss": 0.5349271297454834, "eval_runtime": 2.1752, "eval_samples_per_second": 1047.702, "eval_steps_per_second": 4.137, "step": 14790 }, { "epoch": 290.2, "learning_rate": 0.00013540837304654103, "loss": 0.5372, "step": 14800 }, { "epoch": 290.39, "learning_rate": 0.00013538434553061104, "loss": 0.5305, "step": 14810 }, { "epoch": 290.59, "learning_rate": 0.00013536030038379884, "loss": 0.5318, "step": 14820 }, { "epoch": 290.78, "learning_rate": 0.00013533623761312512, "loss": 0.5301, "step": 14830 }, { "epoch": 290.98, "learning_rate": 0.00013531215722561562, "loss": 0.531, "step": 14840 }, { "epoch": 291.0, "eval_loss": 0.5320532321929932, "eval_runtime": 2.0985, "eval_samples_per_second": 1086.033, "eval_steps_per_second": 4.289, "step": 14841 }, { "epoch": 291.18, "learning_rate": 0.0001352880592283013, "loss": 0.529, "step": 14850 }, { "epoch": 291.37, "learning_rate": 0.00013526394362821826, "loss": 0.5338, "step": 14860 }, { "epoch": 291.57, "learning_rate": 0.0001352398104324077, "loss": 0.5283, "step": 14870 }, { "epoch": 291.76, "learning_rate": 0.00013521565964791593, "loss": 0.5317, "step": 14880 }, { "epoch": 291.96, "learning_rate": 0.00013519149128179452, "loss": 0.5317, "step": 14890 }, { "epoch": 292.0, "eval_loss": 0.5360802412033081, "eval_runtime": 2.2127, "eval_samples_per_second": 1029.986, "eval_steps_per_second": 4.068, "step": 14892 }, { "epoch": 292.16, "learning_rate": 0.00013516730534110004, "loss": 0.5329, "step": 14900 }, { "epoch": 292.35, "learning_rate": 0.00013514310183289425, "loss": 0.5334, "step": 14910 }, { "epoch": 292.55, "learning_rate": 0.00013511888076424408, "loss": 0.5318, "step": 14920 }, { "epoch": 292.75, "learning_rate": 0.0001350946421422215, "loss": 0.5329, "step": 14930 }, { "epoch": 292.94, "learning_rate": 0.00013507038597390363, "loss": 0.5303, "step": 14940 }, { "epoch": 293.0, "eval_loss": 0.5295526385307312, "eval_runtime": 2.2128, "eval_samples_per_second": 1029.894, "eval_steps_per_second": 4.067, "step": 14943 }, { "epoch": 293.14, "learning_rate": 0.0001350461122663728, "loss": 0.5312, "step": 14950 }, { "epoch": 293.33, "learning_rate": 0.0001350218210267163, "loss": 0.5275, "step": 14960 }, { "epoch": 293.53, "learning_rate": 0.0001349975122620267, "loss": 0.5299, "step": 14970 }, { "epoch": 293.73, "learning_rate": 0.00013497318597940157, "loss": 0.532, "step": 14980 }, { "epoch": 293.92, "learning_rate": 0.00013494884218594367, "loss": 0.5291, "step": 14990 }, { "epoch": 294.0, "eval_loss": 0.5311741828918457, "eval_runtime": 2.198, "eval_samples_per_second": 1036.87, "eval_steps_per_second": 4.095, "step": 14994 }, { "epoch": 294.12, "learning_rate": 0.00013492448088876088, "loss": 0.5217, "step": 15000 }, { "epoch": 294.31, "learning_rate": 0.00013490010209496608, "loss": 0.5287, "step": 15010 }, { "epoch": 294.51, "learning_rate": 0.0001348757058116774, "loss": 0.5309, "step": 15020 }, { "epoch": 294.71, "learning_rate": 0.00013485129204601797, "loss": 0.5265, "step": 15030 }, { "epoch": 294.9, "learning_rate": 0.00013482686080511604, "loss": 0.5335, "step": 15040 }, { "epoch": 295.0, "eval_loss": 0.5243921875953674, "eval_runtime": 2.1979, "eval_samples_per_second": 1036.922, "eval_steps_per_second": 4.095, "step": 15045 }, { "epoch": 295.1, "learning_rate": 0.00013480241209610504, "loss": 0.5291, "step": 15050 }, { "epoch": 295.29, "learning_rate": 0.00013477794592612347, "loss": 0.5267, "step": 15060 }, { "epoch": 295.49, "learning_rate": 0.00013475346230231483, "loss": 0.5312, "step": 15070 }, { "epoch": 295.69, "learning_rate": 0.00013472896123182783, "loss": 0.5315, "step": 15080 }, { "epoch": 295.88, "learning_rate": 0.00013470444272181624, "loss": 0.5309, "step": 15090 }, { "epoch": 296.0, "eval_loss": 0.5251594185829163, "eval_runtime": 2.2406, "eval_samples_per_second": 1017.139, "eval_steps_per_second": 4.017, "step": 15096 }, { "epoch": 296.08, "learning_rate": 0.00013467990677943893, "loss": 0.5258, "step": 15100 }, { "epoch": 296.27, "learning_rate": 0.00013465535341185983, "loss": 0.5265, "step": 15110 }, { "epoch": 296.47, "learning_rate": 0.00013463078262624796, "loss": 0.5241, "step": 15120 }, { "epoch": 296.67, "learning_rate": 0.00013460619442977746, "loss": 0.5273, "step": 15130 }, { "epoch": 296.86, "learning_rate": 0.00013458158882962754, "loss": 0.5251, "step": 15140 }, { "epoch": 297.0, "eval_loss": 0.531033992767334, "eval_runtime": 2.2049, "eval_samples_per_second": 1033.623, "eval_steps_per_second": 4.082, "step": 15147 }, { "epoch": 297.06, "learning_rate": 0.0001345569658329825, "loss": 0.5324, "step": 15150 }, { "epoch": 297.25, "learning_rate": 0.00013453232544703163, "loss": 0.5246, "step": 15160 }, { "epoch": 297.45, "learning_rate": 0.00013450766767896948, "loss": 0.5249, "step": 15170 }, { "epoch": 297.65, "learning_rate": 0.0001344829925359955, "loss": 0.5332, "step": 15180 }, { "epoch": 297.84, "learning_rate": 0.0001344583000253143, "loss": 0.5266, "step": 15190 }, { "epoch": 298.0, "eval_loss": 0.5300943851470947, "eval_runtime": 2.2014, "eval_samples_per_second": 1035.249, "eval_steps_per_second": 4.088, "step": 15198 }, { "epoch": 298.04, "learning_rate": 0.00013443359015413554, "loss": 0.5276, "step": 15200 }, { "epoch": 298.24, "learning_rate": 0.00013440886292967396, "loss": 0.5231, "step": 15210 }, { "epoch": 298.43, "learning_rate": 0.00013438411835914934, "loss": 0.5227, "step": 15220 }, { "epoch": 298.63, "learning_rate": 0.00013435935644978656, "loss": 0.5226, "step": 15230 }, { "epoch": 298.82, "learning_rate": 0.00013433457720881555, "loss": 0.5279, "step": 15240 }, { "epoch": 299.0, "eval_loss": 0.530785322189331, "eval_runtime": 2.2538, "eval_samples_per_second": 1011.201, "eval_steps_per_second": 3.993, "step": 15249 }, { "epoch": 299.02, "learning_rate": 0.00013430978064347127, "loss": 0.5257, "step": 15250 }, { "epoch": 299.22, "learning_rate": 0.00013428496676099377, "loss": 0.529, "step": 15260 }, { "epoch": 299.41, "learning_rate": 0.0001342601355686282, "loss": 0.5249, "step": 15270 }, { "epoch": 299.61, "learning_rate": 0.00013423528707362463, "loss": 0.5313, "step": 15280 }, { "epoch": 299.8, "learning_rate": 0.0001342104212832383, "loss": 0.528, "step": 15290 }, { "epoch": 300.0, "learning_rate": 0.00013418553820472953, "loss": 0.5261, "step": 15300 }, { "epoch": 300.0, "eval_loss": 0.5249952077865601, "eval_runtime": 2.3137, "eval_samples_per_second": 985.006, "eval_steps_per_second": 3.89, "step": 15300 }, { "epoch": 300.2, "learning_rate": 0.00013416063784536353, "loss": 0.5222, "step": 15310 }, { "epoch": 300.39, "learning_rate": 0.00013413572021241067, "loss": 0.5292, "step": 15320 }, { "epoch": 300.59, "learning_rate": 0.0001341107853131464, "loss": 0.5291, "step": 15330 }, { "epoch": 300.78, "learning_rate": 0.00013408583315485111, "loss": 0.5241, "step": 15340 }, { "epoch": 300.98, "learning_rate": 0.0001340608637448103, "loss": 0.5214, "step": 15350 }, { "epoch": 301.0, "eval_loss": 0.5252038836479187, "eval_runtime": 2.2486, "eval_samples_per_second": 1013.503, "eval_steps_per_second": 4.002, "step": 15351 }, { "epoch": 301.18, "learning_rate": 0.00013403587709031443, "loss": 0.5175, "step": 15360 }, { "epoch": 301.37, "learning_rate": 0.00013401087319865908, "loss": 0.5245, "step": 15370 }, { "epoch": 301.57, "learning_rate": 0.00013398585207714483, "loss": 0.5325, "step": 15380 }, { "epoch": 301.76, "learning_rate": 0.00013396081373307728, "loss": 0.5292, "step": 15390 }, { "epoch": 301.96, "learning_rate": 0.0001339357581737671, "loss": 0.5269, "step": 15400 }, { "epoch": 302.0, "eval_loss": 0.5306328535079956, "eval_runtime": 2.1462, "eval_samples_per_second": 1061.875, "eval_steps_per_second": 4.193, "step": 15402 }, { "epoch": 302.16, "learning_rate": 0.0001339106854065299, "loss": 0.5247, "step": 15410 }, { "epoch": 302.35, "learning_rate": 0.00013388559543868643, "loss": 0.5235, "step": 15420 }, { "epoch": 302.55, "learning_rate": 0.0001338604882775623, "loss": 0.5239, "step": 15430 }, { "epoch": 302.75, "learning_rate": 0.00013383536393048837, "loss": 0.5259, "step": 15440 }, { "epoch": 302.94, "learning_rate": 0.00013381022240480033, "loss": 0.5229, "step": 15450 }, { "epoch": 303.0, "eval_loss": 0.5264057517051697, "eval_runtime": 2.2349, "eval_samples_per_second": 1019.731, "eval_steps_per_second": 4.027, "step": 15453 }, { "epoch": 303.14, "learning_rate": 0.00013378506370783892, "loss": 0.5252, "step": 15460 }, { "epoch": 303.33, "learning_rate": 0.00013375988784694994, "loss": 0.5258, "step": 15470 }, { "epoch": 303.53, "learning_rate": 0.0001337346948294842, "loss": 0.526, "step": 15480 }, { "epoch": 303.73, "learning_rate": 0.00013370948466279747, "loss": 0.5235, "step": 15490 }, { "epoch": 303.92, "learning_rate": 0.00013368425735425056, "loss": 0.5234, "step": 15500 }, { "epoch": 304.0, "eval_loss": 0.526339590549469, "eval_runtime": 2.245, "eval_samples_per_second": 1015.135, "eval_steps_per_second": 4.009, "step": 15504 }, { "epoch": 304.12, "learning_rate": 0.0001336590129112093, "loss": 0.5267, "step": 15510 }, { "epoch": 304.31, "learning_rate": 0.0001336337513410445, "loss": 0.5271, "step": 15520 }, { "epoch": 304.51, "learning_rate": 0.00013360847265113195, "loss": 0.5247, "step": 15530 }, { "epoch": 304.71, "learning_rate": 0.00013358317684885248, "loss": 0.53, "step": 15540 }, { "epoch": 304.9, "learning_rate": 0.00013355786394159193, "loss": 0.5271, "step": 15550 }, { "epoch": 305.0, "eval_loss": 0.5279854536056519, "eval_runtime": 2.2065, "eval_samples_per_second": 1032.864, "eval_steps_per_second": 4.079, "step": 15555 }, { "epoch": 305.1, "learning_rate": 0.00013353253393674105, "loss": 0.5284, "step": 15560 }, { "epoch": 305.29, "learning_rate": 0.00013350718684169567, "loss": 0.5224, "step": 15570 }, { "epoch": 305.49, "learning_rate": 0.00013348182266385654, "loss": 0.5236, "step": 15580 }, { "epoch": 305.69, "learning_rate": 0.0001334564414106295, "loss": 0.5206, "step": 15590 }, { "epoch": 305.88, "learning_rate": 0.00013343104308942527, "loss": 0.525, "step": 15600 }, { "epoch": 306.0, "eval_loss": 0.5233384370803833, "eval_runtime": 2.1412, "eval_samples_per_second": 1064.366, "eval_steps_per_second": 4.203, "step": 15606 }, { "epoch": 306.08, "learning_rate": 0.0001334056277076596, "loss": 0.5251, "step": 15610 }, { "epoch": 306.27, "learning_rate": 0.00013338019527275318, "loss": 0.5208, "step": 15620 }, { "epoch": 306.47, "learning_rate": 0.0001333547457921318, "loss": 0.5224, "step": 15630 }, { "epoch": 306.67, "learning_rate": 0.00013332927927322603, "loss": 0.5226, "step": 15640 }, { "epoch": 306.86, "learning_rate": 0.0001333037957234716, "loss": 0.5216, "step": 15650 }, { "epoch": 307.0, "eval_loss": 0.5210602879524231, "eval_runtime": 2.2572, "eval_samples_per_second": 1009.641, "eval_steps_per_second": 3.987, "step": 15657 }, { "epoch": 307.06, "learning_rate": 0.00013327829515030913, "loss": 0.5253, "step": 15660 }, { "epoch": 307.25, "learning_rate": 0.0001332527775611842, "loss": 0.5211, "step": 15670 }, { "epoch": 307.45, "learning_rate": 0.0001332272429635474, "loss": 0.5272, "step": 15680 }, { "epoch": 307.65, "learning_rate": 0.00013320169136485423, "loss": 0.5209, "step": 15690 }, { "epoch": 307.84, "learning_rate": 0.0001331761227725652, "loss": 0.5247, "step": 15700 }, { "epoch": 308.0, "eval_loss": 0.5245583653450012, "eval_runtime": 2.2788, "eval_samples_per_second": 1000.088, "eval_steps_per_second": 3.949, "step": 15708 }, { "epoch": 308.04, "learning_rate": 0.00013315053719414579, "loss": 0.5215, "step": 15710 }, { "epoch": 308.24, "learning_rate": 0.0001331249346370664, "loss": 0.5243, "step": 15720 }, { "epoch": 308.43, "learning_rate": 0.0001330993151088024, "loss": 0.5232, "step": 15730 }, { "epoch": 308.63, "learning_rate": 0.00013307367861683413, "loss": 0.5242, "step": 15740 }, { "epoch": 308.82, "learning_rate": 0.00013304802516864687, "loss": 0.5203, "step": 15750 }, { "epoch": 309.0, "eval_loss": 0.5279257893562317, "eval_runtime": 2.2285, "eval_samples_per_second": 1022.663, "eval_steps_per_second": 4.039, "step": 15759 }, { "epoch": 309.02, "learning_rate": 0.00013302235477173087, "loss": 0.5277, "step": 15760 }, { "epoch": 309.22, "learning_rate": 0.0001329966674335813, "loss": 0.5237, "step": 15770 }, { "epoch": 309.41, "learning_rate": 0.0001329709631616983, "loss": 0.5241, "step": 15780 }, { "epoch": 309.61, "learning_rate": 0.0001329452419635869, "loss": 0.521, "step": 15790 }, { "epoch": 309.8, "learning_rate": 0.00013291950384675718, "loss": 0.5215, "step": 15800 }, { "epoch": 310.0, "learning_rate": 0.00013289374881872404, "loss": 0.5201, "step": 15810 }, { "epoch": 310.0, "eval_loss": 0.5245955586433411, "eval_runtime": 2.1278, "eval_samples_per_second": 1071.041, "eval_steps_per_second": 4.23, "step": 15810 }, { "epoch": 310.2, "learning_rate": 0.00013286797688700743, "loss": 0.5191, "step": 15820 }, { "epoch": 310.39, "learning_rate": 0.00013284218805913214, "loss": 0.5204, "step": 15830 }, { "epoch": 310.59, "learning_rate": 0.00013281638234262795, "loss": 0.5198, "step": 15840 }, { "epoch": 310.78, "learning_rate": 0.00013279055974502952, "loss": 0.5225, "step": 15850 }, { "epoch": 310.98, "learning_rate": 0.00013276472027387652, "loss": 0.5254, "step": 15860 }, { "epoch": 311.0, "eval_loss": 0.5306283235549927, "eval_runtime": 2.1978, "eval_samples_per_second": 1036.929, "eval_steps_per_second": 4.095, "step": 15861 }, { "epoch": 311.18, "learning_rate": 0.00013273886393671348, "loss": 0.5232, "step": 15870 }, { "epoch": 311.37, "learning_rate": 0.00013271299074108986, "loss": 0.518, "step": 15880 }, { "epoch": 311.57, "learning_rate": 0.00013268710069456007, "loss": 0.5248, "step": 15890 }, { "epoch": 311.76, "learning_rate": 0.00013266119380468344, "loss": 0.5186, "step": 15900 }, { "epoch": 311.96, "learning_rate": 0.00013263527007902417, "loss": 0.5166, "step": 15910 }, { "epoch": 312.0, "eval_loss": 0.5223502516746521, "eval_runtime": 2.1971, "eval_samples_per_second": 1037.273, "eval_steps_per_second": 4.096, "step": 15912 }, { "epoch": 312.16, "learning_rate": 0.00013260932952515145, "loss": 0.5217, "step": 15920 }, { "epoch": 312.35, "learning_rate": 0.0001325833721506393, "loss": 0.5172, "step": 15930 }, { "epoch": 312.55, "learning_rate": 0.00013255739796306671, "loss": 0.5131, "step": 15940 }, { "epoch": 312.75, "learning_rate": 0.0001325314069700176, "loss": 0.522, "step": 15950 }, { "epoch": 312.94, "learning_rate": 0.0001325053991790807, "loss": 0.525, "step": 15960 }, { "epoch": 313.0, "eval_loss": 0.5192234516143799, "eval_runtime": 2.1577, "eval_samples_per_second": 1056.214, "eval_steps_per_second": 4.171, "step": 15963 }, { "epoch": 313.14, "learning_rate": 0.00013247937459784975, "loss": 0.5198, "step": 15970 }, { "epoch": 313.33, "learning_rate": 0.00013245333323392333, "loss": 0.5172, "step": 15980 }, { "epoch": 313.53, "learning_rate": 0.00013242727509490496, "loss": 0.5195, "step": 15990 }, { "epoch": 313.73, "learning_rate": 0.000132401200188403, "loss": 0.5243, "step": 16000 }, { "epoch": 313.92, "learning_rate": 0.00013237510852203072, "loss": 0.5224, "step": 16010 }, { "epoch": 314.0, "eval_loss": 0.5246869921684265, "eval_runtime": 2.2265, "eval_samples_per_second": 1023.575, "eval_steps_per_second": 4.042, "step": 16014 }, { "epoch": 314.12, "learning_rate": 0.00013234900010340638, "loss": 0.527, "step": 16020 }, { "epoch": 314.31, "learning_rate": 0.000132322874940153, "loss": 0.5218, "step": 16030 }, { "epoch": 314.51, "learning_rate": 0.00013229673303989857, "loss": 0.5198, "step": 16040 }, { "epoch": 314.71, "learning_rate": 0.00013227057441027594, "loss": 0.518, "step": 16050 }, { "epoch": 314.9, "learning_rate": 0.00013224439905892282, "loss": 0.5195, "step": 16060 }, { "epoch": 315.0, "eval_loss": 0.5229699015617371, "eval_runtime": 2.3109, "eval_samples_per_second": 986.188, "eval_steps_per_second": 3.895, "step": 16065 }, { "epoch": 315.1, "learning_rate": 0.00013221820699348187, "loss": 0.5143, "step": 16070 }, { "epoch": 315.29, "learning_rate": 0.00013219199822160058, "loss": 0.5203, "step": 16080 }, { "epoch": 315.49, "learning_rate": 0.00013216577275093126, "loss": 0.5183, "step": 16090 }, { "epoch": 315.69, "learning_rate": 0.00013213953058913126, "loss": 0.5175, "step": 16100 }, { "epoch": 315.88, "learning_rate": 0.00013211327174386266, "loss": 0.5189, "step": 16110 }, { "epoch": 316.0, "eval_loss": 0.523881733417511, "eval_runtime": 2.3072, "eval_samples_per_second": 987.77, "eval_steps_per_second": 3.901, "step": 16116 }, { "epoch": 316.08, "learning_rate": 0.00013208699622279247, "loss": 0.5161, "step": 16120 }, { "epoch": 316.27, "learning_rate": 0.0001320607040335925, "loss": 0.523, "step": 16130 }, { "epoch": 316.47, "learning_rate": 0.00013203439518393956, "loss": 0.5188, "step": 16140 }, { "epoch": 316.67, "learning_rate": 0.00013200806968151522, "loss": 0.5192, "step": 16150 }, { "epoch": 316.86, "learning_rate": 0.00013198172753400595, "loss": 0.5226, "step": 16160 }, { "epoch": 317.0, "eval_loss": 0.5179664492607117, "eval_runtime": 2.3076, "eval_samples_per_second": 987.623, "eval_steps_per_second": 3.9, "step": 16167 }, { "epoch": 317.06, "learning_rate": 0.00013195536874910304, "loss": 0.515, "step": 16170 }, { "epoch": 317.25, "learning_rate": 0.00013192899333450264, "loss": 0.5194, "step": 16180 }, { "epoch": 317.45, "learning_rate": 0.0001319026012979059, "loss": 0.5164, "step": 16190 }, { "epoch": 317.65, "learning_rate": 0.0001318761926470186, "loss": 0.5135, "step": 16200 }, { "epoch": 317.84, "learning_rate": 0.0001318497673895515, "loss": 0.5166, "step": 16210 }, { "epoch": 318.0, "eval_loss": 0.5197197794914246, "eval_runtime": 2.3294, "eval_samples_per_second": 978.381, "eval_steps_per_second": 3.864, "step": 16218 }, { "epoch": 318.04, "learning_rate": 0.00013182332553322021, "loss": 0.5186, "step": 16220 }, { "epoch": 318.24, "learning_rate": 0.00013179686708574513, "loss": 0.5179, "step": 16230 }, { "epoch": 318.43, "learning_rate": 0.00013177039205485158, "loss": 0.5234, "step": 16240 }, { "epoch": 318.63, "learning_rate": 0.00013174390044826963, "loss": 0.5198, "step": 16250 }, { "epoch": 318.82, "learning_rate": 0.00013171739227373427, "loss": 0.5159, "step": 16260 }, { "epoch": 319.0, "eval_loss": 0.5156288743019104, "eval_runtime": 2.1651, "eval_samples_per_second": 1052.585, "eval_steps_per_second": 4.157, "step": 16269 }, { "epoch": 319.02, "learning_rate": 0.0001316908675389853, "loss": 0.516, "step": 16270 }, { "epoch": 319.22, "learning_rate": 0.0001316643262517673, "loss": 0.5215, "step": 16280 }, { "epoch": 319.41, "learning_rate": 0.00013163776841982981, "loss": 0.5148, "step": 16290 }, { "epoch": 319.61, "learning_rate": 0.00013161119405092708, "loss": 0.5153, "step": 16300 }, { "epoch": 319.8, "learning_rate": 0.0001315846031528182, "loss": 0.5167, "step": 16310 }, { "epoch": 320.0, "learning_rate": 0.00013155799573326722, "loss": 0.5156, "step": 16320 }, { "epoch": 320.0, "eval_loss": 0.5203654170036316, "eval_runtime": 2.301, "eval_samples_per_second": 990.435, "eval_steps_per_second": 3.911, "step": 16320 }, { "epoch": 320.2, "learning_rate": 0.00013153137180004282, "loss": 0.522, "step": 16330 }, { "epoch": 320.39, "learning_rate": 0.0001315047313609186, "loss": 0.5152, "step": 16340 }, { "epoch": 320.59, "learning_rate": 0.00013147807442367304, "loss": 0.5187, "step": 16350 }, { "epoch": 320.78, "learning_rate": 0.00013145140099608932, "loss": 0.5196, "step": 16360 }, { "epoch": 320.98, "learning_rate": 0.00013142471108595552, "loss": 0.5179, "step": 16370 }, { "epoch": 321.0, "eval_loss": 0.5215443968772888, "eval_runtime": 2.1498, "eval_samples_per_second": 1060.099, "eval_steps_per_second": 4.186, "step": 16371 }, { "epoch": 321.18, "learning_rate": 0.00013139800470106443, "loss": 0.5163, "step": 16380 }, { "epoch": 321.37, "learning_rate": 0.00013137128184921378, "loss": 0.5231, "step": 16390 }, { "epoch": 321.57, "learning_rate": 0.00013134454253820605, "loss": 0.5158, "step": 16400 }, { "epoch": 321.76, "learning_rate": 0.0001313177867758485, "loss": 0.5173, "step": 16410 }, { "epoch": 321.96, "learning_rate": 0.0001312910145699532, "loss": 0.5194, "step": 16420 }, { "epoch": 322.0, "eval_loss": 0.5210621953010559, "eval_runtime": 2.2088, "eval_samples_per_second": 1031.76, "eval_steps_per_second": 4.075, "step": 16422 }, { "epoch": 322.16, "learning_rate": 0.0001312642259283371, "loss": 0.5175, "step": 16430 }, { "epoch": 322.35, "learning_rate": 0.0001312374208588218, "loss": 0.5196, "step": 16440 }, { "epoch": 322.55, "learning_rate": 0.00013121059936923384, "loss": 0.5194, "step": 16450 }, { "epoch": 322.75, "learning_rate": 0.00013118376146740448, "loss": 0.5213, "step": 16460 }, { "epoch": 322.94, "learning_rate": 0.00013115690716116978, "loss": 0.519, "step": 16470 }, { "epoch": 323.0, "eval_loss": 0.5211889147758484, "eval_runtime": 2.1133, "eval_samples_per_second": 1078.407, "eval_steps_per_second": 4.259, "step": 16473 }, { "epoch": 323.14, "learning_rate": 0.00013113003645837064, "loss": 0.5173, "step": 16480 }, { "epoch": 323.33, "learning_rate": 0.00013110314936685264, "loss": 0.5228, "step": 16490 }, { "epoch": 323.53, "learning_rate": 0.00013107624589446626, "loss": 0.5124, "step": 16500 }, { "epoch": 323.73, "learning_rate": 0.0001310493260490667, "loss": 0.5142, "step": 16510 }, { "epoch": 323.92, "learning_rate": 0.00013102238983851396, "loss": 0.5112, "step": 16520 }, { "epoch": 324.0, "eval_loss": 0.5174744725227356, "eval_runtime": 2.2342, "eval_samples_per_second": 1020.039, "eval_steps_per_second": 4.028, "step": 16524 }, { "epoch": 324.12, "learning_rate": 0.0001309954372706728, "loss": 0.5118, "step": 16530 }, { "epoch": 324.31, "learning_rate": 0.0001309684683534128, "loss": 0.5161, "step": 16540 }, { "epoch": 324.51, "learning_rate": 0.00013094148309460824, "loss": 0.5108, "step": 16550 }, { "epoch": 324.71, "learning_rate": 0.00013091448150213825, "loss": 0.5161, "step": 16560 }, { "epoch": 324.9, "learning_rate": 0.00013088746358388666, "loss": 0.5163, "step": 16570 }, { "epoch": 325.0, "eval_loss": 0.522521436214447, "eval_runtime": 2.273, "eval_samples_per_second": 1002.657, "eval_steps_per_second": 3.96, "step": 16575 }, { "epoch": 325.1, "learning_rate": 0.0001308604293477421, "loss": 0.5235, "step": 16580 }, { "epoch": 325.29, "learning_rate": 0.00013083337880159798, "loss": 0.5158, "step": 16590 }, { "epoch": 325.49, "learning_rate": 0.0001308063119533525, "loss": 0.5184, "step": 16600 }, { "epoch": 325.69, "learning_rate": 0.00013077922881090848, "loss": 0.5217, "step": 16610 }, { "epoch": 325.88, "learning_rate": 0.00013075212938217366, "loss": 0.5165, "step": 16620 }, { "epoch": 326.0, "eval_loss": 0.5172427296638489, "eval_runtime": 2.2537, "eval_samples_per_second": 1011.23, "eval_steps_per_second": 3.993, "step": 16626 }, { "epoch": 326.08, "learning_rate": 0.00013072501367506045, "loss": 0.5107, "step": 16630 }, { "epoch": 326.27, "learning_rate": 0.00013069788169748606, "loss": 0.5164, "step": 16640 }, { "epoch": 326.47, "learning_rate": 0.00013067073345737236, "loss": 0.5147, "step": 16650 }, { "epoch": 326.67, "learning_rate": 0.0001306435689626461, "loss": 0.5154, "step": 16660 }, { "epoch": 326.86, "learning_rate": 0.00013061638822123867, "loss": 0.5104, "step": 16670 }, { "epoch": 327.0, "eval_loss": 0.5200419425964355, "eval_runtime": 2.266, "eval_samples_per_second": 1005.748, "eval_steps_per_second": 3.972, "step": 16677 }, { "epoch": 327.06, "learning_rate": 0.00013058919124108625, "loss": 0.515, "step": 16680 }, { "epoch": 327.25, "learning_rate": 0.00013056197803012972, "loss": 0.5194, "step": 16690 }, { "epoch": 327.45, "learning_rate": 0.0001305347485963148, "loss": 0.5123, "step": 16700 }, { "epoch": 327.65, "learning_rate": 0.00013050750294759178, "loss": 0.5114, "step": 16710 }, { "epoch": 327.84, "learning_rate": 0.00013048024109191587, "loss": 0.51, "step": 16720 }, { "epoch": 328.0, "eval_loss": 0.5156450271606445, "eval_runtime": 2.1441, "eval_samples_per_second": 1062.933, "eval_steps_per_second": 4.198, "step": 16728 }, { "epoch": 328.04, "learning_rate": 0.0001304529630372469, "loss": 0.5106, "step": 16730 }, { "epoch": 328.24, "learning_rate": 0.00013042566879154942, "loss": 0.5113, "step": 16740 }, { "epoch": 328.43, "learning_rate": 0.00013039835836279278, "loss": 0.5138, "step": 16750 }, { "epoch": 328.63, "learning_rate": 0.000130371031758951, "loss": 0.5174, "step": 16760 }, { "epoch": 328.82, "learning_rate": 0.00013034368898800282, "loss": 0.5129, "step": 16770 }, { "epoch": 329.0, "eval_loss": 0.5160460472106934, "eval_runtime": 2.1772, "eval_samples_per_second": 1046.767, "eval_steps_per_second": 4.134, "step": 16779 }, { "epoch": 329.02, "learning_rate": 0.00013031633005793175, "loss": 0.5099, "step": 16780 }, { "epoch": 329.22, "learning_rate": 0.000130288954976726, "loss": 0.5173, "step": 16790 }, { "epoch": 329.41, "learning_rate": 0.00013026156375237844, "loss": 0.5143, "step": 16800 }, { "epoch": 329.61, "learning_rate": 0.00013023415639288675, "loss": 0.5119, "step": 16810 }, { "epoch": 329.8, "learning_rate": 0.0001302067329062532, "loss": 0.5138, "step": 16820 }, { "epoch": 330.0, "learning_rate": 0.00013017929330048485, "loss": 0.5084, "step": 16830 }, { "epoch": 330.0, "eval_loss": 0.5207294821739197, "eval_runtime": 2.1796, "eval_samples_per_second": 1045.587, "eval_steps_per_second": 4.129, "step": 16830 }, { "epoch": 330.2, "learning_rate": 0.00013015183758359353, "loss": 0.5211, "step": 16840 }, { "epoch": 330.39, "learning_rate": 0.0001301243657635956, "loss": 0.5176, "step": 16850 }, { "epoch": 330.59, "learning_rate": 0.00013009687784851226, "loss": 0.5126, "step": 16860 }, { "epoch": 330.78, "learning_rate": 0.00013006937384636938, "loss": 0.5135, "step": 16870 }, { "epoch": 330.98, "learning_rate": 0.0001300418537651975, "loss": 0.5159, "step": 16880 }, { "epoch": 331.0, "eval_loss": 0.5146752595901489, "eval_runtime": 2.3002, "eval_samples_per_second": 990.782, "eval_steps_per_second": 3.913, "step": 16881 }, { "epoch": 331.18, "learning_rate": 0.00013001431761303187, "loss": 0.512, "step": 16890 }, { "epoch": 331.37, "learning_rate": 0.00012998676539791246, "loss": 0.5153, "step": 16900 }, { "epoch": 331.57, "learning_rate": 0.00012995919712788383, "loss": 0.5155, "step": 16910 }, { "epoch": 331.76, "learning_rate": 0.00012993161281099538, "loss": 0.5066, "step": 16920 }, { "epoch": 331.96, "learning_rate": 0.00012990401245530108, "loss": 0.5126, "step": 16930 }, { "epoch": 332.0, "eval_loss": 0.5158648490905762, "eval_runtime": 2.2177, "eval_samples_per_second": 1027.619, "eval_steps_per_second": 4.058, "step": 16932 }, { "epoch": 332.16, "learning_rate": 0.00012987639606885964, "loss": 0.5167, "step": 16940 }, { "epoch": 332.35, "learning_rate": 0.0001298487636597344, "loss": 0.5125, "step": 16950 }, { "epoch": 332.55, "learning_rate": 0.0001298211152359934, "loss": 0.5113, "step": 16960 }, { "epoch": 332.75, "learning_rate": 0.0001297934508057094, "loss": 0.5142, "step": 16970 }, { "epoch": 332.94, "learning_rate": 0.00012976577037695974, "loss": 0.5132, "step": 16980 }, { "epoch": 333.0, "eval_loss": 0.5156267881393433, "eval_runtime": 2.1373, "eval_samples_per_second": 1066.28, "eval_steps_per_second": 4.211, "step": 16983 }, { "epoch": 333.14, "learning_rate": 0.00012973807395782655, "loss": 0.5137, "step": 16990 }, { "epoch": 333.33, "learning_rate": 0.00012971036155639656, "loss": 0.5127, "step": 17000 }, { "epoch": 333.53, "learning_rate": 0.00012968263318076113, "loss": 0.5107, "step": 17010 }, { "epoch": 333.73, "learning_rate": 0.00012965488883901635, "loss": 0.5133, "step": 17020 }, { "epoch": 333.92, "learning_rate": 0.00012962712853926297, "loss": 0.5092, "step": 17030 }, { "epoch": 334.0, "eval_loss": 0.5151438117027283, "eval_runtime": 2.2763, "eval_samples_per_second": 1001.176, "eval_steps_per_second": 3.954, "step": 17034 }, { "epoch": 334.12, "learning_rate": 0.00012959935228960636, "loss": 0.5148, "step": 17040 }, { "epoch": 334.31, "learning_rate": 0.00012957156009815656, "loss": 0.5087, "step": 17050 }, { "epoch": 334.51, "learning_rate": 0.00012954375197302826, "loss": 0.5124, "step": 17060 }, { "epoch": 334.71, "learning_rate": 0.00012951592792234085, "loss": 0.5121, "step": 17070 }, { "epoch": 334.9, "learning_rate": 0.00012948808795421827, "loss": 0.5116, "step": 17080 }, { "epoch": 335.0, "eval_loss": 0.514667809009552, "eval_runtime": 2.1506, "eval_samples_per_second": 1059.712, "eval_steps_per_second": 4.185, "step": 17085 }, { "epoch": 335.1, "learning_rate": 0.00012946023207678926, "loss": 0.5167, "step": 17090 }, { "epoch": 335.29, "learning_rate": 0.00012943236029818703, "loss": 0.5145, "step": 17100 }, { "epoch": 335.49, "learning_rate": 0.0001294044726265496, "loss": 0.5099, "step": 17110 }, { "epoch": 335.69, "learning_rate": 0.00012937656907001944, "loss": 0.5061, "step": 17120 }, { "epoch": 335.88, "learning_rate": 0.00012934864963674386, "loss": 0.5113, "step": 17130 }, { "epoch": 336.0, "eval_loss": 0.5120841264724731, "eval_runtime": 2.2524, "eval_samples_per_second": 1011.8, "eval_steps_per_second": 3.996, "step": 17136 }, { "epoch": 336.08, "learning_rate": 0.00012932071433487466, "loss": 0.5057, "step": 17140 }, { "epoch": 336.27, "learning_rate": 0.00012929276317256836, "loss": 0.5119, "step": 17150 }, { "epoch": 336.47, "learning_rate": 0.00012926479615798606, "loss": 0.516, "step": 17160 }, { "epoch": 336.67, "learning_rate": 0.0001292368132992935, "loss": 0.5067, "step": 17170 }, { "epoch": 336.86, "learning_rate": 0.00012920881460466106, "loss": 0.5076, "step": 17180 }, { "epoch": 337.0, "eval_loss": 0.5100632309913635, "eval_runtime": 2.2378, "eval_samples_per_second": 1018.414, "eval_steps_per_second": 4.022, "step": 17187 }, { "epoch": 337.06, "learning_rate": 0.0001291808000822637, "loss": 0.5091, "step": 17190 }, { "epoch": 337.25, "learning_rate": 0.0001291527697402811, "loss": 0.5122, "step": 17200 }, { "epoch": 337.45, "learning_rate": 0.00012912472358689745, "loss": 0.5101, "step": 17210 }, { "epoch": 337.65, "learning_rate": 0.0001290966616303016, "loss": 0.5114, "step": 17220 }, { "epoch": 337.84, "learning_rate": 0.00012906858387868705, "loss": 0.5106, "step": 17230 }, { "epoch": 338.0, "eval_loss": 0.5111255645751953, "eval_runtime": 2.1515, "eval_samples_per_second": 1059.276, "eval_steps_per_second": 4.183, "step": 17238 }, { "epoch": 338.04, "learning_rate": 0.00012904049034025183, "loss": 0.5066, "step": 17240 }, { "epoch": 338.24, "learning_rate": 0.0001290123810231987, "loss": 0.506, "step": 17250 }, { "epoch": 338.43, "learning_rate": 0.00012898425593573483, "loss": 0.5117, "step": 17260 }, { "epoch": 338.63, "learning_rate": 0.0001289561150860722, "loss": 0.5159, "step": 17270 }, { "epoch": 338.82, "learning_rate": 0.00012892795848242736, "loss": 0.5117, "step": 17280 }, { "epoch": 339.0, "eval_loss": 0.5093927383422852, "eval_runtime": 2.2305, "eval_samples_per_second": 1021.723, "eval_steps_per_second": 4.035, "step": 17289 }, { "epoch": 339.02, "learning_rate": 0.0001288997861330213, "loss": 0.5084, "step": 17290 }, { "epoch": 339.22, "learning_rate": 0.00012887159804607983, "loss": 0.5062, "step": 17300 }, { "epoch": 339.41, "learning_rate": 0.00012884339422983314, "loss": 0.5113, "step": 17310 }, { "epoch": 339.61, "learning_rate": 0.00012881517469251616, "loss": 0.5081, "step": 17320 }, { "epoch": 339.8, "learning_rate": 0.00012878693944236836, "loss": 0.5099, "step": 17330 }, { "epoch": 340.0, "learning_rate": 0.00012875868848763385, "loss": 0.5086, "step": 17340 }, { "epoch": 340.0, "eval_loss": 0.513230562210083, "eval_runtime": 2.2424, "eval_samples_per_second": 1016.333, "eval_steps_per_second": 4.014, "step": 17340 }, { "epoch": 340.2, "learning_rate": 0.0001287304218365612, "loss": 0.5062, "step": 17350 }, { "epoch": 340.39, "learning_rate": 0.0001287021394974037, "loss": 0.5102, "step": 17360 }, { "epoch": 340.59, "learning_rate": 0.0001286738414784191, "loss": 0.5076, "step": 17370 }, { "epoch": 340.78, "learning_rate": 0.00012864552778786984, "loss": 0.5017, "step": 17380 }, { "epoch": 340.98, "learning_rate": 0.0001286171984340229, "loss": 0.5034, "step": 17390 }, { "epoch": 341.0, "eval_loss": 0.5161563754081726, "eval_runtime": 2.1498, "eval_samples_per_second": 1060.082, "eval_steps_per_second": 4.186, "step": 17391 }, { "epoch": 341.18, "learning_rate": 0.0001285888534251498, "loss": 0.5104, "step": 17400 }, { "epoch": 341.37, "learning_rate": 0.00012856049276952663, "loss": 0.509, "step": 17410 }, { "epoch": 341.57, "learning_rate": 0.0001285321164754341, "loss": 0.5104, "step": 17420 }, { "epoch": 341.76, "learning_rate": 0.00012850372455115746, "loss": 0.5075, "step": 17430 }, { "epoch": 341.96, "learning_rate": 0.00012847531700498646, "loss": 0.5061, "step": 17440 }, { "epoch": 342.0, "eval_loss": 0.5142490267753601, "eval_runtime": 2.2582, "eval_samples_per_second": 1009.211, "eval_steps_per_second": 3.985, "step": 17442 }, { "epoch": 342.16, "learning_rate": 0.00012844689384521553, "loss": 0.5063, "step": 17450 }, { "epoch": 342.35, "learning_rate": 0.00012841845508014356, "loss": 0.5092, "step": 17460 }, { "epoch": 342.55, "learning_rate": 0.00012839000071807407, "loss": 0.5075, "step": 17470 }, { "epoch": 342.75, "learning_rate": 0.0001283615307673151, "loss": 0.5106, "step": 17480 }, { "epoch": 342.94, "learning_rate": 0.0001283330452361792, "loss": 0.5101, "step": 17490 }, { "epoch": 343.0, "eval_loss": 0.5135881304740906, "eval_runtime": 2.3064, "eval_samples_per_second": 988.109, "eval_steps_per_second": 3.902, "step": 17493 }, { "epoch": 343.14, "learning_rate": 0.00012830454413298353, "loss": 0.5114, "step": 17500 }, { "epoch": 343.33, "learning_rate": 0.00012827602746604978, "loss": 0.5087, "step": 17510 }, { "epoch": 343.53, "learning_rate": 0.0001282474952437042, "loss": 0.5092, "step": 17520 }, { "epoch": 343.73, "learning_rate": 0.00012821894747427754, "loss": 0.507, "step": 17530 }, { "epoch": 343.92, "learning_rate": 0.0001281903841661051, "loss": 0.5042, "step": 17540 }, { "epoch": 344.0, "eval_loss": 0.5135248899459839, "eval_runtime": 2.2046, "eval_samples_per_second": 1033.765, "eval_steps_per_second": 4.082, "step": 17544 }, { "epoch": 344.12, "learning_rate": 0.00012816180532752676, "loss": 0.5102, "step": 17550 }, { "epoch": 344.31, "learning_rate": 0.0001281332109668869, "loss": 0.5064, "step": 17560 }, { "epoch": 344.51, "learning_rate": 0.00012810460109253437, "loss": 0.506, "step": 17570 }, { "epoch": 344.71, "learning_rate": 0.00012807597571282272, "loss": 0.5015, "step": 17580 }, { "epoch": 344.9, "learning_rate": 0.00012804733483610982, "loss": 0.5091, "step": 17590 }, { "epoch": 345.0, "eval_loss": 0.5083193182945251, "eval_runtime": 2.1876, "eval_samples_per_second": 1041.783, "eval_steps_per_second": 4.114, "step": 17595 }, { "epoch": 345.1, "learning_rate": 0.00012801867847075826, "loss": 0.5072, "step": 17600 }, { "epoch": 345.29, "learning_rate": 0.000127990006625135, "loss": 0.5105, "step": 17610 }, { "epoch": 345.49, "learning_rate": 0.0001279613193076116, "loss": 0.508, "step": 17620 }, { "epoch": 345.69, "learning_rate": 0.00012793261652656413, "loss": 0.5034, "step": 17630 }, { "epoch": 345.88, "learning_rate": 0.00012790389829037314, "loss": 0.5095, "step": 17640 }, { "epoch": 346.0, "eval_loss": 0.5112407803535461, "eval_runtime": 2.2322, "eval_samples_per_second": 1020.968, "eval_steps_per_second": 4.032, "step": 17646 }, { "epoch": 346.08, "learning_rate": 0.00012787516460742372, "loss": 0.5101, "step": 17650 }, { "epoch": 346.27, "learning_rate": 0.00012784641548610546, "loss": 0.5126, "step": 17660 }, { "epoch": 346.47, "learning_rate": 0.00012781765093481247, "loss": 0.5, "step": 17670 }, { "epoch": 346.67, "learning_rate": 0.00012778887096194334, "loss": 0.5035, "step": 17680 }, { "epoch": 346.86, "learning_rate": 0.00012776007557590123, "loss": 0.5058, "step": 17690 }, { "epoch": 347.0, "eval_loss": 0.5121301412582397, "eval_runtime": 2.2108, "eval_samples_per_second": 1030.829, "eval_steps_per_second": 4.071, "step": 17697 }, { "epoch": 347.06, "learning_rate": 0.00012773126478509369, "loss": 0.5002, "step": 17700 }, { "epoch": 347.25, "learning_rate": 0.00012770243859793284, "loss": 0.5069, "step": 17710 }, { "epoch": 347.45, "learning_rate": 0.00012767359702283533, "loss": 0.5106, "step": 17720 }, { "epoch": 347.65, "learning_rate": 0.00012764474006822223, "loss": 0.5048, "step": 17730 }, { "epoch": 347.84, "learning_rate": 0.0001276158677425191, "loss": 0.504, "step": 17740 }, { "epoch": 348.0, "eval_loss": 0.508244514465332, "eval_runtime": 2.1836, "eval_samples_per_second": 1043.712, "eval_steps_per_second": 4.122, "step": 17748 }, { "epoch": 348.04, "learning_rate": 0.00012758698005415603, "loss": 0.505, "step": 17750 }, { "epoch": 348.24, "learning_rate": 0.0001275580770115676, "loss": 0.5107, "step": 17760 }, { "epoch": 348.43, "learning_rate": 0.00012752915862319285, "loss": 0.5037, "step": 17770 }, { "epoch": 348.63, "learning_rate": 0.00012750022489747527, "loss": 0.5065, "step": 17780 }, { "epoch": 348.82, "learning_rate": 0.0001274712758428629, "loss": 0.5016, "step": 17790 }, { "epoch": 349.0, "eval_loss": 0.5075437426567078, "eval_runtime": 2.1943, "eval_samples_per_second": 1038.611, "eval_steps_per_second": 4.102, "step": 17799 }, { "epoch": 349.02, "learning_rate": 0.00012744231146780821, "loss": 0.5055, "step": 17800 }, { "epoch": 349.22, "learning_rate": 0.00012741333178076816, "loss": 0.5014, "step": 17810 }, { "epoch": 349.41, "learning_rate": 0.00012738433679020412, "loss": 0.5062, "step": 17820 }, { "epoch": 349.61, "learning_rate": 0.00012735532650458208, "loss": 0.5094, "step": 17830 }, { "epoch": 349.8, "learning_rate": 0.0001273263009323723, "loss": 0.506, "step": 17840 }, { "epoch": 350.0, "learning_rate": 0.00012729726008204963, "loss": 0.5042, "step": 17850 }, { "epoch": 350.0, "eval_loss": 0.5090273022651672, "eval_runtime": 2.2206, "eval_samples_per_second": 1026.319, "eval_steps_per_second": 4.053, "step": 17850 }, { "epoch": 350.2, "learning_rate": 0.0001272682039620934, "loss": 0.5059, "step": 17860 }, { "epoch": 350.39, "learning_rate": 0.00012723913258098728, "loss": 0.5087, "step": 17870 }, { "epoch": 350.59, "learning_rate": 0.0001272100459472195, "loss": 0.5047, "step": 17880 }, { "epoch": 350.78, "learning_rate": 0.0001271809440692827, "loss": 0.5051, "step": 17890 }, { "epoch": 350.98, "learning_rate": 0.00012715182695567396, "loss": 0.5036, "step": 17900 }, { "epoch": 351.0, "eval_loss": 0.5089225172996521, "eval_runtime": 2.2425, "eval_samples_per_second": 1016.26, "eval_steps_per_second": 4.013, "step": 17901 }, { "epoch": 351.18, "learning_rate": 0.00012712269461489487, "loss": 0.5095, "step": 17910 }, { "epoch": 351.37, "learning_rate": 0.00012709354705545136, "loss": 0.5009, "step": 17920 }, { "epoch": 351.57, "learning_rate": 0.00012706438428585395, "loss": 0.5073, "step": 17930 }, { "epoch": 351.76, "learning_rate": 0.00012703520631461747, "loss": 0.5043, "step": 17940 }, { "epoch": 351.96, "learning_rate": 0.00012700601315026124, "loss": 0.5045, "step": 17950 }, { "epoch": 352.0, "eval_loss": 0.5094715356826782, "eval_runtime": 2.2171, "eval_samples_per_second": 1027.922, "eval_steps_per_second": 4.059, "step": 17952 }, { "epoch": 352.16, "learning_rate": 0.00012697680480130904, "loss": 0.5054, "step": 17960 }, { "epoch": 352.35, "learning_rate": 0.000126947581276289, "loss": 0.5047, "step": 17970 }, { "epoch": 352.55, "learning_rate": 0.0001269183425837338, "loss": 0.508, "step": 17980 }, { "epoch": 352.75, "learning_rate": 0.00012688908873218044, "loss": 0.4993, "step": 17990 }, { "epoch": 352.94, "learning_rate": 0.00012685981973017038, "loss": 0.5067, "step": 18000 }, { "epoch": 353.0, "eval_loss": 0.5087113976478577, "eval_runtime": 2.19, "eval_samples_per_second": 1040.626, "eval_steps_per_second": 4.11, "step": 18003 }, { "epoch": 353.14, "learning_rate": 0.0001268305355862496, "loss": 0.5036, "step": 18010 }, { "epoch": 353.33, "learning_rate": 0.0001268012363089683, "loss": 0.5057, "step": 18020 }, { "epoch": 353.53, "learning_rate": 0.00012677192190688134, "loss": 0.5041, "step": 18030 }, { "epoch": 353.73, "learning_rate": 0.00012674259238854778, "loss": 0.5021, "step": 18040 }, { "epoch": 353.92, "learning_rate": 0.00012671324776253123, "loss": 0.5026, "step": 18050 }, { "epoch": 354.0, "eval_loss": 0.5063843727111816, "eval_runtime": 2.1337, "eval_samples_per_second": 1068.082, "eval_steps_per_second": 4.218, "step": 18054 }, { "epoch": 354.12, "learning_rate": 0.00012668388803739963, "loss": 0.5051, "step": 18060 }, { "epoch": 354.31, "learning_rate": 0.0001266545132217254, "loss": 0.5015, "step": 18070 }, { "epoch": 354.51, "learning_rate": 0.00012662512332408532, "loss": 0.5058, "step": 18080 }, { "epoch": 354.71, "learning_rate": 0.00012659571835306057, "loss": 0.5064, "step": 18090 }, { "epoch": 354.9, "learning_rate": 0.00012656629831723674, "loss": 0.5001, "step": 18100 }, { "epoch": 355.0, "eval_loss": 0.5055447220802307, "eval_runtime": 2.2709, "eval_samples_per_second": 1003.563, "eval_steps_per_second": 3.963, "step": 18105 }, { "epoch": 355.1, "learning_rate": 0.00012653686322520387, "loss": 0.5007, "step": 18110 }, { "epoch": 355.29, "learning_rate": 0.0001265074130855563, "loss": 0.5039, "step": 18120 }, { "epoch": 355.49, "learning_rate": 0.00012647794790689285, "loss": 0.501, "step": 18130 }, { "epoch": 355.69, "learning_rate": 0.00012644846769781668, "loss": 0.5054, "step": 18140 }, { "epoch": 355.88, "learning_rate": 0.00012641897246693534, "loss": 0.5036, "step": 18150 }, { "epoch": 356.0, "eval_loss": 0.5056591629981995, "eval_runtime": 2.1488, "eval_samples_per_second": 1060.609, "eval_steps_per_second": 4.188, "step": 18156 }, { "epoch": 356.08, "learning_rate": 0.00012638946222286082, "loss": 0.4965, "step": 18160 }, { "epoch": 356.27, "learning_rate": 0.00012635993697420942, "loss": 0.499, "step": 18170 }, { "epoch": 356.47, "learning_rate": 0.00012633039672960183, "loss": 0.5056, "step": 18180 }, { "epoch": 356.67, "learning_rate": 0.00012630084149766322, "loss": 0.5045, "step": 18190 }, { "epoch": 356.86, "learning_rate": 0.000126271271287023, "loss": 0.5012, "step": 18200 }, { "epoch": 357.0, "eval_loss": 0.508310079574585, "eval_runtime": 2.2643, "eval_samples_per_second": 1006.498, "eval_steps_per_second": 3.975, "step": 18207 }, { "epoch": 357.06, "learning_rate": 0.00012624168610631502, "loss": 0.501, "step": 18210 }, { "epoch": 357.25, "learning_rate": 0.00012621208596417748, "loss": 0.4977, "step": 18220 }, { "epoch": 357.45, "learning_rate": 0.00012618247086925298, "loss": 0.4988, "step": 18230 }, { "epoch": 357.65, "learning_rate": 0.0001261528408301885, "loss": 0.5085, "step": 18240 }, { "epoch": 357.84, "learning_rate": 0.0001261231958556353, "loss": 0.5031, "step": 18250 }, { "epoch": 358.0, "eval_loss": 0.5109713077545166, "eval_runtime": 2.2405, "eval_samples_per_second": 1017.183, "eval_steps_per_second": 4.017, "step": 18258 }, { "epoch": 358.04, "learning_rate": 0.00012609353595424905, "loss": 0.506, "step": 18260 }, { "epoch": 358.24, "learning_rate": 0.0001260638611346898, "loss": 0.5048, "step": 18270 }, { "epoch": 358.43, "learning_rate": 0.00012603417140562195, "loss": 0.5056, "step": 18280 }, { "epoch": 358.63, "learning_rate": 0.00012600446677571423, "loss": 0.5023, "step": 18290 }, { "epoch": 358.82, "learning_rate": 0.0001259747472536397, "loss": 0.5021, "step": 18300 }, { "epoch": 359.0, "eval_loss": 0.5127790570259094, "eval_runtime": 2.1395, "eval_samples_per_second": 1065.184, "eval_steps_per_second": 4.207, "step": 18309 }, { "epoch": 359.02, "learning_rate": 0.00012594501284807582, "loss": 0.503, "step": 18310 }, { "epoch": 359.22, "learning_rate": 0.00012591526356770438, "loss": 0.5041, "step": 18320 }, { "epoch": 359.41, "learning_rate": 0.0001258854994212115, "loss": 0.5002, "step": 18330 }, { "epoch": 359.61, "learning_rate": 0.00012585572041728764, "loss": 0.5071, "step": 18340 }, { "epoch": 359.8, "learning_rate": 0.00012582592656462763, "loss": 0.5027, "step": 18350 }, { "epoch": 360.0, "learning_rate": 0.00012579611787193057, "loss": 0.4973, "step": 18360 }, { "epoch": 360.0, "eval_loss": 0.501369833946228, "eval_runtime": 2.3093, "eval_samples_per_second": 986.882, "eval_steps_per_second": 3.897, "step": 18360 }, { "epoch": 360.2, "learning_rate": 0.00012576629434789995, "loss": 0.4983, "step": 18370 }, { "epoch": 360.39, "learning_rate": 0.0001257364560012436, "loss": 0.5008, "step": 18380 }, { "epoch": 360.59, "learning_rate": 0.00012570660284067363, "loss": 0.5024, "step": 18390 }, { "epoch": 360.78, "learning_rate": 0.00012567673487490647, "loss": 0.5037, "step": 18400 }, { "epoch": 360.98, "learning_rate": 0.00012564685211266294, "loss": 0.4988, "step": 18410 }, { "epoch": 361.0, "eval_loss": 0.5028321743011475, "eval_runtime": 2.297, "eval_samples_per_second": 992.185, "eval_steps_per_second": 3.918, "step": 18411 }, { "epoch": 361.18, "learning_rate": 0.00012561695456266817, "loss": 0.5011, "step": 18420 }, { "epoch": 361.37, "learning_rate": 0.00012558704223365147, "loss": 0.5029, "step": 18430 }, { "epoch": 361.57, "learning_rate": 0.00012555711513434668, "loss": 0.5038, "step": 18440 }, { "epoch": 361.76, "learning_rate": 0.00012552717327349178, "loss": 0.5029, "step": 18450 }, { "epoch": 361.96, "learning_rate": 0.00012549721665982915, "loss": 0.5013, "step": 18460 }, { "epoch": 362.0, "eval_loss": 0.5034978985786438, "eval_runtime": 2.1156, "eval_samples_per_second": 1077.26, "eval_steps_per_second": 4.254, "step": 18462 }, { "epoch": 362.16, "learning_rate": 0.00012546724530210546, "loss": 0.4995, "step": 18470 }, { "epoch": 362.35, "learning_rate": 0.00012543725920907169, "loss": 0.498, "step": 18480 }, { "epoch": 362.55, "learning_rate": 0.00012540725838948308, "loss": 0.5007, "step": 18490 }, { "epoch": 362.75, "learning_rate": 0.0001253772428520992, "loss": 0.4995, "step": 18500 }, { "epoch": 362.94, "learning_rate": 0.00012534721260568392, "loss": 0.5001, "step": 18510 }, { "epoch": 363.0, "eval_loss": 0.5039771199226379, "eval_runtime": 2.2553, "eval_samples_per_second": 1010.502, "eval_steps_per_second": 3.991, "step": 18513 }, { "epoch": 363.14, "learning_rate": 0.00012531716765900545, "loss": 0.4966, "step": 18520 }, { "epoch": 363.33, "learning_rate": 0.00012528710802083617, "loss": 0.5034, "step": 18530 }, { "epoch": 363.53, "learning_rate": 0.00012525703369995286, "loss": 0.4964, "step": 18540 }, { "epoch": 363.73, "learning_rate": 0.00012522694470513658, "loss": 0.5035, "step": 18550 }, { "epoch": 363.92, "learning_rate": 0.00012519684104517258, "loss": 0.4972, "step": 18560 }, { "epoch": 364.0, "eval_loss": 0.5055744051933289, "eval_runtime": 2.1783, "eval_samples_per_second": 1046.212, "eval_steps_per_second": 4.132, "step": 18564 }, { "epoch": 364.12, "learning_rate": 0.0001251667227288505, "loss": 0.5019, "step": 18570 }, { "epoch": 364.31, "learning_rate": 0.00012513658976496424, "loss": 0.4983, "step": 18580 }, { "epoch": 364.51, "learning_rate": 0.00012510644216231188, "loss": 0.4998, "step": 18590 }, { "epoch": 364.71, "learning_rate": 0.0001250762799296959, "loss": 0.4977, "step": 18600 }, { "epoch": 364.9, "learning_rate": 0.000125046103075923, "loss": 0.4994, "step": 18610 }, { "epoch": 365.0, "eval_loss": 0.5070434808731079, "eval_runtime": 2.2145, "eval_samples_per_second": 1029.113, "eval_steps_per_second": 4.064, "step": 18615 }, { "epoch": 365.1, "learning_rate": 0.00012501591160980416, "loss": 0.4925, "step": 18620 }, { "epoch": 365.29, "learning_rate": 0.00012498570554015458, "loss": 0.5013, "step": 18630 }, { "epoch": 365.49, "learning_rate": 0.00012495548487579376, "loss": 0.4956, "step": 18640 }, { "epoch": 365.69, "learning_rate": 0.00012492524962554548, "loss": 0.4945, "step": 18650 }, { "epoch": 365.88, "learning_rate": 0.00012489499979823773, "loss": 0.5005, "step": 18660 }, { "epoch": 366.0, "eval_loss": 0.5070408582687378, "eval_runtime": 2.2321, "eval_samples_per_second": 1021.03, "eval_steps_per_second": 4.032, "step": 18666 }, { "epoch": 366.08, "learning_rate": 0.00012486473540270282, "loss": 0.4997, "step": 18670 }, { "epoch": 366.27, "learning_rate": 0.00012483445644777727, "loss": 0.4993, "step": 18680 }, { "epoch": 366.47, "learning_rate": 0.00012480416294230186, "loss": 0.497, "step": 18690 }, { "epoch": 366.67, "learning_rate": 0.00012477385489512158, "loss": 0.497, "step": 18700 }, { "epoch": 366.86, "learning_rate": 0.00012474353231508578, "loss": 0.4993, "step": 18710 }, { "epoch": 367.0, "eval_loss": 0.505254864692688, "eval_runtime": 2.3073, "eval_samples_per_second": 987.722, "eval_steps_per_second": 3.901, "step": 18717 }, { "epoch": 367.06, "learning_rate": 0.00012471319521104788, "loss": 0.5011, "step": 18720 }, { "epoch": 367.25, "learning_rate": 0.00012468284359186575, "loss": 0.5013, "step": 18730 }, { "epoch": 367.45, "learning_rate": 0.00012465247746640127, "loss": 0.5033, "step": 18740 }, { "epoch": 367.65, "learning_rate": 0.00012462209684352077, "loss": 0.5043, "step": 18750 }, { "epoch": 367.84, "learning_rate": 0.00012459170173209467, "loss": 0.4975, "step": 18760 }, { "epoch": 368.0, "eval_loss": 0.5035672187805176, "eval_runtime": 2.1149, "eval_samples_per_second": 1077.607, "eval_steps_per_second": 4.256, "step": 18768 }, { "epoch": 368.04, "learning_rate": 0.00012456129214099762, "loss": 0.5014, "step": 18770 }, { "epoch": 368.24, "learning_rate": 0.00012453086807910862, "loss": 0.5, "step": 18780 }, { "epoch": 368.43, "learning_rate": 0.0001245004295553108, "loss": 0.4988, "step": 18790 }, { "epoch": 368.63, "learning_rate": 0.0001244699765784915, "loss": 0.501, "step": 18800 }, { "epoch": 368.82, "learning_rate": 0.00012443950915754233, "loss": 0.4967, "step": 18810 }, { "epoch": 369.0, "eval_loss": 0.5026499629020691, "eval_runtime": 2.1523, "eval_samples_per_second": 1058.877, "eval_steps_per_second": 4.182, "step": 18819 }, { "epoch": 369.02, "learning_rate": 0.00012440902730135908, "loss": 0.5034, "step": 18820 }, { "epoch": 369.22, "learning_rate": 0.00012437853101884182, "loss": 0.5019, "step": 18830 }, { "epoch": 369.41, "learning_rate": 0.00012434802031889474, "loss": 0.5041, "step": 18840 }, { "epoch": 369.61, "learning_rate": 0.00012431749521042628, "loss": 0.5081, "step": 18850 }, { "epoch": 369.8, "learning_rate": 0.00012428695570234908, "loss": 0.5021, "step": 18860 }, { "epoch": 370.0, "learning_rate": 0.00012425640180358007, "loss": 0.4968, "step": 18870 }, { "epoch": 370.0, "eval_loss": 0.5011078119277954, "eval_runtime": 2.1561, "eval_samples_per_second": 1057.002, "eval_steps_per_second": 4.174, "step": 18870 }, { "epoch": 370.2, "learning_rate": 0.00012422583352304025, "loss": 0.5004, "step": 18880 }, { "epoch": 370.39, "learning_rate": 0.00012419525086965487, "loss": 0.4996, "step": 18890 }, { "epoch": 370.59, "learning_rate": 0.0001241646538523534, "loss": 0.4972, "step": 18900 }, { "epoch": 370.78, "learning_rate": 0.00012413404248006946, "loss": 0.4946, "step": 18910 }, { "epoch": 370.98, "learning_rate": 0.00012410341676174095, "loss": 0.498, "step": 18920 }, { "epoch": 371.0, "eval_loss": 0.4990316927433014, "eval_runtime": 2.3201, "eval_samples_per_second": 982.269, "eval_steps_per_second": 3.879, "step": 18921 }, { "epoch": 371.18, "learning_rate": 0.00012407277670630984, "loss": 0.4921, "step": 18930 }, { "epoch": 371.37, "learning_rate": 0.00012404212232272236, "loss": 0.5002, "step": 18940 }, { "epoch": 371.57, "learning_rate": 0.0001240114536199289, "loss": 0.4923, "step": 18950 }, { "epoch": 371.76, "learning_rate": 0.00012398077060688407, "loss": 0.4992, "step": 18960 }, { "epoch": 371.96, "learning_rate": 0.0001239500732925466, "loss": 0.5022, "step": 18970 }, { "epoch": 372.0, "eval_loss": 0.5031718611717224, "eval_runtime": 2.2742, "eval_samples_per_second": 1002.091, "eval_steps_per_second": 3.957, "step": 18972 }, { "epoch": 372.16, "learning_rate": 0.00012391936168587938, "loss": 0.4934, "step": 18980 }, { "epoch": 372.35, "learning_rate": 0.0001238886357958496, "loss": 0.4988, "step": 18990 }, { "epoch": 372.55, "learning_rate": 0.00012385789563142848, "loss": 0.4995, "step": 19000 }, { "epoch": 372.75, "learning_rate": 0.00012382714120159143, "loss": 0.4984, "step": 19010 }, { "epoch": 372.94, "learning_rate": 0.00012379637251531814, "loss": 0.4959, "step": 19020 }, { "epoch": 373.0, "eval_loss": 0.4971892833709717, "eval_runtime": 2.1232, "eval_samples_per_second": 1073.368, "eval_steps_per_second": 4.239, "step": 19023 }, { "epoch": 373.14, "learning_rate": 0.00012376558958159233, "loss": 0.497, "step": 19030 }, { "epoch": 373.33, "learning_rate": 0.00012373479240940198, "loss": 0.4931, "step": 19040 }, { "epoch": 373.53, "learning_rate": 0.0001237039810077391, "loss": 0.4958, "step": 19050 }, { "epoch": 373.73, "learning_rate": 0.00012367315538559996, "loss": 0.4988, "step": 19060 }, { "epoch": 373.92, "learning_rate": 0.00012364231555198497, "loss": 0.4921, "step": 19070 }, { "epoch": 374.0, "eval_loss": 0.4967401623725891, "eval_runtime": 2.1905, "eval_samples_per_second": 1040.405, "eval_steps_per_second": 4.109, "step": 19074 }, { "epoch": 374.12, "learning_rate": 0.00012361146151589866, "loss": 0.4956, "step": 19080 }, { "epoch": 374.31, "learning_rate": 0.00012358059328634974, "loss": 0.498, "step": 19090 }, { "epoch": 374.51, "learning_rate": 0.00012354971087235106, "loss": 0.4947, "step": 19100 }, { "epoch": 374.71, "learning_rate": 0.00012351881428291953, "loss": 0.4924, "step": 19110 }, { "epoch": 374.9, "learning_rate": 0.00012348790352707632, "loss": 0.4936, "step": 19120 }, { "epoch": 375.0, "eval_loss": 0.49671775102615356, "eval_runtime": 2.1863, "eval_samples_per_second": 1042.405, "eval_steps_per_second": 4.117, "step": 19125 }, { "epoch": 375.1, "learning_rate": 0.00012345697861384667, "loss": 0.4902, "step": 19130 }, { "epoch": 375.29, "learning_rate": 0.00012342603955225995, "loss": 0.5001, "step": 19140 }, { "epoch": 375.49, "learning_rate": 0.0001233950863513497, "loss": 0.4989, "step": 19150 }, { "epoch": 375.69, "learning_rate": 0.0001233641190201535, "loss": 0.4882, "step": 19160 }, { "epoch": 375.88, "learning_rate": 0.00012333313756771324, "loss": 0.496, "step": 19170 }, { "epoch": 376.0, "eval_loss": 0.5000470280647278, "eval_runtime": 2.1974, "eval_samples_per_second": 1037.127, "eval_steps_per_second": 4.096, "step": 19176 }, { "epoch": 376.08, "learning_rate": 0.0001233021420030747, "loss": 0.4939, "step": 19180 }, { "epoch": 376.27, "learning_rate": 0.00012327113233528796, "loss": 0.4955, "step": 19190 }, { "epoch": 376.47, "learning_rate": 0.00012324010857340712, "loss": 0.502, "step": 19200 }, { "epoch": 376.67, "learning_rate": 0.00012320907072649044, "loss": 0.4931, "step": 19210 }, { "epoch": 376.86, "learning_rate": 0.00012317801880360027, "loss": 0.4941, "step": 19220 }, { "epoch": 377.0, "eval_loss": 0.49797841906547546, "eval_runtime": 2.17, "eval_samples_per_second": 1050.218, "eval_steps_per_second": 4.147, "step": 19227 }, { "epoch": 377.06, "learning_rate": 0.00012314695281380307, "loss": 0.4961, "step": 19230 }, { "epoch": 377.25, "learning_rate": 0.00012311587276616945, "loss": 0.4972, "step": 19240 }, { "epoch": 377.45, "learning_rate": 0.0001230847786697741, "loss": 0.4962, "step": 19250 }, { "epoch": 377.65, "learning_rate": 0.0001230536705336957, "loss": 0.498, "step": 19260 }, { "epoch": 377.84, "learning_rate": 0.00012302254836701724, "loss": 0.4937, "step": 19270 }, { "epoch": 378.0, "eval_loss": 0.49754011631011963, "eval_runtime": 2.1141, "eval_samples_per_second": 1077.991, "eval_steps_per_second": 4.257, "step": 19278 }, { "epoch": 378.04, "learning_rate": 0.00012299141217882569, "loss": 0.4955, "step": 19280 }, { "epoch": 378.24, "learning_rate": 0.00012296026197821205, "loss": 0.4938, "step": 19290 }, { "epoch": 378.43, "learning_rate": 0.0001229290977742716, "loss": 0.4989, "step": 19300 }, { "epoch": 378.63, "learning_rate": 0.00012289791957610343, "loss": 0.4934, "step": 19310 }, { "epoch": 378.82, "learning_rate": 0.000122866727392811, "loss": 0.4979, "step": 19320 }, { "epoch": 379.0, "eval_loss": 0.49748286604881287, "eval_runtime": 2.2523, "eval_samples_per_second": 1011.858, "eval_steps_per_second": 3.996, "step": 19329 }, { "epoch": 379.02, "learning_rate": 0.00012283552123350174, "loss": 0.4992, "step": 19330 }, { "epoch": 379.22, "learning_rate": 0.0001228043011072871, "loss": 0.4946, "step": 19340 }, { "epoch": 379.41, "learning_rate": 0.00012277306702328266, "loss": 0.495, "step": 19350 }, { "epoch": 379.61, "learning_rate": 0.0001227418189906081, "loss": 0.4932, "step": 19360 }, { "epoch": 379.8, "learning_rate": 0.00012271055701838714, "loss": 0.5027, "step": 19370 }, { "epoch": 380.0, "learning_rate": 0.00012267928111574762, "loss": 0.4996, "step": 19380 }, { "epoch": 380.0, "eval_loss": 0.4932139813899994, "eval_runtime": 2.2597, "eval_samples_per_second": 1008.552, "eval_steps_per_second": 3.983, "step": 19380 }, { "epoch": 380.2, "learning_rate": 0.0001226479912918213, "loss": 0.4889, "step": 19390 }, { "epoch": 380.39, "learning_rate": 0.0001226166875557442, "loss": 0.4946, "step": 19400 }, { "epoch": 380.59, "learning_rate": 0.00012258536991665629, "loss": 0.4995, "step": 19410 }, { "epoch": 380.78, "learning_rate": 0.00012255403838370163, "loss": 0.4914, "step": 19420 }, { "epoch": 380.98, "learning_rate": 0.0001225226929660283, "loss": 0.4961, "step": 19430 }, { "epoch": 381.0, "eval_loss": 0.49828577041625977, "eval_runtime": 2.1395, "eval_samples_per_second": 1065.208, "eval_steps_per_second": 4.207, "step": 19431 }, { "epoch": 381.18, "learning_rate": 0.0001224913336727885, "loss": 0.4935, "step": 19440 }, { "epoch": 381.37, "learning_rate": 0.00012245996051313843, "loss": 0.4931, "step": 19450 }, { "epoch": 381.57, "learning_rate": 0.00012242857349623835, "loss": 0.49, "step": 19460 }, { "epoch": 381.76, "learning_rate": 0.00012239717263125256, "loss": 0.4936, "step": 19470 }, { "epoch": 381.96, "learning_rate": 0.00012236575792734942, "loss": 0.4903, "step": 19480 }, { "epoch": 382.0, "eval_loss": 0.497437059879303, "eval_runtime": 2.2133, "eval_samples_per_second": 1029.705, "eval_steps_per_second": 4.066, "step": 19482 }, { "epoch": 382.16, "learning_rate": 0.00012233432939370132, "loss": 0.4949, "step": 19490 }, { "epoch": 382.35, "learning_rate": 0.0001223028870394847, "loss": 0.4926, "step": 19500 }, { "epoch": 382.55, "learning_rate": 0.00012227143087388003, "loss": 0.4876, "step": 19510 }, { "epoch": 382.75, "learning_rate": 0.0001222399609060718, "loss": 0.4913, "step": 19520 }, { "epoch": 382.94, "learning_rate": 0.00012220847714524853, "loss": 0.4899, "step": 19530 }, { "epoch": 383.0, "eval_loss": 0.49533191323280334, "eval_runtime": 2.2011, "eval_samples_per_second": 1035.378, "eval_steps_per_second": 4.089, "step": 19533 }, { "epoch": 383.14, "learning_rate": 0.00012217697960060277, "loss": 0.4923, "step": 19540 }, { "epoch": 383.33, "learning_rate": 0.00012214546828133113, "loss": 0.49, "step": 19550 }, { "epoch": 383.53, "learning_rate": 0.00012211394319663421, "loss": 0.4957, "step": 19560 }, { "epoch": 383.73, "learning_rate": 0.00012208240435571664, "loss": 0.4883, "step": 19570 }, { "epoch": 383.92, "learning_rate": 0.00012205085176778698, "loss": 0.4924, "step": 19580 }, { "epoch": 384.0, "eval_loss": 0.4952709674835205, "eval_runtime": 2.2127, "eval_samples_per_second": 1029.974, "eval_steps_per_second": 4.067, "step": 19584 }, { "epoch": 384.12, "learning_rate": 0.000122019285442058, "loss": 0.4929, "step": 19590 }, { "epoch": 384.31, "learning_rate": 0.00012198770538774624, "loss": 0.4935, "step": 19600 }, { "epoch": 384.51, "learning_rate": 0.00012195611161407247, "loss": 0.4876, "step": 19610 }, { "epoch": 384.71, "learning_rate": 0.00012192450413026132, "loss": 0.4953, "step": 19620 }, { "epoch": 384.9, "learning_rate": 0.00012189288294554149, "loss": 0.4895, "step": 19630 }, { "epoch": 385.0, "eval_loss": 0.49638909101486206, "eval_runtime": 2.3077, "eval_samples_per_second": 987.57, "eval_steps_per_second": 3.9, "step": 19635 }, { "epoch": 385.1, "learning_rate": 0.00012186124806914566, "loss": 0.4886, "step": 19640 }, { "epoch": 385.29, "learning_rate": 0.00012182959951031048, "loss": 0.4964, "step": 19650 }, { "epoch": 385.49, "learning_rate": 0.00012179793727827667, "loss": 0.4962, "step": 19660 }, { "epoch": 385.69, "learning_rate": 0.00012176626138228886, "loss": 0.4935, "step": 19670 }, { "epoch": 385.88, "learning_rate": 0.0001217345718315957, "loss": 0.4965, "step": 19680 }, { "epoch": 386.0, "eval_loss": 0.5006343722343445, "eval_runtime": 2.1604, "eval_samples_per_second": 1054.911, "eval_steps_per_second": 4.166, "step": 19686 }, { "epoch": 386.08, "learning_rate": 0.00012170286863544986, "loss": 0.4929, "step": 19690 }, { "epoch": 386.27, "learning_rate": 0.00012167115180310793, "loss": 0.495, "step": 19700 }, { "epoch": 386.47, "learning_rate": 0.0001216394213438306, "loss": 0.4947, "step": 19710 }, { "epoch": 386.67, "learning_rate": 0.00012160767726688234, "loss": 0.4911, "step": 19720 }, { "epoch": 386.86, "learning_rate": 0.00012157591958153181, "loss": 0.4896, "step": 19730 }, { "epoch": 387.0, "eval_loss": 0.49377307295799255, "eval_runtime": 2.2645, "eval_samples_per_second": 1006.384, "eval_steps_per_second": 3.974, "step": 19737 }, { "epoch": 387.06, "learning_rate": 0.00012154414829705148, "loss": 0.4905, "step": 19740 }, { "epoch": 387.25, "learning_rate": 0.00012151236342271788, "loss": 0.4938, "step": 19750 }, { "epoch": 387.45, "learning_rate": 0.0001214805649678115, "loss": 0.4934, "step": 19760 }, { "epoch": 387.65, "learning_rate": 0.00012144875294161676, "loss": 0.4969, "step": 19770 }, { "epoch": 387.84, "learning_rate": 0.00012141692735342209, "loss": 0.497, "step": 19780 }, { "epoch": 388.0, "eval_loss": 0.4956132471561432, "eval_runtime": 2.1319, "eval_samples_per_second": 1069.001, "eval_steps_per_second": 4.222, "step": 19788 }, { "epoch": 388.04, "learning_rate": 0.0001213850882125198, "loss": 0.4898, "step": 19790 }, { "epoch": 388.24, "learning_rate": 0.00012135323552820626, "loss": 0.4936, "step": 19800 }, { "epoch": 388.43, "learning_rate": 0.00012132136930978172, "loss": 0.491, "step": 19810 }, { "epoch": 388.63, "learning_rate": 0.00012128948956655038, "loss": 0.4929, "step": 19820 }, { "epoch": 388.82, "learning_rate": 0.00012125759630782047, "loss": 0.4924, "step": 19830 }, { "epoch": 389.0, "eval_loss": 0.4960061311721802, "eval_runtime": 2.2224, "eval_samples_per_second": 1025.454, "eval_steps_per_second": 4.05, "step": 19839 }, { "epoch": 389.02, "learning_rate": 0.00012122568954290409, "loss": 0.4914, "step": 19840 }, { "epoch": 389.22, "learning_rate": 0.00012119376928111729, "loss": 0.4931, "step": 19850 }, { "epoch": 389.41, "learning_rate": 0.00012116183553178008, "loss": 0.4938, "step": 19860 }, { "epoch": 389.61, "learning_rate": 0.00012112988830421638, "loss": 0.4899, "step": 19870 }, { "epoch": 389.8, "learning_rate": 0.00012109792760775413, "loss": 0.4897, "step": 19880 }, { "epoch": 390.0, "learning_rate": 0.00012106595345172509, "loss": 0.4904, "step": 19890 }, { "epoch": 390.0, "eval_loss": 0.49724245071411133, "eval_runtime": 2.2173, "eval_samples_per_second": 1027.831, "eval_steps_per_second": 4.059, "step": 19890 }, { "epoch": 390.2, "learning_rate": 0.00012103396584546499, "loss": 0.4905, "step": 19900 }, { "epoch": 390.39, "learning_rate": 0.00012100196479831355, "loss": 0.4975, "step": 19910 }, { "epoch": 390.59, "learning_rate": 0.00012096995031961432, "loss": 0.5014, "step": 19920 }, { "epoch": 390.78, "learning_rate": 0.00012093792241871481, "loss": 0.4979, "step": 19930 }, { "epoch": 390.98, "learning_rate": 0.00012090588110496649, "loss": 0.5, "step": 19940 }, { "epoch": 391.0, "eval_loss": 0.4958445131778717, "eval_runtime": 2.2084, "eval_samples_per_second": 1031.971, "eval_steps_per_second": 4.075, "step": 19941 }, { "epoch": 391.18, "learning_rate": 0.00012087382638772467, "loss": 0.4953, "step": 19950 }, { "epoch": 391.37, "learning_rate": 0.00012084175827634866, "loss": 0.4903, "step": 19960 }, { "epoch": 391.57, "learning_rate": 0.00012080967678020158, "loss": 0.4878, "step": 19970 }, { "epoch": 391.76, "learning_rate": 0.00012077758190865055, "loss": 0.491, "step": 19980 }, { "epoch": 391.96, "learning_rate": 0.00012074547367106652, "loss": 0.4961, "step": 19990 }, { "epoch": 392.0, "eval_loss": 0.4906347990036011, "eval_runtime": 2.203, "eval_samples_per_second": 1034.495, "eval_steps_per_second": 4.085, "step": 19992 }, { "epoch": 392.16, "learning_rate": 0.00012071335207682442, "loss": 0.4875, "step": 20000 }, { "epoch": 392.35, "learning_rate": 0.00012068121713530302, "loss": 0.4917, "step": 20010 }, { "epoch": 392.55, "learning_rate": 0.00012064906885588497, "loss": 0.4949, "step": 20020 }, { "epoch": 392.75, "learning_rate": 0.00012061690724795693, "loss": 0.492, "step": 20030 }, { "epoch": 392.94, "learning_rate": 0.0001205847323209093, "loss": 0.491, "step": 20040 }, { "epoch": 393.0, "eval_loss": 0.49177783727645874, "eval_runtime": 2.2703, "eval_samples_per_second": 1003.851, "eval_steps_per_second": 3.964, "step": 20043 }, { "epoch": 393.14, "learning_rate": 0.00012055254408413652, "loss": 0.4926, "step": 20050 }, { "epoch": 393.33, "learning_rate": 0.00012052034254703675, "loss": 0.4889, "step": 20060 }, { "epoch": 393.53, "learning_rate": 0.00012048812771901217, "loss": 0.4897, "step": 20070 }, { "epoch": 393.73, "learning_rate": 0.00012045589960946876, "loss": 0.4903, "step": 20080 }, { "epoch": 393.92, "learning_rate": 0.00012042365822781646, "loss": 0.4878, "step": 20090 }, { "epoch": 394.0, "eval_loss": 0.49539539217948914, "eval_runtime": 2.1871, "eval_samples_per_second": 1042.041, "eval_steps_per_second": 4.115, "step": 20094 }, { "epoch": 394.12, "learning_rate": 0.00012039140358346896, "loss": 0.4904, "step": 20100 }, { "epoch": 394.31, "learning_rate": 0.00012035913568584398, "loss": 0.4898, "step": 20110 }, { "epoch": 394.51, "learning_rate": 0.00012032685454436298, "loss": 0.4889, "step": 20120 }, { "epoch": 394.71, "learning_rate": 0.00012029456016845132, "loss": 0.4908, "step": 20130 }, { "epoch": 394.9, "learning_rate": 0.00012026225256753828, "loss": 0.4881, "step": 20140 }, { "epoch": 395.0, "eval_loss": 0.4915597438812256, "eval_runtime": 2.2004, "eval_samples_per_second": 1035.724, "eval_steps_per_second": 4.09, "step": 20145 }, { "epoch": 395.1, "learning_rate": 0.00012022993175105693, "loss": 0.49, "step": 20150 }, { "epoch": 395.29, "learning_rate": 0.00012019759772844423, "loss": 0.489, "step": 20160 }, { "epoch": 395.49, "learning_rate": 0.00012016525050914098, "loss": 0.4957, "step": 20170 }, { "epoch": 395.69, "learning_rate": 0.0001201328901025919, "loss": 0.4914, "step": 20180 }, { "epoch": 395.88, "learning_rate": 0.00012010051651824546, "loss": 0.49, "step": 20190 }, { "epoch": 396.0, "eval_loss": 0.4946361482143402, "eval_runtime": 2.1514, "eval_samples_per_second": 1059.324, "eval_steps_per_second": 4.183, "step": 20196 }, { "epoch": 396.08, "learning_rate": 0.000120068129765554, "loss": 0.4896, "step": 20200 }, { "epoch": 396.27, "learning_rate": 0.00012003572985397382, "loss": 0.4901, "step": 20210 }, { "epoch": 396.47, "learning_rate": 0.00012000331679296488, "loss": 0.4894, "step": 20220 }, { "epoch": 396.67, "learning_rate": 0.00011997089059199112, "loss": 0.4914, "step": 20230 }, { "epoch": 396.86, "learning_rate": 0.00011993845126052025, "loss": 0.4881, "step": 20240 }, { "epoch": 397.0, "eval_loss": 0.49236392974853516, "eval_runtime": 2.1606, "eval_samples_per_second": 1054.789, "eval_steps_per_second": 4.165, "step": 20247 }, { "epoch": 397.06, "learning_rate": 0.00011990599880802382, "loss": 0.4938, "step": 20250 }, { "epoch": 397.25, "learning_rate": 0.00011987353324397729, "loss": 0.4934, "step": 20260 }, { "epoch": 397.45, "learning_rate": 0.0001198410545778598, "loss": 0.4952, "step": 20270 }, { "epoch": 397.65, "learning_rate": 0.00011980856281915442, "loss": 0.4858, "step": 20280 }, { "epoch": 397.84, "learning_rate": 0.00011977605797734803, "loss": 0.4871, "step": 20290 }, { "epoch": 398.0, "eval_loss": 0.49587199091911316, "eval_runtime": 2.305, "eval_samples_per_second": 988.741, "eval_steps_per_second": 3.905, "step": 20298 }, { "epoch": 398.04, "learning_rate": 0.00011974354006193131, "loss": 0.4878, "step": 20300 }, { "epoch": 398.24, "learning_rate": 0.00011971100908239877, "loss": 0.4952, "step": 20310 }, { "epoch": 398.43, "learning_rate": 0.00011967846504824876, "loss": 0.4868, "step": 20320 }, { "epoch": 398.63, "learning_rate": 0.00011964590796898333, "loss": 0.4938, "step": 20330 }, { "epoch": 398.82, "learning_rate": 0.00011961333785410852, "loss": 0.492, "step": 20340 }, { "epoch": 399.0, "eval_loss": 0.48673364520072937, "eval_runtime": 2.1177, "eval_samples_per_second": 1076.163, "eval_steps_per_second": 4.25, "step": 20349 }, { "epoch": 399.02, "learning_rate": 0.00011958075471313399, "loss": 0.4877, "step": 20350 }, { "epoch": 399.22, "learning_rate": 0.00011954815855557338, "loss": 0.4819, "step": 20360 }, { "epoch": 399.41, "learning_rate": 0.00011951554939094395, "loss": 0.4908, "step": 20370 }, { "epoch": 399.61, "learning_rate": 0.0001194829272287669, "loss": 0.4934, "step": 20380 }, { "epoch": 399.8, "learning_rate": 0.00011945029207856717, "loss": 0.4891, "step": 20390 }, { "epoch": 400.0, "learning_rate": 0.0001194176439498735, "loss": 0.4883, "step": 20400 }, { "epoch": 400.0, "eval_loss": 0.4891131818294525, "eval_runtime": 2.2177, "eval_samples_per_second": 1027.662, "eval_steps_per_second": 4.058, "step": 20400 }, { "epoch": 400.2, "learning_rate": 0.0001193849828522184, "loss": 0.49, "step": 20410 }, { "epoch": 400.39, "learning_rate": 0.00011935230879513817, "loss": 0.4854, "step": 20420 }, { "epoch": 400.59, "learning_rate": 0.00011931962178817295, "loss": 0.4883, "step": 20430 }, { "epoch": 400.78, "learning_rate": 0.00011928692184086658, "loss": 0.487, "step": 20440 }, { "epoch": 400.98, "learning_rate": 0.00011925420896276673, "loss": 0.4864, "step": 20450 }, { "epoch": 401.0, "eval_loss": 0.49455273151397705, "eval_runtime": 2.1545, "eval_samples_per_second": 1057.788, "eval_steps_per_second": 4.177, "step": 20451 }, { "epoch": 401.18, "learning_rate": 0.00011922148316342483, "loss": 0.4938, "step": 20460 }, { "epoch": 401.37, "learning_rate": 0.00011918874445239606, "loss": 0.4971, "step": 20470 }, { "epoch": 401.57, "learning_rate": 0.00011915599283923944, "loss": 0.4905, "step": 20480 }, { "epoch": 401.76, "learning_rate": 0.00011912322833351768, "loss": 0.489, "step": 20490 }, { "epoch": 401.96, "learning_rate": 0.00011909045094479726, "loss": 0.4898, "step": 20500 }, { "epoch": 402.0, "eval_loss": 0.49220773577690125, "eval_runtime": 2.1981, "eval_samples_per_second": 1036.809, "eval_steps_per_second": 4.094, "step": 20502 }, { "epoch": 402.16, "learning_rate": 0.0001190576606826485, "loss": 0.4885, "step": 20510 }, { "epoch": 402.35, "learning_rate": 0.00011902485755664542, "loss": 0.489, "step": 20520 }, { "epoch": 402.55, "learning_rate": 0.00011899204157636577, "loss": 0.4877, "step": 20530 }, { "epoch": 402.75, "learning_rate": 0.00011895921275139109, "loss": 0.4857, "step": 20540 }, { "epoch": 402.94, "learning_rate": 0.00011892637109130667, "loss": 0.4841, "step": 20550 }, { "epoch": 403.0, "eval_loss": 0.49017834663391113, "eval_runtime": 2.2994, "eval_samples_per_second": 991.128, "eval_steps_per_second": 3.914, "step": 20553 }, { "epoch": 403.14, "learning_rate": 0.00011889351660570155, "loss": 0.4864, "step": 20560 }, { "epoch": 403.33, "learning_rate": 0.00011886064930416852, "loss": 0.487, "step": 20570 }, { "epoch": 403.53, "learning_rate": 0.00011882776919630406, "loss": 0.4857, "step": 20580 }, { "epoch": 403.73, "learning_rate": 0.00011879487629170845, "loss": 0.4874, "step": 20590 }, { "epoch": 403.92, "learning_rate": 0.00011876197059998569, "loss": 0.4879, "step": 20600 }, { "epoch": 404.0, "eval_loss": 0.49214035272598267, "eval_runtime": 2.1629, "eval_samples_per_second": 1053.659, "eval_steps_per_second": 4.161, "step": 20604 }, { "epoch": 404.12, "learning_rate": 0.00011872905213074348, "loss": 0.4914, "step": 20610 }, { "epoch": 404.31, "learning_rate": 0.00011869612089359333, "loss": 0.4845, "step": 20620 }, { "epoch": 404.51, "learning_rate": 0.0001186631768981504, "loss": 0.4848, "step": 20630 }, { "epoch": 404.71, "learning_rate": 0.00011863022015403356, "loss": 0.4905, "step": 20640 }, { "epoch": 404.9, "learning_rate": 0.00011859725067086551, "loss": 0.4801, "step": 20650 }, { "epoch": 405.0, "eval_loss": 0.49142250418663025, "eval_runtime": 2.2732, "eval_samples_per_second": 1002.561, "eval_steps_per_second": 3.959, "step": 20655 }, { "epoch": 405.1, "learning_rate": 0.00011856426845827259, "loss": 0.4882, "step": 20660 }, { "epoch": 405.29, "learning_rate": 0.00011853127352588484, "loss": 0.4859, "step": 20670 }, { "epoch": 405.49, "learning_rate": 0.00011849826588333606, "loss": 0.4866, "step": 20680 }, { "epoch": 405.69, "learning_rate": 0.00011846524554026375, "loss": 0.4828, "step": 20690 }, { "epoch": 405.88, "learning_rate": 0.00011843221250630909, "loss": 0.4877, "step": 20700 }, { "epoch": 406.0, "eval_loss": 0.48823556303977966, "eval_runtime": 2.2635, "eval_samples_per_second": 1006.835, "eval_steps_per_second": 3.976, "step": 20706 }, { "epoch": 406.08, "learning_rate": 0.00011839916679111705, "loss": 0.4797, "step": 20710 }, { "epoch": 406.27, "learning_rate": 0.00011836610840433619, "loss": 0.4864, "step": 20720 }, { "epoch": 406.47, "learning_rate": 0.00011833303735561884, "loss": 0.487, "step": 20730 }, { "epoch": 406.67, "learning_rate": 0.00011829995365462098, "loss": 0.4845, "step": 20740 }, { "epoch": 406.86, "learning_rate": 0.00011826685731100235, "loss": 0.4858, "step": 20750 }, { "epoch": 407.0, "eval_loss": 0.48820948600769043, "eval_runtime": 2.3072, "eval_samples_per_second": 987.76, "eval_steps_per_second": 3.901, "step": 20757 }, { "epoch": 407.06, "learning_rate": 0.00011823374833442632, "loss": 0.4859, "step": 20760 }, { "epoch": 407.25, "learning_rate": 0.00011820062673455999, "loss": 0.4768, "step": 20770 }, { "epoch": 407.45, "learning_rate": 0.00011816749252107412, "loss": 0.4855, "step": 20780 }, { "epoch": 407.65, "learning_rate": 0.00011813434570364315, "loss": 0.4875, "step": 20790 }, { "epoch": 407.84, "learning_rate": 0.00011810118629194525, "loss": 0.4856, "step": 20800 }, { "epoch": 408.0, "eval_loss": 0.48716312646865845, "eval_runtime": 2.1662, "eval_samples_per_second": 1052.052, "eval_steps_per_second": 4.155, "step": 20808 }, { "epoch": 408.04, "learning_rate": 0.00011806801429566218, "loss": 0.4819, "step": 20810 }, { "epoch": 408.24, "learning_rate": 0.00011803482972447946, "loss": 0.4848, "step": 20820 }, { "epoch": 408.43, "learning_rate": 0.00011800163258808624, "loss": 0.4851, "step": 20830 }, { "epoch": 408.63, "learning_rate": 0.00011796842289617532, "loss": 0.4844, "step": 20840 }, { "epoch": 408.82, "learning_rate": 0.00011793520065844319, "loss": 0.4825, "step": 20850 }, { "epoch": 409.0, "eval_loss": 0.4870782792568207, "eval_runtime": 2.1607, "eval_samples_per_second": 1054.746, "eval_steps_per_second": 4.165, "step": 20859 }, { "epoch": 409.02, "learning_rate": 0.00011790196588459002, "loss": 0.4766, "step": 20860 }, { "epoch": 409.22, "learning_rate": 0.00011786871858431966, "loss": 0.4798, "step": 20870 }, { "epoch": 409.41, "learning_rate": 0.00011783545876733949, "loss": 0.4842, "step": 20880 }, { "epoch": 409.61, "learning_rate": 0.0001178021864433607, "loss": 0.483, "step": 20890 }, { "epoch": 409.8, "learning_rate": 0.00011776890162209804, "loss": 0.4919, "step": 20900 }, { "epoch": 410.0, "learning_rate": 0.00011773560431326995, "loss": 0.4865, "step": 20910 }, { "epoch": 410.0, "eval_loss": 0.4852657616138458, "eval_runtime": 2.2662, "eval_samples_per_second": 1005.667, "eval_steps_per_second": 3.971, "step": 20910 }, { "epoch": 410.2, "learning_rate": 0.00011770229452659851, "loss": 0.4851, "step": 20920 }, { "epoch": 410.39, "learning_rate": 0.00011766897227180941, "loss": 0.4838, "step": 20930 }, { "epoch": 410.59, "learning_rate": 0.00011763563755863201, "loss": 0.485, "step": 20940 }, { "epoch": 410.78, "learning_rate": 0.00011760229039679933, "loss": 0.4842, "step": 20950 }, { "epoch": 410.98, "learning_rate": 0.00011756893079604795, "loss": 0.4834, "step": 20960 }, { "epoch": 411.0, "eval_loss": 0.4907666742801666, "eval_runtime": 2.2355, "eval_samples_per_second": 1019.481, "eval_steps_per_second": 4.026, "step": 20961 }, { "epoch": 411.18, "learning_rate": 0.00011753555876611818, "loss": 0.4817, "step": 20970 }, { "epoch": 411.37, "learning_rate": 0.00011750217431675389, "loss": 0.486, "step": 20980 }, { "epoch": 411.57, "learning_rate": 0.00011746877745770258, "loss": 0.4862, "step": 20990 }, { "epoch": 411.76, "learning_rate": 0.00011743536819871539, "loss": 0.4838, "step": 21000 }, { "epoch": 411.96, "learning_rate": 0.0001174019465495471, "loss": 0.4815, "step": 21010 }, { "epoch": 412.0, "eval_loss": 0.4847215414047241, "eval_runtime": 2.1743, "eval_samples_per_second": 1048.158, "eval_steps_per_second": 4.139, "step": 21012 }, { "epoch": 412.16, "learning_rate": 0.00011736851251995606, "loss": 0.4849, "step": 21020 }, { "epoch": 412.35, "learning_rate": 0.00011733506611970429, "loss": 0.4762, "step": 21030 }, { "epoch": 412.55, "learning_rate": 0.00011730160735855736, "loss": 0.4838, "step": 21040 }, { "epoch": 412.75, "learning_rate": 0.00011726813624628451, "loss": 0.48, "step": 21050 }, { "epoch": 412.94, "learning_rate": 0.00011723465279265853, "loss": 0.4828, "step": 21060 }, { "epoch": 413.0, "eval_loss": 0.49191340804100037, "eval_runtime": 2.3099, "eval_samples_per_second": 986.634, "eval_steps_per_second": 3.896, "step": 21063 }, { "epoch": 413.14, "learning_rate": 0.00011720115700745588, "loss": 0.4873, "step": 21070 }, { "epoch": 413.33, "learning_rate": 0.00011716764890045656, "loss": 0.486, "step": 21080 }, { "epoch": 413.53, "learning_rate": 0.00011713412848144419, "loss": 0.4863, "step": 21090 }, { "epoch": 413.73, "learning_rate": 0.00011710059576020595, "loss": 0.4863, "step": 21100 }, { "epoch": 413.92, "learning_rate": 0.00011706705074653273, "loss": 0.487, "step": 21110 }, { "epoch": 414.0, "eval_loss": 0.4898955821990967, "eval_runtime": 2.2311, "eval_samples_per_second": 1021.471, "eval_steps_per_second": 4.034, "step": 21114 }, { "epoch": 414.12, "learning_rate": 0.00011703349345021887, "loss": 0.4859, "step": 21120 }, { "epoch": 414.31, "learning_rate": 0.00011699992388106235, "loss": 0.4833, "step": 21130 }, { "epoch": 414.51, "learning_rate": 0.00011696634204886474, "loss": 0.4849, "step": 21140 }, { "epoch": 414.71, "learning_rate": 0.00011693274796343119, "loss": 0.4819, "step": 21150 }, { "epoch": 414.9, "learning_rate": 0.00011689914163457044, "loss": 0.4842, "step": 21160 }, { "epoch": 415.0, "eval_loss": 0.48760807514190674, "eval_runtime": 2.207, "eval_samples_per_second": 1032.62, "eval_steps_per_second": 4.078, "step": 21165 }, { "epoch": 415.1, "learning_rate": 0.00011686552307209477, "loss": 0.4853, "step": 21170 }, { "epoch": 415.29, "learning_rate": 0.00011683189228582005, "loss": 0.4834, "step": 21180 }, { "epoch": 415.49, "learning_rate": 0.00011679824928556573, "loss": 0.4834, "step": 21190 }, { "epoch": 415.69, "learning_rate": 0.00011676459408115479, "loss": 0.4787, "step": 21200 }, { "epoch": 415.88, "learning_rate": 0.00011673092668241384, "loss": 0.4902, "step": 21210 }, { "epoch": 416.0, "eval_loss": 0.48727309703826904, "eval_runtime": 2.1481, "eval_samples_per_second": 1060.942, "eval_steps_per_second": 4.19, "step": 21216 }, { "epoch": 416.08, "learning_rate": 0.000116697247099173, "loss": 0.4826, "step": 21220 }, { "epoch": 416.27, "learning_rate": 0.00011666355534126592, "loss": 0.4854, "step": 21230 }, { "epoch": 416.47, "learning_rate": 0.00011662985141852987, "loss": 0.4849, "step": 21240 }, { "epoch": 416.67, "learning_rate": 0.00011659613534080564, "loss": 0.4814, "step": 21250 }, { "epoch": 416.86, "learning_rate": 0.00011656240711793759, "loss": 0.4809, "step": 21260 }, { "epoch": 417.0, "eval_loss": 0.49133971333503723, "eval_runtime": 2.1325, "eval_samples_per_second": 1068.686, "eval_steps_per_second": 4.22, "step": 21267 }, { "epoch": 417.06, "learning_rate": 0.0001165286667597736, "loss": 0.4785, "step": 21270 }, { "epoch": 417.25, "learning_rate": 0.00011649491427616508, "loss": 0.4871, "step": 21280 }, { "epoch": 417.45, "learning_rate": 0.00011646114967696701, "loss": 0.4778, "step": 21290 }, { "epoch": 417.65, "learning_rate": 0.00011642737297203793, "loss": 0.4808, "step": 21300 }, { "epoch": 417.84, "learning_rate": 0.00011639358417123985, "loss": 0.4825, "step": 21310 }, { "epoch": 418.0, "eval_loss": 0.48316019773483276, "eval_runtime": 2.1954, "eval_samples_per_second": 1038.08, "eval_steps_per_second": 4.099, "step": 21318 }, { "epoch": 418.04, "learning_rate": 0.00011635978328443837, "loss": 0.4785, "step": 21320 }, { "epoch": 418.24, "learning_rate": 0.00011632597032150254, "loss": 0.4855, "step": 21330 }, { "epoch": 418.43, "learning_rate": 0.0001162921452923051, "loss": 0.4795, "step": 21340 }, { "epoch": 418.63, "learning_rate": 0.0001162583082067221, "loss": 0.4841, "step": 21350 }, { "epoch": 418.82, "learning_rate": 0.00011622445907463325, "loss": 0.4797, "step": 21360 }, { "epoch": 419.0, "eval_loss": 0.4872037172317505, "eval_runtime": 2.1935, "eval_samples_per_second": 1038.999, "eval_steps_per_second": 4.103, "step": 21369 }, { "epoch": 419.02, "learning_rate": 0.00011619059790592175, "loss": 0.483, "step": 21370 }, { "epoch": 419.22, "learning_rate": 0.0001161567247104743, "loss": 0.4835, "step": 21380 }, { "epoch": 419.41, "learning_rate": 0.00011612283949818115, "loss": 0.4799, "step": 21390 }, { "epoch": 419.61, "learning_rate": 0.00011608894227893595, "loss": 0.4868, "step": 21400 }, { "epoch": 419.8, "learning_rate": 0.00011605503306263599, "loss": 0.4777, "step": 21410 }, { "epoch": 420.0, "learning_rate": 0.00011602111185918203, "loss": 0.4852, "step": 21420 }, { "epoch": 420.0, "eval_loss": 0.4868069887161255, "eval_runtime": 2.1839, "eval_samples_per_second": 1043.561, "eval_steps_per_second": 4.121, "step": 21420 }, { "epoch": 420.2, "learning_rate": 0.00011598717867847822, "loss": 0.4811, "step": 21430 }, { "epoch": 420.39, "learning_rate": 0.00011595323353043236, "loss": 0.4864, "step": 21440 }, { "epoch": 420.59, "learning_rate": 0.00011591927642495564, "loss": 0.4823, "step": 21450 }, { "epoch": 420.78, "learning_rate": 0.0001158853073719628, "loss": 0.4804, "step": 21460 }, { "epoch": 420.98, "learning_rate": 0.00011585132638137203, "loss": 0.4879, "step": 21470 }, { "epoch": 421.0, "eval_loss": 0.48333823680877686, "eval_runtime": 2.2861, "eval_samples_per_second": 996.882, "eval_steps_per_second": 3.937, "step": 21471 }, { "epoch": 421.18, "learning_rate": 0.00011581733346310504, "loss": 0.4874, "step": 21480 }, { "epoch": 421.37, "learning_rate": 0.000115783328627087, "loss": 0.4826, "step": 21490 }, { "epoch": 421.57, "learning_rate": 0.00011574931188324656, "loss": 0.4757, "step": 21500 }, { "epoch": 421.76, "learning_rate": 0.00011571528324151581, "loss": 0.481, "step": 21510 }, { "epoch": 421.96, "learning_rate": 0.00011568124271183042, "loss": 0.4823, "step": 21520 }, { "epoch": 422.0, "eval_loss": 0.4823528826236725, "eval_runtime": 2.3957, "eval_samples_per_second": 951.291, "eval_steps_per_second": 3.757, "step": 21522 }, { "epoch": 422.16, "learning_rate": 0.00011564719030412944, "loss": 0.4827, "step": 21530 }, { "epoch": 422.35, "learning_rate": 0.00011561312602835541, "loss": 0.4851, "step": 21540 }, { "epoch": 422.55, "learning_rate": 0.00011557904989445434, "loss": 0.4831, "step": 21550 }, { "epoch": 422.75, "learning_rate": 0.0001155449619123757, "loss": 0.478, "step": 21560 }, { "epoch": 422.94, "learning_rate": 0.00011551086209207242, "loss": 0.4729, "step": 21570 }, { "epoch": 423.0, "eval_loss": 0.47928565740585327, "eval_runtime": 2.2457, "eval_samples_per_second": 1014.839, "eval_steps_per_second": 4.008, "step": 21573 }, { "epoch": 423.14, "learning_rate": 0.0001154767504435009, "loss": 0.4828, "step": 21580 }, { "epoch": 423.33, "learning_rate": 0.00011544262697662093, "loss": 0.4811, "step": 21590 }, { "epoch": 423.53, "learning_rate": 0.00011540849170139588, "loss": 0.4806, "step": 21600 }, { "epoch": 423.73, "learning_rate": 0.00011537434462779246, "loss": 0.4799, "step": 21610 }, { "epoch": 423.92, "learning_rate": 0.00011534018576578084, "loss": 0.4825, "step": 21620 }, { "epoch": 424.0, "eval_loss": 0.4812348186969757, "eval_runtime": 2.2984, "eval_samples_per_second": 991.549, "eval_steps_per_second": 3.916, "step": 21624 }, { "epoch": 424.12, "learning_rate": 0.00011530601512533462, "loss": 0.4796, "step": 21630 }, { "epoch": 424.31, "learning_rate": 0.00011527183271643091, "loss": 0.4806, "step": 21640 }, { "epoch": 424.51, "learning_rate": 0.0001152376385490502, "loss": 0.4799, "step": 21650 }, { "epoch": 424.71, "learning_rate": 0.00011520343263317641, "loss": 0.4829, "step": 21660 }, { "epoch": 424.9, "learning_rate": 0.00011516921497879693, "loss": 0.4739, "step": 21670 }, { "epoch": 425.0, "eval_loss": 0.4831399917602539, "eval_runtime": 2.2851, "eval_samples_per_second": 997.312, "eval_steps_per_second": 3.938, "step": 21675 }, { "epoch": 425.1, "learning_rate": 0.00011513498559590251, "loss": 0.4753, "step": 21680 }, { "epoch": 425.29, "learning_rate": 0.00011510074449448743, "loss": 0.4875, "step": 21690 }, { "epoch": 425.49, "learning_rate": 0.00011506649168454926, "loss": 0.4773, "step": 21700 }, { "epoch": 425.69, "learning_rate": 0.0001150322271760891, "loss": 0.478, "step": 21710 }, { "epoch": 425.88, "learning_rate": 0.00011499795097911141, "loss": 0.4767, "step": 21720 }, { "epoch": 426.0, "eval_loss": 0.4847799241542816, "eval_runtime": 2.3019, "eval_samples_per_second": 990.034, "eval_steps_per_second": 3.91, "step": 21726 }, { "epoch": 426.08, "learning_rate": 0.00011496366310362408, "loss": 0.4833, "step": 21730 }, { "epoch": 426.27, "learning_rate": 0.00011492936355963839, "loss": 0.4844, "step": 21740 }, { "epoch": 426.47, "learning_rate": 0.00011489505235716906, "loss": 0.4805, "step": 21750 }, { "epoch": 426.67, "learning_rate": 0.00011486072950623418, "loss": 0.4808, "step": 21760 }, { "epoch": 426.86, "learning_rate": 0.00011482639501685529, "loss": 0.4806, "step": 21770 }, { "epoch": 427.0, "eval_loss": 0.48580941557884216, "eval_runtime": 2.3158, "eval_samples_per_second": 984.13, "eval_steps_per_second": 3.886, "step": 21777 }, { "epoch": 427.06, "learning_rate": 0.00011479204889905722, "loss": 0.4783, "step": 21780 }, { "epoch": 427.25, "learning_rate": 0.00011475769116286837, "loss": 0.4786, "step": 21790 }, { "epoch": 427.45, "learning_rate": 0.00011472332181832034, "loss": 0.4772, "step": 21800 }, { "epoch": 427.65, "learning_rate": 0.00011468894087544828, "loss": 0.4754, "step": 21810 }, { "epoch": 427.84, "learning_rate": 0.00011465454834429066, "loss": 0.4736, "step": 21820 }, { "epoch": 428.0, "eval_loss": 0.48313280940055847, "eval_runtime": 2.1586, "eval_samples_per_second": 1055.778, "eval_steps_per_second": 4.169, "step": 21828 }, { "epoch": 428.04, "learning_rate": 0.00011462014423488926, "loss": 0.4773, "step": 21830 }, { "epoch": 428.24, "learning_rate": 0.00011458572855728937, "loss": 0.4773, "step": 21840 }, { "epoch": 428.43, "learning_rate": 0.00011455130132153959, "loss": 0.4773, "step": 21850 }, { "epoch": 428.63, "learning_rate": 0.00011451686253769192, "loss": 0.4753, "step": 21860 }, { "epoch": 428.82, "learning_rate": 0.00011448241221580167, "loss": 0.4857, "step": 21870 }, { "epoch": 429.0, "eval_loss": 0.4785289764404297, "eval_runtime": 2.2717, "eval_samples_per_second": 1003.196, "eval_steps_per_second": 3.962, "step": 21879 }, { "epoch": 429.02, "learning_rate": 0.00011444795036592761, "loss": 0.4738, "step": 21880 }, { "epoch": 429.22, "learning_rate": 0.00011441347699813185, "loss": 0.4752, "step": 21890 }, { "epoch": 429.41, "learning_rate": 0.00011437899212247977, "loss": 0.4768, "step": 21900 }, { "epoch": 429.61, "learning_rate": 0.00011434449574904024, "loss": 0.4774, "step": 21910 }, { "epoch": 429.8, "learning_rate": 0.0001143099878878854, "loss": 0.4768, "step": 21920 }, { "epoch": 430.0, "learning_rate": 0.00011427546854909084, "loss": 0.4819, "step": 21930 }, { "epoch": 430.0, "eval_loss": 0.48050355911254883, "eval_runtime": 2.1897, "eval_samples_per_second": 1040.776, "eval_steps_per_second": 4.11, "step": 21930 }, { "epoch": 430.2, "learning_rate": 0.00011424093774273535, "loss": 0.477, "step": 21940 }, { "epoch": 430.39, "learning_rate": 0.00011420639547890122, "loss": 0.4774, "step": 21950 }, { "epoch": 430.59, "learning_rate": 0.000114171841767674, "loss": 0.4751, "step": 21960 }, { "epoch": 430.78, "learning_rate": 0.00011413727661914259, "loss": 0.4773, "step": 21970 }, { "epoch": 430.98, "learning_rate": 0.00011410270004339924, "loss": 0.4767, "step": 21980 }, { "epoch": 431.0, "eval_loss": 0.48454973101615906, "eval_runtime": 2.2681, "eval_samples_per_second": 1004.809, "eval_steps_per_second": 3.968, "step": 21981 }, { "epoch": 431.18, "learning_rate": 0.00011406811205053956, "loss": 0.4798, "step": 21990 }, { "epoch": 431.37, "learning_rate": 0.00011403351265066249, "loss": 0.4791, "step": 22000 }, { "epoch": 431.57, "learning_rate": 0.00011399890185387023, "loss": 0.4769, "step": 22010 }, { "epoch": 431.76, "learning_rate": 0.0001139642796702684, "loss": 0.4805, "step": 22020 }, { "epoch": 431.96, "learning_rate": 0.0001139296461099659, "loss": 0.4765, "step": 22030 }, { "epoch": 432.0, "eval_loss": 0.4803260564804077, "eval_runtime": 2.1961, "eval_samples_per_second": 1037.757, "eval_steps_per_second": 4.098, "step": 22032 }, { "epoch": 432.16, "learning_rate": 0.00011389500118307494, "loss": 0.4787, "step": 22040 }, { "epoch": 432.35, "learning_rate": 0.00011386034489971108, "loss": 0.473, "step": 22050 }, { "epoch": 432.55, "learning_rate": 0.00011382567726999318, "loss": 0.4783, "step": 22060 }, { "epoch": 432.75, "learning_rate": 0.00011379099830404341, "loss": 0.4726, "step": 22070 }, { "epoch": 432.94, "learning_rate": 0.00011375630801198725, "loss": 0.4785, "step": 22080 }, { "epoch": 433.0, "eval_loss": 0.4825577139854431, "eval_runtime": 2.2219, "eval_samples_per_second": 1025.707, "eval_steps_per_second": 4.051, "step": 22083 }, { "epoch": 433.14, "learning_rate": 0.00011372160640395352, "loss": 0.4733, "step": 22090 }, { "epoch": 433.33, "learning_rate": 0.0001136868934900743, "loss": 0.4737, "step": 22100 }, { "epoch": 433.53, "learning_rate": 0.00011365216928048498, "loss": 0.4766, "step": 22110 }, { "epoch": 433.73, "learning_rate": 0.00011361743378532422, "loss": 0.4767, "step": 22120 }, { "epoch": 433.92, "learning_rate": 0.00011358268701473408, "loss": 0.4758, "step": 22130 }, { "epoch": 434.0, "eval_loss": 0.48143434524536133, "eval_runtime": 2.319, "eval_samples_per_second": 982.761, "eval_steps_per_second": 3.881, "step": 22134 }, { "epoch": 434.12, "learning_rate": 0.00011354792897885981, "loss": 0.4729, "step": 22140 }, { "epoch": 434.31, "learning_rate": 0.00011351315968784996, "loss": 0.4798, "step": 22150 }, { "epoch": 434.51, "learning_rate": 0.00011347837915185645, "loss": 0.4768, "step": 22160 }, { "epoch": 434.71, "learning_rate": 0.00011344358738103432, "loss": 0.4777, "step": 22170 }, { "epoch": 434.9, "learning_rate": 0.0001134087843855421, "loss": 0.4677, "step": 22180 }, { "epoch": 435.0, "eval_loss": 0.4814690947532654, "eval_runtime": 2.3045, "eval_samples_per_second": 988.926, "eval_steps_per_second": 3.905, "step": 22185 }, { "epoch": 435.1, "learning_rate": 0.00011337397017554141, "loss": 0.4754, "step": 22190 }, { "epoch": 435.29, "learning_rate": 0.00011333914476119726, "loss": 0.4744, "step": 22200 }, { "epoch": 435.49, "learning_rate": 0.00011330430815267787, "loss": 0.4691, "step": 22210 }, { "epoch": 435.69, "learning_rate": 0.0001132694603601548, "loss": 0.4759, "step": 22220 }, { "epoch": 435.88, "learning_rate": 0.00011323460139380279, "loss": 0.4735, "step": 22230 }, { "epoch": 436.0, "eval_loss": 0.48106876015663147, "eval_runtime": 2.1513, "eval_samples_per_second": 1059.348, "eval_steps_per_second": 4.183, "step": 22236 }, { "epoch": 436.08, "learning_rate": 0.00011319973126379986, "loss": 0.4761, "step": 22240 }, { "epoch": 436.27, "learning_rate": 0.00011316484998032736, "loss": 0.4761, "step": 22250 }, { "epoch": 436.47, "learning_rate": 0.00011312995755356982, "loss": 0.476, "step": 22260 }, { "epoch": 436.67, "learning_rate": 0.00011309505399371506, "loss": 0.4783, "step": 22270 }, { "epoch": 436.86, "learning_rate": 0.00011306013931095412, "loss": 0.4764, "step": 22280 }, { "epoch": 437.0, "eval_loss": 0.47487953305244446, "eval_runtime": 2.3105, "eval_samples_per_second": 986.353, "eval_steps_per_second": 3.895, "step": 22287 }, { "epoch": 437.06, "learning_rate": 0.00011302521351548133, "loss": 0.4741, "step": 22290 }, { "epoch": 437.25, "learning_rate": 0.00011299027661749425, "loss": 0.4758, "step": 22300 }, { "epoch": 437.45, "learning_rate": 0.00011295532862719366, "loss": 0.4735, "step": 22310 }, { "epoch": 437.65, "learning_rate": 0.00011292036955478361, "loss": 0.4778, "step": 22320 }, { "epoch": 437.84, "learning_rate": 0.0001128853994104713, "loss": 0.4743, "step": 22330 }, { "epoch": 438.0, "eval_loss": 0.4845726490020752, "eval_runtime": 2.2198, "eval_samples_per_second": 1026.683, "eval_steps_per_second": 4.054, "step": 22338 }, { "epoch": 438.04, "learning_rate": 0.00011285041820446735, "loss": 0.4728, "step": 22340 }, { "epoch": 438.24, "learning_rate": 0.0001128154259469854, "loss": 0.479, "step": 22350 }, { "epoch": 438.43, "learning_rate": 0.00011278042264824247, "loss": 0.4758, "step": 22360 }, { "epoch": 438.63, "learning_rate": 0.0001127454083184587, "loss": 0.4799, "step": 22370 }, { "epoch": 438.82, "learning_rate": 0.00011271038296785748, "loss": 0.4736, "step": 22380 }, { "epoch": 439.0, "eval_loss": 0.4824729263782501, "eval_runtime": 2.1493, "eval_samples_per_second": 1060.367, "eval_steps_per_second": 4.187, "step": 22389 }, { "epoch": 439.02, "learning_rate": 0.00011267534660666548, "loss": 0.474, "step": 22390 }, { "epoch": 439.22, "learning_rate": 0.0001126402992451125, "loss": 0.4794, "step": 22400 }, { "epoch": 439.41, "learning_rate": 0.0001126052408934316, "loss": 0.4767, "step": 22410 }, { "epoch": 439.61, "learning_rate": 0.00011257017156185904, "loss": 0.4762, "step": 22420 }, { "epoch": 439.8, "learning_rate": 0.00011253509126063428, "loss": 0.4721, "step": 22430 }, { "epoch": 440.0, "learning_rate": 0.0001125, "loss": 0.4732, "step": 22440 }, { "epoch": 440.0, "eval_loss": 0.47832006216049194, "eval_runtime": 2.181, "eval_samples_per_second": 1044.937, "eval_steps_per_second": 4.127, "step": 22440 }, { "epoch": 440.2, "learning_rate": 0.00011246489779020203, "loss": 0.4754, "step": 22450 }, { "epoch": 440.39, "learning_rate": 0.00011242978464148945, "loss": 0.4796, "step": 22460 }, { "epoch": 440.59, "learning_rate": 0.00011239466056411455, "loss": 0.4794, "step": 22470 }, { "epoch": 440.78, "learning_rate": 0.00011235952556833274, "loss": 0.4781, "step": 22480 }, { "epoch": 440.98, "learning_rate": 0.00011232437966440264, "loss": 0.4706, "step": 22490 }, { "epoch": 441.0, "eval_loss": 0.48102495074272156, "eval_runtime": 2.2838, "eval_samples_per_second": 997.887, "eval_steps_per_second": 3.941, "step": 22491 }, { "epoch": 441.18, "learning_rate": 0.00011228922286258613, "loss": 0.472, "step": 22500 }, { "epoch": 441.37, "learning_rate": 0.00011225405517314813, "loss": 0.4749, "step": 22510 }, { "epoch": 441.57, "learning_rate": 0.00011221887660635688, "loss": 0.479, "step": 22520 }, { "epoch": 441.76, "learning_rate": 0.00011218368717248373, "loss": 0.4701, "step": 22530 }, { "epoch": 441.96, "learning_rate": 0.0001121484868818032, "loss": 0.4735, "step": 22540 }, { "epoch": 442.0, "eval_loss": 0.477983683347702, "eval_runtime": 2.2047, "eval_samples_per_second": 1033.722, "eval_steps_per_second": 4.082, "step": 22542 }, { "epoch": 442.16, "learning_rate": 0.000112113275744593, "loss": 0.4774, "step": 22550 }, { "epoch": 442.35, "learning_rate": 0.00011207805377113397, "loss": 0.4765, "step": 22560 }, { "epoch": 442.55, "learning_rate": 0.00011204282097171016, "loss": 0.4727, "step": 22570 }, { "epoch": 442.75, "learning_rate": 0.0001120075773566088, "loss": 0.4713, "step": 22580 }, { "epoch": 442.94, "learning_rate": 0.00011197232293612015, "loss": 0.4796, "step": 22590 }, { "epoch": 443.0, "eval_loss": 0.4880768954753876, "eval_runtime": 2.2972, "eval_samples_per_second": 992.057, "eval_steps_per_second": 3.918, "step": 22593 }, { "epoch": 443.14, "learning_rate": 0.0001119370577205378, "loss": 0.4796, "step": 22600 }, { "epoch": 443.33, "learning_rate": 0.00011190178172015837, "loss": 0.4784, "step": 22610 }, { "epoch": 443.53, "learning_rate": 0.00011186649494528165, "loss": 0.4766, "step": 22620 }, { "epoch": 443.73, "learning_rate": 0.00011183119740621062, "loss": 0.4778, "step": 22630 }, { "epoch": 443.92, "learning_rate": 0.00011179588911325136, "loss": 0.4724, "step": 22640 }, { "epoch": 444.0, "eval_loss": 0.4784562289714813, "eval_runtime": 2.3023, "eval_samples_per_second": 989.881, "eval_steps_per_second": 3.909, "step": 22644 }, { "epoch": 444.12, "learning_rate": 0.0001117605700767131, "loss": 0.4807, "step": 22650 }, { "epoch": 444.31, "learning_rate": 0.00011172524030690823, "loss": 0.4827, "step": 22660 }, { "epoch": 444.51, "learning_rate": 0.00011168989981415223, "loss": 0.4754, "step": 22670 }, { "epoch": 444.71, "learning_rate": 0.00011165454860876375, "loss": 0.4823, "step": 22680 }, { "epoch": 444.9, "learning_rate": 0.00011161918670106455, "loss": 0.4701, "step": 22690 }, { "epoch": 445.0, "eval_loss": 0.47529175877571106, "eval_runtime": 2.1777, "eval_samples_per_second": 1046.517, "eval_steps_per_second": 4.133, "step": 22695 }, { "epoch": 445.1, "learning_rate": 0.00011158381410137952, "loss": 0.4754, "step": 22700 }, { "epoch": 445.29, "learning_rate": 0.00011154843082003669, "loss": 0.4733, "step": 22710 }, { "epoch": 445.49, "learning_rate": 0.00011151303686736717, "loss": 0.4736, "step": 22720 }, { "epoch": 445.69, "learning_rate": 0.00011147763225370518, "loss": 0.4716, "step": 22730 }, { "epoch": 445.88, "learning_rate": 0.00011144221698938812, "loss": 0.4764, "step": 22740 }, { "epoch": 446.0, "eval_loss": 0.47874537110328674, "eval_runtime": 2.1189, "eval_samples_per_second": 1075.539, "eval_steps_per_second": 4.247, "step": 22746 }, { "epoch": 446.08, "learning_rate": 0.00011140679108475641, "loss": 0.4709, "step": 22750 }, { "epoch": 446.27, "learning_rate": 0.0001113713545501537, "loss": 0.4714, "step": 22760 }, { "epoch": 446.47, "learning_rate": 0.0001113359073959266, "loss": 0.4686, "step": 22770 }, { "epoch": 446.67, "learning_rate": 0.00011130044963242492, "loss": 0.4723, "step": 22780 }, { "epoch": 446.86, "learning_rate": 0.0001112649812700015, "loss": 0.4729, "step": 22790 }, { "epoch": 447.0, "eval_loss": 0.48238447308540344, "eval_runtime": 2.236, "eval_samples_per_second": 1019.217, "eval_steps_per_second": 4.025, "step": 22797 }, { "epoch": 447.06, "learning_rate": 0.00011122950231901234, "loss": 0.4718, "step": 22800 }, { "epoch": 447.25, "learning_rate": 0.00011119401278981652, "loss": 0.4718, "step": 22810 }, { "epoch": 447.45, "learning_rate": 0.00011115851269277615, "loss": 0.4731, "step": 22820 }, { "epoch": 447.65, "learning_rate": 0.00011112300203825649, "loss": 0.4734, "step": 22830 }, { "epoch": 447.84, "learning_rate": 0.00011108748083662589, "loss": 0.4726, "step": 22840 }, { "epoch": 448.0, "eval_loss": 0.47418108582496643, "eval_runtime": 2.2463, "eval_samples_per_second": 1014.566, "eval_steps_per_second": 4.007, "step": 22848 }, { "epoch": 448.04, "learning_rate": 0.00011105194909825568, "loss": 0.4732, "step": 22850 }, { "epoch": 448.24, "learning_rate": 0.00011101640683352039, "loss": 0.4746, "step": 22860 }, { "epoch": 448.43, "learning_rate": 0.00011098085405279753, "loss": 0.4708, "step": 22870 }, { "epoch": 448.63, "learning_rate": 0.00011094529076646774, "loss": 0.4745, "step": 22880 }, { "epoch": 448.82, "learning_rate": 0.0001109097169849147, "loss": 0.4736, "step": 22890 }, { "epoch": 449.0, "eval_loss": 0.47750285267829895, "eval_runtime": 2.1452, "eval_samples_per_second": 1062.363, "eval_steps_per_second": 4.195, "step": 22899 }, { "epoch": 449.02, "learning_rate": 0.00011087413271852517, "loss": 0.4703, "step": 22900 }, { "epoch": 449.22, "learning_rate": 0.00011083853797768895, "loss": 0.4719, "step": 22910 }, { "epoch": 449.41, "learning_rate": 0.00011080293277279894, "loss": 0.4737, "step": 22920 }, { "epoch": 449.61, "learning_rate": 0.00011076731711425101, "loss": 0.4706, "step": 22930 }, { "epoch": 449.8, "learning_rate": 0.00011073169101244421, "loss": 0.4715, "step": 22940 }, { "epoch": 450.0, "learning_rate": 0.00011069605447778052, "loss": 0.4764, "step": 22950 }, { "epoch": 450.0, "eval_loss": 0.47553837299346924, "eval_runtime": 2.1747, "eval_samples_per_second": 1047.979, "eval_steps_per_second": 4.139, "step": 22950 }, { "epoch": 450.2, "learning_rate": 0.00011066040752066499, "loss": 0.4765, "step": 22960 }, { "epoch": 450.39, "learning_rate": 0.0001106247501515058, "loss": 0.4758, "step": 22970 }, { "epoch": 450.59, "learning_rate": 0.00011058908238071406, "loss": 0.4674, "step": 22980 }, { "epoch": 450.78, "learning_rate": 0.000110553404218704, "loss": 0.4684, "step": 22990 }, { "epoch": 450.98, "learning_rate": 0.0001105177156758928, "loss": 0.4701, "step": 23000 }, { "epoch": 451.0, "eval_loss": 0.47549954056739807, "eval_runtime": 2.2025, "eval_samples_per_second": 1034.721, "eval_steps_per_second": 4.086, "step": 23001 }, { "epoch": 451.18, "learning_rate": 0.00011048201676270076, "loss": 0.4771, "step": 23010 }, { "epoch": 451.37, "learning_rate": 0.00011044630748955113, "loss": 0.4733, "step": 23020 }, { "epoch": 451.57, "learning_rate": 0.00011041058786687028, "loss": 0.4733, "step": 23030 }, { "epoch": 451.76, "learning_rate": 0.00011037485790508745, "loss": 0.4714, "step": 23040 }, { "epoch": 451.96, "learning_rate": 0.0001103391176146351, "loss": 0.4746, "step": 23050 }, { "epoch": 452.0, "eval_loss": 0.4750150740146637, "eval_runtime": 2.2323, "eval_samples_per_second": 1020.924, "eval_steps_per_second": 4.032, "step": 23052 }, { "epoch": 452.16, "learning_rate": 0.00011030336700594852, "loss": 0.4654, "step": 23060 }, { "epoch": 452.35, "learning_rate": 0.00011026760608946611, "loss": 0.4708, "step": 23070 }, { "epoch": 452.55, "learning_rate": 0.00011023183487562929, "loss": 0.4726, "step": 23080 }, { "epoch": 452.75, "learning_rate": 0.00011019605337488241, "loss": 0.4665, "step": 23090 }, { "epoch": 452.94, "learning_rate": 0.0001101602615976729, "loss": 0.4727, "step": 23100 }, { "epoch": 453.0, "eval_loss": 0.47314518690109253, "eval_runtime": 2.1703, "eval_samples_per_second": 1050.085, "eval_steps_per_second": 4.147, "step": 23103 }, { "epoch": 453.14, "learning_rate": 0.00011012445955445117, "loss": 0.4679, "step": 23110 }, { "epoch": 453.33, "learning_rate": 0.00011008864725567059, "loss": 0.4682, "step": 23120 }, { "epoch": 453.53, "learning_rate": 0.00011005282471178757, "loss": 0.4684, "step": 23130 }, { "epoch": 453.73, "learning_rate": 0.00011001699193326147, "loss": 0.4692, "step": 23140 }, { "epoch": 453.92, "learning_rate": 0.00010998114893055469, "loss": 0.4691, "step": 23150 }, { "epoch": 454.0, "eval_loss": 0.4686477482318878, "eval_runtime": 2.3412, "eval_samples_per_second": 973.429, "eval_steps_per_second": 3.844, "step": 23154 }, { "epoch": 454.12, "learning_rate": 0.00010994529571413258, "loss": 0.4665, "step": 23160 }, { "epoch": 454.31, "learning_rate": 0.00010990943229446346, "loss": 0.466, "step": 23170 }, { "epoch": 454.51, "learning_rate": 0.0001098735586820187, "loss": 0.47, "step": 23180 }, { "epoch": 454.71, "learning_rate": 0.00010983767488727253, "loss": 0.4683, "step": 23190 }, { "epoch": 454.9, "learning_rate": 0.00010980178092070225, "loss": 0.4673, "step": 23200 }, { "epoch": 455.0, "eval_loss": 0.4761298596858978, "eval_runtime": 2.277, "eval_samples_per_second": 1000.862, "eval_steps_per_second": 3.953, "step": 23205 }, { "epoch": 455.1, "learning_rate": 0.00010976587679278812, "loss": 0.4718, "step": 23210 }, { "epoch": 455.29, "learning_rate": 0.00010972996251401328, "loss": 0.4687, "step": 23220 }, { "epoch": 455.49, "learning_rate": 0.00010969403809486397, "loss": 0.4687, "step": 23230 }, { "epoch": 455.69, "learning_rate": 0.0001096581035458293, "loss": 0.468, "step": 23240 }, { "epoch": 455.88, "learning_rate": 0.00010962215887740132, "loss": 0.4726, "step": 23250 }, { "epoch": 456.0, "eval_loss": 0.4763098955154419, "eval_runtime": 2.1556, "eval_samples_per_second": 1057.242, "eval_steps_per_second": 4.175, "step": 23256 }, { "epoch": 456.08, "learning_rate": 0.00010958620410007513, "loss": 0.4706, "step": 23260 }, { "epoch": 456.27, "learning_rate": 0.00010955023922434864, "loss": 0.4695, "step": 23270 }, { "epoch": 456.47, "learning_rate": 0.00010951426426072286, "loss": 0.4676, "step": 23280 }, { "epoch": 456.67, "learning_rate": 0.00010947827921970169, "loss": 0.4688, "step": 23290 }, { "epoch": 456.86, "learning_rate": 0.00010944228411179189, "loss": 0.4726, "step": 23300 }, { "epoch": 457.0, "eval_loss": 0.4806825816631317, "eval_runtime": 2.1723, "eval_samples_per_second": 1049.096, "eval_steps_per_second": 4.143, "step": 23307 }, { "epoch": 457.06, "learning_rate": 0.00010940627894750328, "loss": 0.4692, "step": 23310 }, { "epoch": 457.25, "learning_rate": 0.00010937026373734856, "loss": 0.478, "step": 23320 }, { "epoch": 457.45, "learning_rate": 0.00010933423849184336, "loss": 0.4758, "step": 23330 }, { "epoch": 457.65, "learning_rate": 0.00010929820322150624, "loss": 0.4698, "step": 23340 }, { "epoch": 457.84, "learning_rate": 0.00010926215793685869, "loss": 0.4696, "step": 23350 }, { "epoch": 458.0, "eval_loss": 0.4738100469112396, "eval_runtime": 2.3236, "eval_samples_per_second": 980.81, "eval_steps_per_second": 3.873, "step": 23358 }, { "epoch": 458.04, "learning_rate": 0.00010922610264842516, "loss": 0.4709, "step": 23360 }, { "epoch": 458.24, "learning_rate": 0.00010919003736673297, "loss": 0.4675, "step": 23370 }, { "epoch": 458.43, "learning_rate": 0.00010915396210231239, "loss": 0.4716, "step": 23380 }, { "epoch": 458.63, "learning_rate": 0.00010911787686569658, "loss": 0.4712, "step": 23390 }, { "epoch": 458.82, "learning_rate": 0.00010908178166742161, "loss": 0.4689, "step": 23400 }, { "epoch": 459.0, "eval_loss": 0.4727371335029602, "eval_runtime": 2.3226, "eval_samples_per_second": 981.246, "eval_steps_per_second": 3.875, "step": 23409 }, { "epoch": 459.02, "learning_rate": 0.0001090456765180265, "loss": 0.4678, "step": 23410 }, { "epoch": 459.22, "learning_rate": 0.00010900956142805315, "loss": 0.4697, "step": 23420 }, { "epoch": 459.41, "learning_rate": 0.00010897343640804634, "loss": 0.472, "step": 23430 }, { "epoch": 459.61, "learning_rate": 0.00010893730146855378, "loss": 0.4685, "step": 23440 }, { "epoch": 459.8, "learning_rate": 0.00010890115662012607, "loss": 0.4681, "step": 23450 }, { "epoch": 460.0, "learning_rate": 0.0001088650018733167, "loss": 0.4702, "step": 23460 }, { "epoch": 460.0, "eval_loss": 0.479326456785202, "eval_runtime": 2.2444, "eval_samples_per_second": 1015.425, "eval_steps_per_second": 4.01, "step": 23460 }, { "epoch": 460.2, "learning_rate": 0.00010882883723868205, "loss": 0.4758, "step": 23470 }, { "epoch": 460.39, "learning_rate": 0.00010879266272678136, "loss": 0.4722, "step": 23480 }, { "epoch": 460.59, "learning_rate": 0.00010875647834817681, "loss": 0.4707, "step": 23490 }, { "epoch": 460.78, "learning_rate": 0.00010872028411343344, "loss": 0.4692, "step": 23500 }, { "epoch": 460.98, "learning_rate": 0.00010868408003311912, "loss": 0.4692, "step": 23510 }, { "epoch": 461.0, "eval_loss": 0.4696498513221741, "eval_runtime": 2.2495, "eval_samples_per_second": 1013.127, "eval_steps_per_second": 4.001, "step": 23511 }, { "epoch": 461.18, "learning_rate": 0.00010864786611780469, "loss": 0.4652, "step": 23520 }, { "epoch": 461.37, "learning_rate": 0.00010861164237806375, "loss": 0.468, "step": 23530 }, { "epoch": 461.57, "learning_rate": 0.00010857540882447286, "loss": 0.4651, "step": 23540 }, { "epoch": 461.76, "learning_rate": 0.0001085391654676114, "loss": 0.4701, "step": 23550 }, { "epoch": 461.96, "learning_rate": 0.00010850291231806159, "loss": 0.4694, "step": 23560 }, { "epoch": 462.0, "eval_loss": 0.47131288051605225, "eval_runtime": 2.2683, "eval_samples_per_second": 1004.726, "eval_steps_per_second": 3.968, "step": 23562 }, { "epoch": 462.16, "learning_rate": 0.00010846664938640861, "loss": 0.4661, "step": 23570 }, { "epoch": 462.35, "learning_rate": 0.00010843037668324038, "loss": 0.4732, "step": 23580 }, { "epoch": 462.55, "learning_rate": 0.00010839409421914771, "loss": 0.4709, "step": 23590 }, { "epoch": 462.75, "learning_rate": 0.00010835780200472429, "loss": 0.4654, "step": 23600 }, { "epoch": 462.94, "learning_rate": 0.00010832150005056665, "loss": 0.4628, "step": 23610 }, { "epoch": 463.0, "eval_loss": 0.47472110390663147, "eval_runtime": 2.2214, "eval_samples_per_second": 1025.941, "eval_steps_per_second": 4.052, "step": 23613 }, { "epoch": 463.14, "learning_rate": 0.00010828518836727413, "loss": 0.4711, "step": 23620 }, { "epoch": 463.33, "learning_rate": 0.00010824886696544895, "loss": 0.4662, "step": 23630 }, { "epoch": 463.53, "learning_rate": 0.00010821253585569609, "loss": 0.471, "step": 23640 }, { "epoch": 463.73, "learning_rate": 0.00010817619504862352, "loss": 0.4736, "step": 23650 }, { "epoch": 463.92, "learning_rate": 0.00010813984455484189, "loss": 0.4677, "step": 23660 }, { "epoch": 464.0, "eval_loss": 0.4787036180496216, "eval_runtime": 2.1739, "eval_samples_per_second": 1048.334, "eval_steps_per_second": 4.14, "step": 23664 }, { "epoch": 464.12, "learning_rate": 0.00010810348438496473, "loss": 0.472, "step": 23670 }, { "epoch": 464.31, "learning_rate": 0.00010806711454960843, "loss": 0.4654, "step": 23680 }, { "epoch": 464.51, "learning_rate": 0.00010803073505939212, "loss": 0.4696, "step": 23690 }, { "epoch": 464.71, "learning_rate": 0.00010799434592493785, "loss": 0.467, "step": 23700 }, { "epoch": 464.9, "learning_rate": 0.0001079579471568704, "loss": 0.4673, "step": 23710 }, { "epoch": 465.0, "eval_loss": 0.4681728184223175, "eval_runtime": 2.1469, "eval_samples_per_second": 1061.532, "eval_steps_per_second": 4.192, "step": 23715 }, { "epoch": 465.1, "learning_rate": 0.00010792153876581743, "loss": 0.4626, "step": 23720 }, { "epoch": 465.29, "learning_rate": 0.00010788512076240935, "loss": 0.4646, "step": 23730 }, { "epoch": 465.49, "learning_rate": 0.00010784869315727942, "loss": 0.4706, "step": 23740 }, { "epoch": 465.69, "learning_rate": 0.0001078122559610637, "loss": 0.4601, "step": 23750 }, { "epoch": 465.88, "learning_rate": 0.000107775809184401, "loss": 0.4709, "step": 23760 }, { "epoch": 466.0, "eval_loss": 0.4692438542842865, "eval_runtime": 2.3021, "eval_samples_per_second": 989.952, "eval_steps_per_second": 3.909, "step": 23766 }, { "epoch": 466.08, "learning_rate": 0.00010773935283793298, "loss": 0.4682, "step": 23770 }, { "epoch": 466.27, "learning_rate": 0.00010770288693230411, "loss": 0.4682, "step": 23780 }, { "epoch": 466.47, "learning_rate": 0.00010766641147816161, "loss": 0.4669, "step": 23790 }, { "epoch": 466.67, "learning_rate": 0.00010762992648615548, "loss": 0.4654, "step": 23800 }, { "epoch": 466.86, "learning_rate": 0.00010759343196693854, "loss": 0.463, "step": 23810 }, { "epoch": 467.0, "eval_loss": 0.46763309836387634, "eval_runtime": 2.1636, "eval_samples_per_second": 1053.34, "eval_steps_per_second": 4.16, "step": 23817 }, { "epoch": 467.06, "learning_rate": 0.00010755692793116637, "loss": 0.4643, "step": 23820 }, { "epoch": 467.25, "learning_rate": 0.00010752041438949733, "loss": 0.4676, "step": 23830 }, { "epoch": 467.45, "learning_rate": 0.00010748389135259255, "loss": 0.4628, "step": 23840 }, { "epoch": 467.65, "learning_rate": 0.00010744735883111596, "loss": 0.4687, "step": 23850 }, { "epoch": 467.84, "learning_rate": 0.00010741081683573427, "loss": 0.4654, "step": 23860 }, { "epoch": 468.0, "eval_loss": 0.4696432054042816, "eval_runtime": 2.3135, "eval_samples_per_second": 985.094, "eval_steps_per_second": 3.89, "step": 23868 }, { "epoch": 468.04, "learning_rate": 0.00010737426537711687, "loss": 0.4669, "step": 23870 }, { "epoch": 468.24, "learning_rate": 0.00010733770446593599, "loss": 0.4703, "step": 23880 }, { "epoch": 468.43, "learning_rate": 0.00010730113411286661, "loss": 0.4674, "step": 23890 }, { "epoch": 468.63, "learning_rate": 0.00010726455432858645, "loss": 0.4677, "step": 23900 }, { "epoch": 468.82, "learning_rate": 0.000107227965123776, "loss": 0.4648, "step": 23910 }, { "epoch": 469.0, "eval_loss": 0.46745070815086365, "eval_runtime": 2.136, "eval_samples_per_second": 1066.943, "eval_steps_per_second": 4.213, "step": 23919 }, { "epoch": 469.02, "learning_rate": 0.0001071913665091185, "loss": 0.4628, "step": 23920 }, { "epoch": 469.22, "learning_rate": 0.0001071547584952999, "loss": 0.4594, "step": 23930 }, { "epoch": 469.41, "learning_rate": 0.00010711814109300897, "loss": 0.4666, "step": 23940 }, { "epoch": 469.61, "learning_rate": 0.0001070815143129371, "loss": 0.4666, "step": 23950 }, { "epoch": 469.8, "learning_rate": 0.00010704487816577857, "loss": 0.462, "step": 23960 }, { "epoch": 470.0, "learning_rate": 0.00010700823266223026, "loss": 0.4642, "step": 23970 }, { "epoch": 470.0, "eval_loss": 0.4700300395488739, "eval_runtime": 2.281, "eval_samples_per_second": 999.12, "eval_steps_per_second": 3.946, "step": 23970 }, { "epoch": 470.2, "learning_rate": 0.00010697157781299187, "loss": 0.4698, "step": 23980 }, { "epoch": 470.39, "learning_rate": 0.00010693491362876583, "loss": 0.4675, "step": 23990 }, { "epoch": 470.59, "learning_rate": 0.0001068982401202572, "loss": 0.4652, "step": 24000 }, { "epoch": 470.78, "learning_rate": 0.00010686155729817386, "loss": 0.4582, "step": 24010 }, { "epoch": 470.98, "learning_rate": 0.00010682486517322637, "loss": 0.4687, "step": 24020 }, { "epoch": 471.0, "eval_loss": 0.46906590461730957, "eval_runtime": 2.2917, "eval_samples_per_second": 994.474, "eval_steps_per_second": 3.927, "step": 24021 }, { "epoch": 471.18, "learning_rate": 0.000106788163756128, "loss": 0.4654, "step": 24030 }, { "epoch": 471.37, "learning_rate": 0.00010675145305759477, "loss": 0.4646, "step": 24040 }, { "epoch": 471.57, "learning_rate": 0.00010671473308834538, "loss": 0.4708, "step": 24050 }, { "epoch": 471.76, "learning_rate": 0.00010667800385910123, "loss": 0.4675, "step": 24060 }, { "epoch": 471.96, "learning_rate": 0.00010664126538058645, "loss": 0.469, "step": 24070 }, { "epoch": 472.0, "eval_loss": 0.4749109745025635, "eval_runtime": 2.2452, "eval_samples_per_second": 1015.044, "eval_steps_per_second": 4.009, "step": 24072 }, { "epoch": 472.16, "learning_rate": 0.0001066045176635278, "loss": 0.4687, "step": 24080 }, { "epoch": 472.35, "learning_rate": 0.0001065677607186549, "loss": 0.4688, "step": 24090 }, { "epoch": 472.55, "learning_rate": 0.00010653099455669988, "loss": 0.4732, "step": 24100 }, { "epoch": 472.75, "learning_rate": 0.00010649421918839764, "loss": 0.4664, "step": 24110 }, { "epoch": 472.94, "learning_rate": 0.0001064574346244858, "loss": 0.4692, "step": 24120 }, { "epoch": 473.0, "eval_loss": 0.4672113358974457, "eval_runtime": 2.1455, "eval_samples_per_second": 1062.232, "eval_steps_per_second": 4.195, "step": 24123 }, { "epoch": 473.14, "learning_rate": 0.00010642064087570464, "loss": 0.4617, "step": 24130 }, { "epoch": 473.33, "learning_rate": 0.00010638383795279706, "loss": 0.4674, "step": 24140 }, { "epoch": 473.53, "learning_rate": 0.00010634702586650875, "loss": 0.4667, "step": 24150 }, { "epoch": 473.73, "learning_rate": 0.00010631020462758798, "loss": 0.4658, "step": 24160 }, { "epoch": 473.92, "learning_rate": 0.00010627337424678576, "loss": 0.4635, "step": 24170 }, { "epoch": 474.0, "eval_loss": 0.4706786870956421, "eval_runtime": 2.1628, "eval_samples_per_second": 1053.708, "eval_steps_per_second": 4.161, "step": 24174 }, { "epoch": 474.12, "learning_rate": 0.0001062365347348557, "loss": 0.4611, "step": 24180 }, { "epoch": 474.31, "learning_rate": 0.00010619968610255416, "loss": 0.4698, "step": 24190 }, { "epoch": 474.51, "learning_rate": 0.00010616282836064008, "loss": 0.4638, "step": 24200 }, { "epoch": 474.71, "learning_rate": 0.00010612596151987513, "loss": 0.4641, "step": 24210 }, { "epoch": 474.9, "learning_rate": 0.00010608908559102359, "loss": 0.4635, "step": 24220 }, { "epoch": 475.0, "eval_loss": 0.46961140632629395, "eval_runtime": 2.3372, "eval_samples_per_second": 975.091, "eval_steps_per_second": 3.851, "step": 24225 }, { "epoch": 475.1, "learning_rate": 0.0001060522005848524, "loss": 0.4676, "step": 24230 }, { "epoch": 475.29, "learning_rate": 0.00010601530651213118, "loss": 0.4638, "step": 24240 }, { "epoch": 475.49, "learning_rate": 0.00010597840338363216, "loss": 0.4637, "step": 24250 }, { "epoch": 475.69, "learning_rate": 0.00010594149121013026, "loss": 0.4719, "step": 24260 }, { "epoch": 475.88, "learning_rate": 0.00010590457000240298, "loss": 0.4655, "step": 24270 }, { "epoch": 476.0, "eval_loss": 0.46518537402153015, "eval_runtime": 2.3011, "eval_samples_per_second": 990.409, "eval_steps_per_second": 3.911, "step": 24276 }, { "epoch": 476.08, "learning_rate": 0.0001058676397712305, "loss": 0.4668, "step": 24280 }, { "epoch": 476.27, "learning_rate": 0.00010583070052739558, "loss": 0.4618, "step": 24290 }, { "epoch": 476.47, "learning_rate": 0.00010579375228168375, "loss": 0.471, "step": 24300 }, { "epoch": 476.67, "learning_rate": 0.000105756795044883, "loss": 0.4621, "step": 24310 }, { "epoch": 476.86, "learning_rate": 0.00010571982882778404, "loss": 0.4633, "step": 24320 }, { "epoch": 477.0, "eval_loss": 0.47023797035217285, "eval_runtime": 2.2428, "eval_samples_per_second": 1016.149, "eval_steps_per_second": 4.013, "step": 24327 }, { "epoch": 477.06, "learning_rate": 0.00010568285364118019, "loss": 0.4672, "step": 24330 }, { "epoch": 477.25, "learning_rate": 0.00010564586949586735, "loss": 0.4653, "step": 24340 }, { "epoch": 477.45, "learning_rate": 0.00010560887640264411, "loss": 0.4642, "step": 24350 }, { "epoch": 477.65, "learning_rate": 0.0001055718743723116, "loss": 0.463, "step": 24360 }, { "epoch": 477.84, "learning_rate": 0.00010553486341567358, "loss": 0.4622, "step": 24370 }, { "epoch": 478.0, "eval_loss": 0.46373993158340454, "eval_runtime": 2.2432, "eval_samples_per_second": 1015.964, "eval_steps_per_second": 4.012, "step": 24378 }, { "epoch": 478.04, "learning_rate": 0.00010549784354353645, "loss": 0.4653, "step": 24380 }, { "epoch": 478.24, "learning_rate": 0.00010546081476670916, "loss": 0.4636, "step": 24390 }, { "epoch": 478.43, "learning_rate": 0.0001054237770960033, "loss": 0.4671, "step": 24400 }, { "epoch": 478.63, "learning_rate": 0.00010538673054223307, "loss": 0.4628, "step": 24410 }, { "epoch": 478.82, "learning_rate": 0.00010534967511621517, "loss": 0.4571, "step": 24420 }, { "epoch": 479.0, "eval_loss": 0.4678489565849304, "eval_runtime": 2.1558, "eval_samples_per_second": 1057.143, "eval_steps_per_second": 4.175, "step": 24429 }, { "epoch": 479.02, "learning_rate": 0.00010531261082876903, "loss": 0.4718, "step": 24430 }, { "epoch": 479.22, "learning_rate": 0.00010527553769071657, "loss": 0.4683, "step": 24440 }, { "epoch": 479.41, "learning_rate": 0.00010523845571288229, "loss": 0.4668, "step": 24450 }, { "epoch": 479.61, "learning_rate": 0.00010520136490609335, "loss": 0.4611, "step": 24460 }, { "epoch": 479.8, "learning_rate": 0.00010516426528117939, "loss": 0.4606, "step": 24470 }, { "epoch": 480.0, "learning_rate": 0.0001051271568489727, "loss": 0.4645, "step": 24480 }, { "epoch": 480.0, "eval_loss": 0.46348774433135986, "eval_runtime": 2.2724, "eval_samples_per_second": 1002.887, "eval_steps_per_second": 3.961, "step": 24480 }, { "epoch": 480.2, "learning_rate": 0.00010509003962030813, "loss": 0.4638, "step": 24490 }, { "epoch": 480.39, "learning_rate": 0.00010505291360602302, "loss": 0.4716, "step": 24500 }, { "epoch": 480.59, "learning_rate": 0.00010501577881695744, "loss": 0.465, "step": 24510 }, { "epoch": 480.78, "learning_rate": 0.00010497863526395384, "loss": 0.459, "step": 24520 }, { "epoch": 480.98, "learning_rate": 0.0001049414829578573, "loss": 0.4654, "step": 24530 }, { "epoch": 481.0, "eval_loss": 0.4655218720436096, "eval_runtime": 2.1945, "eval_samples_per_second": 1038.497, "eval_steps_per_second": 4.101, "step": 24531 }, { "epoch": 481.18, "learning_rate": 0.00010490432190951555, "loss": 0.4653, "step": 24540 }, { "epoch": 481.37, "learning_rate": 0.00010486715212977869, "loss": 0.4632, "step": 24550 }, { "epoch": 481.57, "learning_rate": 0.00010482997362949951, "loss": 0.46, "step": 24560 }, { "epoch": 481.76, "learning_rate": 0.00010479278641953334, "loss": 0.4667, "step": 24570 }, { "epoch": 481.96, "learning_rate": 0.00010475559051073795, "loss": 0.4588, "step": 24580 }, { "epoch": 482.0, "eval_loss": 0.4688310921192169, "eval_runtime": 2.2239, "eval_samples_per_second": 1024.76, "eval_steps_per_second": 4.047, "step": 24582 }, { "epoch": 482.16, "learning_rate": 0.00010471838591397375, "loss": 0.4668, "step": 24590 }, { "epoch": 482.35, "learning_rate": 0.00010468117264010365, "loss": 0.461, "step": 24600 }, { "epoch": 482.55, "learning_rate": 0.0001046439506999931, "loss": 0.4644, "step": 24610 }, { "epoch": 482.75, "learning_rate": 0.00010460672010451007, "loss": 0.4635, "step": 24620 }, { "epoch": 482.94, "learning_rate": 0.00010456948086452506, "loss": 0.4608, "step": 24630 }, { "epoch": 483.0, "eval_loss": 0.4639376103878021, "eval_runtime": 2.1825, "eval_samples_per_second": 1044.2, "eval_steps_per_second": 4.124, "step": 24633 }, { "epoch": 483.14, "learning_rate": 0.00010453223299091109, "loss": 0.4648, "step": 24640 }, { "epoch": 483.33, "learning_rate": 0.00010449497649454372, "loss": 0.4604, "step": 24650 }, { "epoch": 483.53, "learning_rate": 0.00010445771138630103, "loss": 0.4682, "step": 24660 }, { "epoch": 483.73, "learning_rate": 0.00010442043767706357, "loss": 0.4593, "step": 24670 }, { "epoch": 483.92, "learning_rate": 0.00010438315537771447, "loss": 0.4606, "step": 24680 }, { "epoch": 484.0, "eval_loss": 0.4653979241847992, "eval_runtime": 2.1706, "eval_samples_per_second": 1049.951, "eval_steps_per_second": 4.146, "step": 24684 }, { "epoch": 484.12, "learning_rate": 0.0001043458644991393, "loss": 0.4604, "step": 24690 }, { "epoch": 484.31, "learning_rate": 0.00010430856505222615, "loss": 0.4633, "step": 24700 }, { "epoch": 484.51, "learning_rate": 0.00010427125704786566, "loss": 0.4568, "step": 24710 }, { "epoch": 484.71, "learning_rate": 0.00010423394049695094, "loss": 0.4643, "step": 24720 }, { "epoch": 484.9, "learning_rate": 0.00010419661541037757, "loss": 0.4624, "step": 24730 }, { "epoch": 485.0, "eval_loss": 0.46611276268959045, "eval_runtime": 2.1771, "eval_samples_per_second": 1046.812, "eval_steps_per_second": 4.134, "step": 24735 }, { "epoch": 485.1, "learning_rate": 0.00010415928179904363, "loss": 0.4584, "step": 24740 }, { "epoch": 485.29, "learning_rate": 0.00010412193967384975, "loss": 0.4598, "step": 24750 }, { "epoch": 485.49, "learning_rate": 0.00010408458904569895, "loss": 0.4652, "step": 24760 }, { "epoch": 485.69, "learning_rate": 0.00010404722992549679, "loss": 0.4618, "step": 24770 }, { "epoch": 485.88, "learning_rate": 0.00010400986232415133, "loss": 0.4612, "step": 24780 }, { "epoch": 486.0, "eval_loss": 0.4668976664543152, "eval_runtime": 2.2071, "eval_samples_per_second": 1032.556, "eval_steps_per_second": 4.078, "step": 24786 }, { "epoch": 486.08, "learning_rate": 0.00010397248625257304, "loss": 0.4547, "step": 24790 }, { "epoch": 486.27, "learning_rate": 0.0001039351017216749, "loss": 0.4597, "step": 24800 }, { "epoch": 486.47, "learning_rate": 0.00010389770874237239, "loss": 0.4615, "step": 24810 }, { "epoch": 486.67, "learning_rate": 0.00010386030732558342, "loss": 0.4635, "step": 24820 }, { "epoch": 486.86, "learning_rate": 0.00010382289748222834, "loss": 0.46, "step": 24830 }, { "epoch": 487.0, "eval_loss": 0.4653010666370392, "eval_runtime": 2.233, "eval_samples_per_second": 1020.607, "eval_steps_per_second": 4.03, "step": 24837 }, { "epoch": 487.06, "learning_rate": 0.00010378547922323, "loss": 0.4602, "step": 24840 }, { "epoch": 487.25, "learning_rate": 0.00010374805255951372, "loss": 0.4638, "step": 24850 }, { "epoch": 487.45, "learning_rate": 0.00010371061750200723, "loss": 0.4647, "step": 24860 }, { "epoch": 487.65, "learning_rate": 0.00010367317406164075, "loss": 0.4633, "step": 24870 }, { "epoch": 487.84, "learning_rate": 0.00010363572224934692, "loss": 0.4623, "step": 24880 }, { "epoch": 488.0, "eval_loss": 0.468781054019928, "eval_runtime": 2.2652, "eval_samples_per_second": 1006.101, "eval_steps_per_second": 3.973, "step": 24888 }, { "epoch": 488.04, "learning_rate": 0.00010359826207606081, "loss": 0.4618, "step": 24890 }, { "epoch": 488.24, "learning_rate": 0.00010356079355272, "loss": 0.4628, "step": 24900 }, { "epoch": 488.43, "learning_rate": 0.00010352331669026443, "loss": 0.4608, "step": 24910 }, { "epoch": 488.63, "learning_rate": 0.0001034858314996365, "loss": 0.4598, "step": 24920 }, { "epoch": 488.82, "learning_rate": 0.00010344833799178109, "loss": 0.4648, "step": 24930 }, { "epoch": 489.0, "eval_loss": 0.464847594499588, "eval_runtime": 2.2742, "eval_samples_per_second": 1002.105, "eval_steps_per_second": 3.957, "step": 24939 }, { "epoch": 489.02, "learning_rate": 0.00010341083617764545, "loss": 0.4605, "step": 24940 }, { "epoch": 489.22, "learning_rate": 0.00010337332606817925, "loss": 0.4586, "step": 24950 }, { "epoch": 489.41, "learning_rate": 0.00010333580767433465, "loss": 0.4569, "step": 24960 }, { "epoch": 489.61, "learning_rate": 0.00010329828100706613, "loss": 0.4621, "step": 24970 }, { "epoch": 489.8, "learning_rate": 0.00010326074607733068, "loss": 0.4627, "step": 24980 }, { "epoch": 490.0, "learning_rate": 0.00010322320289608766, "loss": 0.4602, "step": 24990 }, { "epoch": 490.0, "eval_loss": 0.46202248334884644, "eval_runtime": 2.2413, "eval_samples_per_second": 1016.833, "eval_steps_per_second": 4.016, "step": 24990 }, { "epoch": 490.2, "learning_rate": 0.0001031856514742988, "loss": 0.4667, "step": 25000 }, { "epoch": 490.39, "learning_rate": 0.00010314809182292835, "loss": 0.4642, "step": 25010 }, { "epoch": 490.59, "learning_rate": 0.00010311052395294285, "loss": 0.4599, "step": 25020 }, { "epoch": 490.78, "learning_rate": 0.00010307294787531127, "loss": 0.4623, "step": 25030 }, { "epoch": 490.98, "learning_rate": 0.00010303536360100501, "loss": 0.4587, "step": 25040 }, { "epoch": 491.0, "eval_loss": 0.46522802114486694, "eval_runtime": 2.2035, "eval_samples_per_second": 1034.275, "eval_steps_per_second": 4.084, "step": 25041 }, { "epoch": 491.18, "learning_rate": 0.0001029977711409978, "loss": 0.4634, "step": 25050 }, { "epoch": 491.37, "learning_rate": 0.00010296017050626583, "loss": 0.4635, "step": 25060 }, { "epoch": 491.57, "learning_rate": 0.00010292256170778768, "loss": 0.46, "step": 25070 }, { "epoch": 491.76, "learning_rate": 0.0001028849447565442, "loss": 0.4615, "step": 25080 }, { "epoch": 491.96, "learning_rate": 0.00010284731966351879, "loss": 0.4627, "step": 25090 }, { "epoch": 492.0, "eval_loss": 0.46937766671180725, "eval_runtime": 2.2693, "eval_samples_per_second": 1004.287, "eval_steps_per_second": 3.966, "step": 25092 }, { "epoch": 492.16, "learning_rate": 0.00010280968643969705, "loss": 0.4662, "step": 25100 }, { "epoch": 492.35, "learning_rate": 0.00010277204509606712, "loss": 0.4631, "step": 25110 }, { "epoch": 492.55, "learning_rate": 0.0001027343956436194, "loss": 0.4627, "step": 25120 }, { "epoch": 492.75, "learning_rate": 0.00010269673809334665, "loss": 0.4622, "step": 25130 }, { "epoch": 492.94, "learning_rate": 0.00010265907245624411, "loss": 0.4638, "step": 25140 }, { "epoch": 493.0, "eval_loss": 0.4619758725166321, "eval_runtime": 2.1822, "eval_samples_per_second": 1044.355, "eval_steps_per_second": 4.124, "step": 25143 }, { "epoch": 493.14, "learning_rate": 0.00010262139874330926, "loss": 0.4589, "step": 25150 }, { "epoch": 493.33, "learning_rate": 0.00010258371696554199, "loss": 0.4604, "step": 25160 }, { "epoch": 493.53, "learning_rate": 0.00010254602713394455, "loss": 0.4613, "step": 25170 }, { "epoch": 493.73, "learning_rate": 0.0001025083292595215, "loss": 0.4568, "step": 25180 }, { "epoch": 493.92, "learning_rate": 0.00010247062335327983, "loss": 0.4565, "step": 25190 }, { "epoch": 494.0, "eval_loss": 0.4652526378631592, "eval_runtime": 2.2708, "eval_samples_per_second": 1003.596, "eval_steps_per_second": 3.963, "step": 25194 }, { "epoch": 494.12, "learning_rate": 0.00010243290942622879, "loss": 0.462, "step": 25200 }, { "epoch": 494.31, "learning_rate": 0.00010239518748937999, "loss": 0.4635, "step": 25210 }, { "epoch": 494.51, "learning_rate": 0.00010235745755374745, "loss": 0.4616, "step": 25220 }, { "epoch": 494.71, "learning_rate": 0.0001023197196303474, "loss": 0.4571, "step": 25230 }, { "epoch": 494.9, "learning_rate": 0.00010228197373019853, "loss": 0.4588, "step": 25240 }, { "epoch": 495.0, "eval_loss": 0.45982059836387634, "eval_runtime": 2.3244, "eval_samples_per_second": 980.453, "eval_steps_per_second": 3.872, "step": 25245 }, { "epoch": 495.1, "learning_rate": 0.00010224421986432178, "loss": 0.4594, "step": 25250 }, { "epoch": 495.29, "learning_rate": 0.0001022064580437404, "loss": 0.4618, "step": 25260 }, { "epoch": 495.49, "learning_rate": 0.00010216868827948008, "loss": 0.4567, "step": 25270 }, { "epoch": 495.69, "learning_rate": 0.00010213091058256868, "loss": 0.4546, "step": 25280 }, { "epoch": 495.88, "learning_rate": 0.00010209312496403647, "loss": 0.4568, "step": 25290 }, { "epoch": 496.0, "eval_loss": 0.461697518825531, "eval_runtime": 2.2304, "eval_samples_per_second": 1021.781, "eval_steps_per_second": 4.035, "step": 25296 }, { "epoch": 496.08, "learning_rate": 0.00010205533143491601, "loss": 0.4572, "step": 25300 }, { "epoch": 496.27, "learning_rate": 0.00010201753000624215, "loss": 0.4592, "step": 25310 }, { "epoch": 496.47, "learning_rate": 0.00010197972068905208, "loss": 0.4641, "step": 25320 }, { "epoch": 496.67, "learning_rate": 0.0001019419034943853, "loss": 0.4587, "step": 25330 }, { "epoch": 496.86, "learning_rate": 0.00010190407843328351, "loss": 0.4524, "step": 25340 }, { "epoch": 497.0, "eval_loss": 0.4631481468677521, "eval_runtime": 2.1613, "eval_samples_per_second": 1054.478, "eval_steps_per_second": 4.164, "step": 25347 }, { "epoch": 497.06, "learning_rate": 0.00010186624551679089, "loss": 0.4544, "step": 25350 }, { "epoch": 497.25, "learning_rate": 0.00010182840475595374, "loss": 0.462, "step": 25360 }, { "epoch": 497.45, "learning_rate": 0.00010179055616182074, "loss": 0.4558, "step": 25370 }, { "epoch": 497.65, "learning_rate": 0.00010175269974544281, "loss": 0.4606, "step": 25380 }, { "epoch": 497.84, "learning_rate": 0.0001017148355178732, "loss": 0.4635, "step": 25390 }, { "epoch": 498.0, "eval_loss": 0.4639947712421417, "eval_runtime": 2.2201, "eval_samples_per_second": 1026.549, "eval_steps_per_second": 4.054, "step": 25398 }, { "epoch": 498.04, "learning_rate": 0.00010167696349016742, "loss": 0.4639, "step": 25400 }, { "epoch": 498.24, "learning_rate": 0.00010163908367338325, "loss": 0.463, "step": 25410 }, { "epoch": 498.43, "learning_rate": 0.00010160119607858076, "loss": 0.4628, "step": 25420 }, { "epoch": 498.63, "learning_rate": 0.0001015633007168223, "loss": 0.4606, "step": 25430 }, { "epoch": 498.82, "learning_rate": 0.00010152539759917242, "loss": 0.4534, "step": 25440 }, { "epoch": 499.0, "eval_loss": 0.4642672538757324, "eval_runtime": 2.2526, "eval_samples_per_second": 1011.722, "eval_steps_per_second": 3.995, "step": 25449 }, { "epoch": 499.02, "learning_rate": 0.00010148748673669804, "loss": 0.4633, "step": 25450 }, { "epoch": 499.22, "learning_rate": 0.00010144956814046823, "loss": 0.4603, "step": 25460 }, { "epoch": 499.41, "learning_rate": 0.00010141164182155442, "loss": 0.4532, "step": 25470 }, { "epoch": 499.61, "learning_rate": 0.00010137370779103025, "loss": 0.4535, "step": 25480 }, { "epoch": 499.8, "learning_rate": 0.00010133576605997158, "loss": 0.4539, "step": 25490 }, { "epoch": 500.0, "learning_rate": 0.00010129781663945658, "loss": 0.4599, "step": 25500 }, { "epoch": 500.0, "eval_loss": 0.46625402569770813, "eval_runtime": 2.1508, "eval_samples_per_second": 1059.606, "eval_steps_per_second": 4.184, "step": 25500 }, { "epoch": 500.2, "learning_rate": 0.00010125985954056561, "loss": 0.4591, "step": 25510 }, { "epoch": 500.39, "learning_rate": 0.0001012218947743813, "loss": 0.4557, "step": 25520 }, { "epoch": 500.59, "learning_rate": 0.00010118392235198851, "loss": 0.4556, "step": 25530 }, { "epoch": 500.78, "learning_rate": 0.00010114594228447439, "loss": 0.4598, "step": 25540 }, { "epoch": 500.98, "learning_rate": 0.0001011079545829282, "loss": 0.4549, "step": 25550 }, { "epoch": 501.0, "eval_loss": 0.4588215947151184, "eval_runtime": 2.1877, "eval_samples_per_second": 1041.737, "eval_steps_per_second": 4.114, "step": 25551 }, { "epoch": 501.18, "learning_rate": 0.00010106995925844154, "loss": 0.4607, "step": 25560 }, { "epoch": 501.37, "learning_rate": 0.0001010319563221082, "loss": 0.4562, "step": 25570 }, { "epoch": 501.57, "learning_rate": 0.00010099394578502419, "loss": 0.4575, "step": 25580 }, { "epoch": 501.76, "learning_rate": 0.00010095592765828774, "loss": 0.46, "step": 25590 }, { "epoch": 501.96, "learning_rate": 0.00010091790195299925, "loss": 0.4595, "step": 25600 }, { "epoch": 502.0, "eval_loss": 0.46614253520965576, "eval_runtime": 2.3019, "eval_samples_per_second": 990.035, "eval_steps_per_second": 3.91, "step": 25602 }, { "epoch": 502.16, "learning_rate": 0.00010087986868026144, "loss": 0.4567, "step": 25610 }, { "epoch": 502.35, "learning_rate": 0.00010084182785117916, "loss": 0.4569, "step": 25620 }, { "epoch": 502.55, "learning_rate": 0.00010080377947685946, "loss": 0.4602, "step": 25630 }, { "epoch": 502.75, "learning_rate": 0.00010076572356841164, "loss": 0.461, "step": 25640 }, { "epoch": 502.94, "learning_rate": 0.00010072766013694715, "loss": 0.46, "step": 25650 }, { "epoch": 503.0, "eval_loss": 0.4626482427120209, "eval_runtime": 2.185, "eval_samples_per_second": 1043.005, "eval_steps_per_second": 4.119, "step": 25653 }, { "epoch": 503.14, "learning_rate": 0.0001006895891935797, "loss": 0.4623, "step": 25660 }, { "epoch": 503.33, "learning_rate": 0.00010065151074942516, "loss": 0.4602, "step": 25670 }, { "epoch": 503.53, "learning_rate": 0.00010061342481560151, "loss": 0.457, "step": 25680 }, { "epoch": 503.73, "learning_rate": 0.0001005753314032291, "loss": 0.4564, "step": 25690 }, { "epoch": 503.92, "learning_rate": 0.0001005372305234303, "loss": 0.4504, "step": 25700 }, { "epoch": 504.0, "eval_loss": 0.4590928256511688, "eval_runtime": 2.299, "eval_samples_per_second": 991.307, "eval_steps_per_second": 3.915, "step": 25704 }, { "epoch": 504.12, "learning_rate": 0.00010049912218732971, "loss": 0.4553, "step": 25710 }, { "epoch": 504.31, "learning_rate": 0.00010046100640605413, "loss": 0.4599, "step": 25720 }, { "epoch": 504.51, "learning_rate": 0.0001004228831907325, "loss": 0.456, "step": 25730 }, { "epoch": 504.71, "learning_rate": 0.00010038475255249597, "loss": 0.4505, "step": 25740 }, { "epoch": 504.9, "learning_rate": 0.00010034661450247785, "loss": 0.459, "step": 25750 }, { "epoch": 505.0, "eval_loss": 0.46226629614830017, "eval_runtime": 2.2506, "eval_samples_per_second": 1012.612, "eval_steps_per_second": 3.999, "step": 25755 }, { "epoch": 505.1, "learning_rate": 0.00010030846905181356, "loss": 0.456, "step": 25760 }, { "epoch": 505.29, "learning_rate": 0.00010027031621164076, "loss": 0.4556, "step": 25770 }, { "epoch": 505.49, "learning_rate": 0.0001002321559930992, "loss": 0.4565, "step": 25780 }, { "epoch": 505.69, "learning_rate": 0.00010019398840733082, "loss": 0.4603, "step": 25790 }, { "epoch": 505.88, "learning_rate": 0.00010015581346547969, "loss": 0.4582, "step": 25800 }, { "epoch": 506.0, "eval_loss": 0.46172964572906494, "eval_runtime": 2.1975, "eval_samples_per_second": 1037.086, "eval_steps_per_second": 4.096, "step": 25806 }, { "epoch": 506.08, "learning_rate": 0.00010011763117869207, "loss": 0.4576, "step": 25810 }, { "epoch": 506.27, "learning_rate": 0.00010007944155811633, "loss": 0.4531, "step": 25820 }, { "epoch": 506.47, "learning_rate": 0.00010004124461490297, "loss": 0.4574, "step": 25830 }, { "epoch": 506.67, "learning_rate": 0.00010000304036020466, "loss": 0.456, "step": 25840 }, { "epoch": 506.86, "learning_rate": 9.996482880517619e-05, "loss": 0.4532, "step": 25850 }, { "epoch": 507.0, "eval_loss": 0.4579889178276062, "eval_runtime": 2.2839, "eval_samples_per_second": 997.87, "eval_steps_per_second": 3.941, "step": 25857 }, { "epoch": 507.06, "learning_rate": 9.992660996097446e-05, "loss": 0.4539, "step": 25860 }, { "epoch": 507.25, "learning_rate": 9.988838383875856e-05, "loss": 0.4575, "step": 25870 }, { "epoch": 507.45, "learning_rate": 9.985015044968964e-05, "loss": 0.4508, "step": 25880 }, { "epoch": 507.65, "learning_rate": 9.981190980493099e-05, "loss": 0.4547, "step": 25890 }, { "epoch": 507.84, "learning_rate": 9.977366191564806e-05, "loss": 0.4555, "step": 25900 }, { "epoch": 508.0, "eval_loss": 0.46151694655418396, "eval_runtime": 2.2996, "eval_samples_per_second": 991.055, "eval_steps_per_second": 3.914, "step": 25908 }, { "epoch": 508.04, "learning_rate": 9.973540679300834e-05, "loss": 0.4548, "step": 25910 }, { "epoch": 508.24, "learning_rate": 9.96971444481815e-05, "loss": 0.4524, "step": 25920 }, { "epoch": 508.43, "learning_rate": 9.965887489233927e-05, "loss": 0.4586, "step": 25930 }, { "epoch": 508.63, "learning_rate": 9.962059813665552e-05, "loss": 0.4573, "step": 25940 }, { "epoch": 508.82, "learning_rate": 9.95823141923062e-05, "loss": 0.4571, "step": 25950 }, { "epoch": 509.0, "eval_loss": 0.4616622030735016, "eval_runtime": 2.1945, "eval_samples_per_second": 1038.515, "eval_steps_per_second": 4.101, "step": 25959 }, { "epoch": 509.02, "learning_rate": 9.954402307046938e-05, "loss": 0.4542, "step": 25960 }, { "epoch": 509.22, "learning_rate": 9.950572478232521e-05, "loss": 0.4546, "step": 25970 }, { "epoch": 509.41, "learning_rate": 9.946741933905595e-05, "loss": 0.4548, "step": 25980 }, { "epoch": 509.61, "learning_rate": 9.942910675184589e-05, "loss": 0.46, "step": 25990 }, { "epoch": 509.8, "learning_rate": 9.93907870318815e-05, "loss": 0.4555, "step": 26000 }, { "epoch": 510.0, "learning_rate": 9.935246019035126e-05, "loss": 0.4561, "step": 26010 }, { "epoch": 510.0, "eval_loss": 0.4578864276409149, "eval_runtime": 2.2028, "eval_samples_per_second": 1034.57, "eval_steps_per_second": 4.086, "step": 26010 }, { "epoch": 510.2, "learning_rate": 9.931412623844574e-05, "loss": 0.4559, "step": 26020 }, { "epoch": 510.39, "learning_rate": 9.927578518735765e-05, "loss": 0.4575, "step": 26030 }, { "epoch": 510.59, "learning_rate": 9.923743704828166e-05, "loss": 0.4473, "step": 26040 }, { "epoch": 510.78, "learning_rate": 9.919908183241461e-05, "loss": 0.4509, "step": 26050 }, { "epoch": 510.98, "learning_rate": 9.916071955095537e-05, "loss": 0.4541, "step": 26060 }, { "epoch": 511.0, "eval_loss": 0.46014508605003357, "eval_runtime": 2.2714, "eval_samples_per_second": 1003.329, "eval_steps_per_second": 3.962, "step": 26061 }, { "epoch": 511.18, "learning_rate": 9.912235021510483e-05, "loss": 0.4526, "step": 26070 }, { "epoch": 511.37, "learning_rate": 9.908397383606601e-05, "loss": 0.4553, "step": 26080 }, { "epoch": 511.57, "learning_rate": 9.904559042504398e-05, "loss": 0.455, "step": 26090 }, { "epoch": 511.76, "learning_rate": 9.900719999324578e-05, "loss": 0.4538, "step": 26100 }, { "epoch": 511.96, "learning_rate": 9.896880255188064e-05, "loss": 0.4534, "step": 26110 }, { "epoch": 512.0, "eval_loss": 0.4626559019088745, "eval_runtime": 2.1826, "eval_samples_per_second": 1044.191, "eval_steps_per_second": 4.124, "step": 26112 }, { "epoch": 512.16, "learning_rate": 9.893039811215967e-05, "loss": 0.4576, "step": 26120 }, { "epoch": 512.35, "learning_rate": 9.889198668529617e-05, "loss": 0.4529, "step": 26130 }, { "epoch": 512.55, "learning_rate": 9.88535682825054e-05, "loss": 0.457, "step": 26140 }, { "epoch": 512.75, "learning_rate": 9.881514291500467e-05, "loss": 0.4545, "step": 26150 }, { "epoch": 512.94, "learning_rate": 9.877671059401334e-05, "loss": 0.4569, "step": 26160 }, { "epoch": 513.0, "eval_loss": 0.46150699257850647, "eval_runtime": 2.2927, "eval_samples_per_second": 994.031, "eval_steps_per_second": 3.926, "step": 26163 }, { "epoch": 513.14, "learning_rate": 9.873827133075279e-05, "loss": 0.4467, "step": 26170 }, { "epoch": 513.33, "learning_rate": 9.869982513644645e-05, "loss": 0.4549, "step": 26180 }, { "epoch": 513.53, "learning_rate": 9.866137202231968e-05, "loss": 0.4542, "step": 26190 }, { "epoch": 513.73, "learning_rate": 9.86229119996e-05, "loss": 0.4548, "step": 26200 }, { "epoch": 513.92, "learning_rate": 9.858444507951688e-05, "loss": 0.4583, "step": 26210 }, { "epoch": 514.0, "eval_loss": 0.45271191000938416, "eval_runtime": 2.2058, "eval_samples_per_second": 1033.196, "eval_steps_per_second": 4.08, "step": 26214 }, { "epoch": 514.12, "learning_rate": 9.854597127330176e-05, "loss": 0.4542, "step": 26220 }, { "epoch": 514.31, "learning_rate": 9.850749059218815e-05, "loss": 0.4526, "step": 26230 }, { "epoch": 514.51, "learning_rate": 9.846900304741158e-05, "loss": 0.4558, "step": 26240 }, { "epoch": 514.71, "learning_rate": 9.84305086502095e-05, "loss": 0.4566, "step": 26250 }, { "epoch": 514.9, "learning_rate": 9.839200741182147e-05, "loss": 0.4498, "step": 26260 }, { "epoch": 515.0, "eval_loss": 0.45869016647338867, "eval_runtime": 2.3386, "eval_samples_per_second": 974.499, "eval_steps_per_second": 3.848, "step": 26265 }, { "epoch": 515.1, "learning_rate": 9.835349934348896e-05, "loss": 0.4524, "step": 26270 }, { "epoch": 515.29, "learning_rate": 9.831498445645545e-05, "loss": 0.4531, "step": 26280 }, { "epoch": 515.49, "learning_rate": 9.827646276196647e-05, "loss": 0.4572, "step": 26290 }, { "epoch": 515.69, "learning_rate": 9.82379342712695e-05, "loss": 0.4539, "step": 26300 }, { "epoch": 515.88, "learning_rate": 9.819939899561396e-05, "loss": 0.4511, "step": 26310 }, { "epoch": 516.0, "eval_loss": 0.45518627762794495, "eval_runtime": 2.1521, "eval_samples_per_second": 1058.985, "eval_steps_per_second": 4.182, "step": 26316 }, { "epoch": 516.08, "learning_rate": 9.816085694625133e-05, "loss": 0.4531, "step": 26320 }, { "epoch": 516.27, "learning_rate": 9.812230813443498e-05, "loss": 0.4527, "step": 26330 }, { "epoch": 516.47, "learning_rate": 9.808375257142035e-05, "loss": 0.4538, "step": 26340 }, { "epoch": 516.67, "learning_rate": 9.804519026846476e-05, "loss": 0.4529, "step": 26350 }, { "epoch": 516.86, "learning_rate": 9.800662123682759e-05, "loss": 0.4535, "step": 26360 }, { "epoch": 517.0, "eval_loss": 0.457948237657547, "eval_runtime": 2.2407, "eval_samples_per_second": 1017.082, "eval_steps_per_second": 4.017, "step": 26367 }, { "epoch": 517.06, "learning_rate": 9.796804548777013e-05, "loss": 0.456, "step": 26370 }, { "epoch": 517.25, "learning_rate": 9.79294630325556e-05, "loss": 0.4534, "step": 26380 }, { "epoch": 517.45, "learning_rate": 9.789087388244927e-05, "loss": 0.4549, "step": 26390 }, { "epoch": 517.65, "learning_rate": 9.785227804871827e-05, "loss": 0.4502, "step": 26400 }, { "epoch": 517.84, "learning_rate": 9.781367554263172e-05, "loss": 0.4551, "step": 26410 }, { "epoch": 518.0, "eval_loss": 0.4542873203754425, "eval_runtime": 2.2808, "eval_samples_per_second": 999.213, "eval_steps_per_second": 3.946, "step": 26418 }, { "epoch": 518.04, "learning_rate": 9.777506637546072e-05, "loss": 0.4515, "step": 26420 }, { "epoch": 518.24, "learning_rate": 9.773645055847825e-05, "loss": 0.4536, "step": 26430 }, { "epoch": 518.43, "learning_rate": 9.76978281029593e-05, "loss": 0.4534, "step": 26440 }, { "epoch": 518.63, "learning_rate": 9.76591990201808e-05, "loss": 0.4444, "step": 26450 }, { "epoch": 518.82, "learning_rate": 9.762056332142147e-05, "loss": 0.4581, "step": 26460 }, { "epoch": 519.0, "eval_loss": 0.4596610963344574, "eval_runtime": 2.2928, "eval_samples_per_second": 993.96, "eval_steps_per_second": 3.925, "step": 26469 }, { "epoch": 519.02, "learning_rate": 9.758192101796217e-05, "loss": 0.454, "step": 26470 }, { "epoch": 519.22, "learning_rate": 9.754327212108556e-05, "loss": 0.4528, "step": 26480 }, { "epoch": 519.41, "learning_rate": 9.750461664207622e-05, "loss": 0.4552, "step": 26490 }, { "epoch": 519.61, "learning_rate": 9.746595459222076e-05, "loss": 0.4541, "step": 26500 }, { "epoch": 519.8, "learning_rate": 9.742728598280759e-05, "loss": 0.4487, "step": 26510 }, { "epoch": 520.0, "learning_rate": 9.738861082512709e-05, "loss": 0.4573, "step": 26520 }, { "epoch": 520.0, "eval_loss": 0.45396384596824646, "eval_runtime": 2.2352, "eval_samples_per_second": 1019.61, "eval_steps_per_second": 4.027, "step": 26520 }, { "epoch": 520.2, "learning_rate": 9.734992913047155e-05, "loss": 0.4491, "step": 26530 }, { "epoch": 520.39, "learning_rate": 9.731124091013513e-05, "loss": 0.452, "step": 26540 }, { "epoch": 520.59, "learning_rate": 9.727254617541398e-05, "loss": 0.4491, "step": 26550 }, { "epoch": 520.78, "learning_rate": 9.723384493760606e-05, "loss": 0.4543, "step": 26560 }, { "epoch": 520.98, "learning_rate": 9.719513720801126e-05, "loss": 0.4495, "step": 26570 }, { "epoch": 521.0, "eval_loss": 0.4577941596508026, "eval_runtime": 2.2275, "eval_samples_per_second": 1023.139, "eval_steps_per_second": 4.04, "step": 26571 }, { "epoch": 521.18, "learning_rate": 9.715642299793144e-05, "loss": 0.4459, "step": 26580 }, { "epoch": 521.37, "learning_rate": 9.711770231867022e-05, "loss": 0.4513, "step": 26590 }, { "epoch": 521.57, "learning_rate": 9.70789751815332e-05, "loss": 0.4512, "step": 26600 }, { "epoch": 521.76, "learning_rate": 9.704024159782782e-05, "loss": 0.4571, "step": 26610 }, { "epoch": 521.96, "learning_rate": 9.700150157886345e-05, "loss": 0.4532, "step": 26620 }, { "epoch": 522.0, "eval_loss": 0.4605408012866974, "eval_runtime": 2.251, "eval_samples_per_second": 1012.42, "eval_steps_per_second": 3.998, "step": 26622 }, { "epoch": 522.16, "learning_rate": 9.69627551359513e-05, "loss": 0.455, "step": 26630 }, { "epoch": 522.35, "learning_rate": 9.692400228040447e-05, "loss": 0.4555, "step": 26640 }, { "epoch": 522.55, "learning_rate": 9.688524302353792e-05, "loss": 0.4535, "step": 26650 }, { "epoch": 522.75, "learning_rate": 9.68464773766685e-05, "loss": 0.4543, "step": 26660 }, { "epoch": 522.94, "learning_rate": 9.68077053511149e-05, "loss": 0.4474, "step": 26670 }, { "epoch": 523.0, "eval_loss": 0.45791128277778625, "eval_runtime": 2.139, "eval_samples_per_second": 1065.459, "eval_steps_per_second": 4.208, "step": 26673 }, { "epoch": 523.14, "learning_rate": 9.67689269581977e-05, "loss": 0.454, "step": 26680 }, { "epoch": 523.33, "learning_rate": 9.673014220923934e-05, "loss": 0.4547, "step": 26690 }, { "epoch": 523.53, "learning_rate": 9.669135111556406e-05, "loss": 0.4555, "step": 26700 }, { "epoch": 523.73, "learning_rate": 9.665255368849804e-05, "loss": 0.4555, "step": 26710 }, { "epoch": 523.92, "learning_rate": 9.661374993936924e-05, "loss": 0.4504, "step": 26720 }, { "epoch": 524.0, "eval_loss": 0.456340491771698, "eval_runtime": 2.2323, "eval_samples_per_second": 1020.94, "eval_steps_per_second": 4.032, "step": 26724 }, { "epoch": 524.12, "learning_rate": 9.657493987950747e-05, "loss": 0.4502, "step": 26730 }, { "epoch": 524.31, "learning_rate": 9.653612352024446e-05, "loss": 0.4512, "step": 26740 }, { "epoch": 524.51, "learning_rate": 9.649730087291364e-05, "loss": 0.4583, "step": 26750 }, { "epoch": 524.71, "learning_rate": 9.645847194885042e-05, "loss": 0.4529, "step": 26760 }, { "epoch": 524.9, "learning_rate": 9.641963675939197e-05, "loss": 0.4529, "step": 26770 }, { "epoch": 525.0, "eval_loss": 0.45831215381622314, "eval_runtime": 2.2948, "eval_samples_per_second": 993.117, "eval_steps_per_second": 3.922, "step": 26775 }, { "epoch": 525.1, "learning_rate": 9.638079531587728e-05, "loss": 0.4495, "step": 26780 }, { "epoch": 525.29, "learning_rate": 9.63419476296472e-05, "loss": 0.4527, "step": 26790 }, { "epoch": 525.49, "learning_rate": 9.63030937120444e-05, "loss": 0.4501, "step": 26800 }, { "epoch": 525.69, "learning_rate": 9.626423357441331e-05, "loss": 0.4495, "step": 26810 }, { "epoch": 525.88, "learning_rate": 9.622536722810026e-05, "loss": 0.4475, "step": 26820 }, { "epoch": 526.0, "eval_loss": 0.4616130292415619, "eval_runtime": 2.2822, "eval_samples_per_second": 998.609, "eval_steps_per_second": 3.944, "step": 26826 }, { "epoch": 526.08, "learning_rate": 9.618649468445336e-05, "loss": 0.4521, "step": 26830 }, { "epoch": 526.27, "learning_rate": 9.614761595482252e-05, "loss": 0.4569, "step": 26840 }, { "epoch": 526.47, "learning_rate": 9.610873105055945e-05, "loss": 0.458, "step": 26850 }, { "epoch": 526.67, "learning_rate": 9.60698399830177e-05, "loss": 0.4507, "step": 26860 }, { "epoch": 526.86, "learning_rate": 9.603094276355257e-05, "loss": 0.4457, "step": 26870 }, { "epoch": 527.0, "eval_loss": 0.4558161199092865, "eval_runtime": 2.317, "eval_samples_per_second": 983.601, "eval_steps_per_second": 3.884, "step": 26877 }, { "epoch": 527.06, "learning_rate": 9.599203940352118e-05, "loss": 0.451, "step": 26880 }, { "epoch": 527.25, "learning_rate": 9.595312991428245e-05, "loss": 0.4454, "step": 26890 }, { "epoch": 527.45, "learning_rate": 9.59142143071971e-05, "loss": 0.4559, "step": 26900 }, { "epoch": 527.65, "learning_rate": 9.587529259362759e-05, "loss": 0.4518, "step": 26910 }, { "epoch": 527.84, "learning_rate": 9.583636478493823e-05, "loss": 0.4532, "step": 26920 }, { "epoch": 528.0, "eval_loss": 0.45839163661003113, "eval_runtime": 2.2859, "eval_samples_per_second": 996.975, "eval_steps_per_second": 3.937, "step": 26928 }, { "epoch": 528.04, "learning_rate": 9.579743089249503e-05, "loss": 0.4491, "step": 26930 }, { "epoch": 528.24, "learning_rate": 9.575849092766584e-05, "loss": 0.4496, "step": 26940 }, { "epoch": 528.43, "learning_rate": 9.571954490182026e-05, "loss": 0.4544, "step": 26950 }, { "epoch": 528.63, "learning_rate": 9.568059282632964e-05, "loss": 0.4531, "step": 26960 }, { "epoch": 528.82, "learning_rate": 9.564163471256716e-05, "loss": 0.4566, "step": 26970 }, { "epoch": 529.0, "eval_loss": 0.4572843313217163, "eval_runtime": 2.3239, "eval_samples_per_second": 980.668, "eval_steps_per_second": 3.873, "step": 26979 }, { "epoch": 529.02, "learning_rate": 9.56026705719077e-05, "loss": 0.4539, "step": 26980 }, { "epoch": 529.22, "learning_rate": 9.55637004157279e-05, "loss": 0.46, "step": 26990 }, { "epoch": 529.41, "learning_rate": 9.552472425540622e-05, "loss": 0.4526, "step": 27000 }, { "epoch": 529.61, "learning_rate": 9.548574210232277e-05, "loss": 0.454, "step": 27010 }, { "epoch": 529.8, "learning_rate": 9.544675396785952e-05, "loss": 0.4532, "step": 27020 }, { "epoch": 530.0, "learning_rate": 9.540775986340012e-05, "loss": 0.4546, "step": 27030 }, { "epoch": 530.0, "eval_loss": 0.4563215970993042, "eval_runtime": 2.1683, "eval_samples_per_second": 1051.064, "eval_steps_per_second": 4.151, "step": 27030 }, { "epoch": 530.2, "learning_rate": 9.536875980032996e-05, "loss": 0.4493, "step": 27040 }, { "epoch": 530.39, "learning_rate": 9.532975379003623e-05, "loss": 0.4495, "step": 27050 }, { "epoch": 530.59, "learning_rate": 9.529074184390779e-05, "loss": 0.4491, "step": 27060 }, { "epoch": 530.78, "learning_rate": 9.525172397333525e-05, "loss": 0.4553, "step": 27070 }, { "epoch": 530.98, "learning_rate": 9.521270018971095e-05, "loss": 0.4479, "step": 27080 }, { "epoch": 531.0, "eval_loss": 0.46282848715782166, "eval_runtime": 2.3449, "eval_samples_per_second": 971.905, "eval_steps_per_second": 3.838, "step": 27081 }, { "epoch": 531.18, "learning_rate": 9.5173670504429e-05, "loss": 0.4551, "step": 27090 }, { "epoch": 531.37, "learning_rate": 9.513463492888519e-05, "loss": 0.4503, "step": 27100 }, { "epoch": 531.57, "learning_rate": 9.509559347447701e-05, "loss": 0.4468, "step": 27110 }, { "epoch": 531.76, "learning_rate": 9.50565461526037e-05, "loss": 0.4551, "step": 27120 }, { "epoch": 531.96, "learning_rate": 9.501749297466626e-05, "loss": 0.4485, "step": 27130 }, { "epoch": 532.0, "eval_loss": 0.45467355847358704, "eval_runtime": 2.2383, "eval_samples_per_second": 1018.188, "eval_steps_per_second": 4.021, "step": 27132 }, { "epoch": 532.16, "learning_rate": 9.497843395206726e-05, "loss": 0.4511, "step": 27140 }, { "epoch": 532.35, "learning_rate": 9.493936909621113e-05, "loss": 0.4563, "step": 27150 }, { "epoch": 532.55, "learning_rate": 9.49002984185039e-05, "loss": 0.4486, "step": 27160 }, { "epoch": 532.75, "learning_rate": 9.486122193035337e-05, "loss": 0.4467, "step": 27170 }, { "epoch": 532.94, "learning_rate": 9.482213964316898e-05, "loss": 0.4491, "step": 27180 }, { "epoch": 533.0, "eval_loss": 0.45390358567237854, "eval_runtime": 2.2377, "eval_samples_per_second": 1018.434, "eval_steps_per_second": 4.022, "step": 27183 }, { "epoch": 533.14, "learning_rate": 9.478305156836188e-05, "loss": 0.4484, "step": 27190 }, { "epoch": 533.33, "learning_rate": 9.474395771734493e-05, "loss": 0.45, "step": 27200 }, { "epoch": 533.53, "learning_rate": 9.470485810153268e-05, "loss": 0.4471, "step": 27210 }, { "epoch": 533.73, "learning_rate": 9.46657527323413e-05, "loss": 0.449, "step": 27220 }, { "epoch": 533.92, "learning_rate": 9.462664162118871e-05, "loss": 0.4522, "step": 27230 }, { "epoch": 534.0, "eval_loss": 0.4536179304122925, "eval_runtime": 2.288, "eval_samples_per_second": 996.082, "eval_steps_per_second": 3.934, "step": 27234 }, { "epoch": 534.12, "learning_rate": 9.458752477949451e-05, "loss": 0.4502, "step": 27240 }, { "epoch": 534.31, "learning_rate": 9.454840221867989e-05, "loss": 0.449, "step": 27250 }, { "epoch": 534.51, "learning_rate": 9.450927395016781e-05, "loss": 0.4502, "step": 27260 }, { "epoch": 534.71, "learning_rate": 9.447013998538283e-05, "loss": 0.4442, "step": 27270 }, { "epoch": 534.9, "learning_rate": 9.44310003357512e-05, "loss": 0.4477, "step": 27280 }, { "epoch": 535.0, "eval_loss": 0.45614269375801086, "eval_runtime": 2.3434, "eval_samples_per_second": 972.526, "eval_steps_per_second": 3.841, "step": 27285 }, { "epoch": 535.1, "learning_rate": 9.439185501270083e-05, "loss": 0.4535, "step": 27290 }, { "epoch": 535.29, "learning_rate": 9.435270402766128e-05, "loss": 0.4503, "step": 27300 }, { "epoch": 535.49, "learning_rate": 9.431354739206374e-05, "loss": 0.4503, "step": 27310 }, { "epoch": 535.69, "learning_rate": 9.42743851173411e-05, "loss": 0.4542, "step": 27320 }, { "epoch": 535.88, "learning_rate": 9.423521721492788e-05, "loss": 0.45, "step": 27330 }, { "epoch": 536.0, "eval_loss": 0.4529837667942047, "eval_runtime": 2.2382, "eval_samples_per_second": 1018.237, "eval_steps_per_second": 4.021, "step": 27336 }, { "epoch": 536.08, "learning_rate": 9.419604369626024e-05, "loss": 0.4511, "step": 27340 }, { "epoch": 536.27, "learning_rate": 9.415686457277593e-05, "loss": 0.4497, "step": 27350 }, { "epoch": 536.47, "learning_rate": 9.41176798559144e-05, "loss": 0.4475, "step": 27360 }, { "epoch": 536.67, "learning_rate": 9.407848955711672e-05, "loss": 0.445, "step": 27370 }, { "epoch": 536.86, "learning_rate": 9.403929368782558e-05, "loss": 0.4522, "step": 27380 }, { "epoch": 537.0, "eval_loss": 0.45247963070869446, "eval_runtime": 2.2916, "eval_samples_per_second": 994.507, "eval_steps_per_second": 3.927, "step": 27387 }, { "epoch": 537.06, "learning_rate": 9.40000922594853e-05, "loss": 0.4471, "step": 27390 }, { "epoch": 537.25, "learning_rate": 9.396088528354184e-05, "loss": 0.4472, "step": 27400 }, { "epoch": 537.45, "learning_rate": 9.392167277144273e-05, "loss": 0.4466, "step": 27410 }, { "epoch": 537.65, "learning_rate": 9.388245473463717e-05, "loss": 0.4453, "step": 27420 }, { "epoch": 537.84, "learning_rate": 9.384323118457593e-05, "loss": 0.4475, "step": 27430 }, { "epoch": 538.0, "eval_loss": 0.4553549885749817, "eval_runtime": 2.1572, "eval_samples_per_second": 1056.468, "eval_steps_per_second": 4.172, "step": 27438 }, { "epoch": 538.04, "learning_rate": 9.380400213271146e-05, "loss": 0.4426, "step": 27440 }, { "epoch": 538.24, "learning_rate": 9.376476759049773e-05, "loss": 0.4476, "step": 27450 }, { "epoch": 538.43, "learning_rate": 9.372552756939033e-05, "loss": 0.447, "step": 27460 }, { "epoch": 538.63, "learning_rate": 9.368628208084654e-05, "loss": 0.4471, "step": 27470 }, { "epoch": 538.82, "learning_rate": 9.36470311363251e-05, "loss": 0.4475, "step": 27480 }, { "epoch": 539.0, "eval_loss": 0.44862520694732666, "eval_runtime": 2.2485, "eval_samples_per_second": 1013.542, "eval_steps_per_second": 4.003, "step": 27489 }, { "epoch": 539.02, "learning_rate": 9.360777474728644e-05, "loss": 0.446, "step": 27490 }, { "epoch": 539.22, "learning_rate": 9.356851292519255e-05, "loss": 0.4503, "step": 27500 }, { "epoch": 539.41, "learning_rate": 9.3529245681507e-05, "loss": 0.4517, "step": 27510 }, { "epoch": 539.61, "learning_rate": 9.348997302769497e-05, "loss": 0.4513, "step": 27520 }, { "epoch": 539.8, "learning_rate": 9.345069497522318e-05, "loss": 0.445, "step": 27530 }, { "epoch": 540.0, "learning_rate": 9.341141153555993e-05, "loss": 0.4512, "step": 27540 }, { "epoch": 540.0, "eval_loss": 0.45837074518203735, "eval_runtime": 2.1784, "eval_samples_per_second": 1046.161, "eval_steps_per_second": 4.131, "step": 27540 }, { "epoch": 540.2, "learning_rate": 9.337212272017514e-05, "loss": 0.4532, "step": 27550 }, { "epoch": 540.39, "learning_rate": 9.333282854054025e-05, "loss": 0.454, "step": 27560 }, { "epoch": 540.59, "learning_rate": 9.329352900812828e-05, "loss": 0.4488, "step": 27570 }, { "epoch": 540.78, "learning_rate": 9.325422413441384e-05, "loss": 0.4521, "step": 27580 }, { "epoch": 540.98, "learning_rate": 9.321491393087304e-05, "loss": 0.445, "step": 27590 }, { "epoch": 541.0, "eval_loss": 0.4543311297893524, "eval_runtime": 2.2001, "eval_samples_per_second": 1035.874, "eval_steps_per_second": 4.091, "step": 27591 }, { "epoch": 541.18, "learning_rate": 9.317559840898362e-05, "loss": 0.4519, "step": 27600 }, { "epoch": 541.37, "learning_rate": 9.313627758022483e-05, "loss": 0.4481, "step": 27610 }, { "epoch": 541.57, "learning_rate": 9.309695145607745e-05, "loss": 0.4495, "step": 27620 }, { "epoch": 541.76, "learning_rate": 9.305762004802384e-05, "loss": 0.4435, "step": 27630 }, { "epoch": 541.96, "learning_rate": 9.30182833675479e-05, "loss": 0.4478, "step": 27640 }, { "epoch": 542.0, "eval_loss": 0.45065122842788696, "eval_runtime": 2.1992, "eval_samples_per_second": 1036.271, "eval_steps_per_second": 4.092, "step": 27642 }, { "epoch": 542.16, "learning_rate": 9.297894142613508e-05, "loss": 0.4459, "step": 27650 }, { "epoch": 542.35, "learning_rate": 9.293959423527233e-05, "loss": 0.4501, "step": 27660 }, { "epoch": 542.55, "learning_rate": 9.290024180644814e-05, "loss": 0.4467, "step": 27670 }, { "epoch": 542.75, "learning_rate": 9.286088415115258e-05, "loss": 0.4475, "step": 27680 }, { "epoch": 542.94, "learning_rate": 9.282152128087714e-05, "loss": 0.4472, "step": 27690 }, { "epoch": 543.0, "eval_loss": 0.45196664333343506, "eval_runtime": 2.1993, "eval_samples_per_second": 1036.26, "eval_steps_per_second": 4.092, "step": 27693 }, { "epoch": 543.14, "learning_rate": 9.278215320711498e-05, "loss": 0.4518, "step": 27700 }, { "epoch": 543.33, "learning_rate": 9.274277994136068e-05, "loss": 0.4531, "step": 27710 }, { "epoch": 543.53, "learning_rate": 9.270340149511031e-05, "loss": 0.4465, "step": 27720 }, { "epoch": 543.73, "learning_rate": 9.266401787986152e-05, "loss": 0.4475, "step": 27730 }, { "epoch": 543.92, "learning_rate": 9.262462910711349e-05, "loss": 0.448, "step": 27740 }, { "epoch": 544.0, "eval_loss": 0.450714111328125, "eval_runtime": 2.2524, "eval_samples_per_second": 1011.789, "eval_steps_per_second": 3.996, "step": 27744 }, { "epoch": 544.12, "learning_rate": 9.25852351883668e-05, "loss": 0.4441, "step": 27750 }, { "epoch": 544.31, "learning_rate": 9.254583613512365e-05, "loss": 0.4489, "step": 27760 }, { "epoch": 544.51, "learning_rate": 9.250643195888763e-05, "loss": 0.4485, "step": 27770 }, { "epoch": 544.71, "learning_rate": 9.24670226711639e-05, "loss": 0.4464, "step": 27780 }, { "epoch": 544.9, "learning_rate": 9.242760828345914e-05, "loss": 0.4447, "step": 27790 }, { "epoch": 545.0, "eval_loss": 0.4513770639896393, "eval_runtime": 2.2074, "eval_samples_per_second": 1032.432, "eval_steps_per_second": 4.077, "step": 27795 }, { "epoch": 545.1, "learning_rate": 9.238818880728141e-05, "loss": 0.4453, "step": 27800 }, { "epoch": 545.29, "learning_rate": 9.234876425414038e-05, "loss": 0.4424, "step": 27810 }, { "epoch": 545.49, "learning_rate": 9.230933463554707e-05, "loss": 0.4513, "step": 27820 }, { "epoch": 545.69, "learning_rate": 9.226989996301406e-05, "loss": 0.4442, "step": 27830 }, { "epoch": 545.88, "learning_rate": 9.223046024805545e-05, "loss": 0.4485, "step": 27840 }, { "epoch": 546.0, "eval_loss": 0.45527341961860657, "eval_runtime": 2.2727, "eval_samples_per_second": 1002.78, "eval_steps_per_second": 3.96, "step": 27846 }, { "epoch": 546.08, "learning_rate": 9.21910155021867e-05, "loss": 0.4453, "step": 27850 }, { "epoch": 546.27, "learning_rate": 9.215156573692484e-05, "loss": 0.4465, "step": 27860 }, { "epoch": 546.47, "learning_rate": 9.211211096378832e-05, "loss": 0.4485, "step": 27870 }, { "epoch": 546.67, "learning_rate": 9.207265119429701e-05, "loss": 0.447, "step": 27880 }, { "epoch": 546.86, "learning_rate": 9.203318643997231e-05, "loss": 0.4482, "step": 27890 }, { "epoch": 547.0, "eval_loss": 0.45322051644325256, "eval_runtime": 2.2565, "eval_samples_per_second": 1009.989, "eval_steps_per_second": 3.989, "step": 27897 }, { "epoch": 547.06, "learning_rate": 9.199371671233703e-05, "loss": 0.4495, "step": 27900 }, { "epoch": 547.25, "learning_rate": 9.19542420229155e-05, "loss": 0.4469, "step": 27910 }, { "epoch": 547.45, "learning_rate": 9.19147623832334e-05, "loss": 0.4501, "step": 27920 }, { "epoch": 547.65, "learning_rate": 9.18752778048179e-05, "loss": 0.4452, "step": 27930 }, { "epoch": 547.84, "learning_rate": 9.183578829919766e-05, "loss": 0.4448, "step": 27940 }, { "epoch": 548.0, "eval_loss": 0.45331457257270813, "eval_runtime": 2.3182, "eval_samples_per_second": 983.097, "eval_steps_per_second": 3.882, "step": 27948 }, { "epoch": 548.04, "learning_rate": 9.179629387790273e-05, "loss": 0.4482, "step": 27950 }, { "epoch": 548.24, "learning_rate": 9.175679455246455e-05, "loss": 0.4493, "step": 27960 }, { "epoch": 548.43, "learning_rate": 9.171729033441608e-05, "loss": 0.4473, "step": 27970 }, { "epoch": 548.63, "learning_rate": 9.167778123529166e-05, "loss": 0.4484, "step": 27980 }, { "epoch": 548.82, "learning_rate": 9.163826726662708e-05, "loss": 0.4467, "step": 27990 }, { "epoch": 549.0, "eval_loss": 0.451092928647995, "eval_runtime": 2.3345, "eval_samples_per_second": 976.221, "eval_steps_per_second": 3.855, "step": 27999 }, { "epoch": 549.02, "learning_rate": 9.159874843995953e-05, "loss": 0.441, "step": 28000 }, { "epoch": 549.22, "learning_rate": 9.155922476682761e-05, "loss": 0.4482, "step": 28010 }, { "epoch": 549.41, "learning_rate": 9.151969625877138e-05, "loss": 0.4456, "step": 28020 }, { "epoch": 549.61, "learning_rate": 9.148016292733227e-05, "loss": 0.4459, "step": 28030 }, { "epoch": 549.8, "learning_rate": 9.144062478405311e-05, "loss": 0.447, "step": 28040 }, { "epoch": 550.0, "learning_rate": 9.140108184047819e-05, "loss": 0.4473, "step": 28050 }, { "epoch": 550.0, "eval_loss": 0.4531325399875641, "eval_runtime": 2.2118, "eval_samples_per_second": 1030.365, "eval_steps_per_second": 4.069, "step": 28050 }, { "epoch": 550.2, "learning_rate": 9.136153410815314e-05, "loss": 0.4424, "step": 28060 }, { "epoch": 550.39, "learning_rate": 9.132198159862502e-05, "loss": 0.4469, "step": 28070 }, { "epoch": 550.59, "learning_rate": 9.128242432344232e-05, "loss": 0.4473, "step": 28080 }, { "epoch": 550.78, "learning_rate": 9.124286229415483e-05, "loss": 0.4431, "step": 28090 }, { "epoch": 550.98, "learning_rate": 9.12032955223138e-05, "loss": 0.4423, "step": 28100 }, { "epoch": 551.0, "eval_loss": 0.4461700916290283, "eval_runtime": 2.3161, "eval_samples_per_second": 983.994, "eval_steps_per_second": 3.886, "step": 28101 }, { "epoch": 551.18, "learning_rate": 9.116372401947184e-05, "loss": 0.4453, "step": 28110 }, { "epoch": 551.37, "learning_rate": 9.112414779718297e-05, "loss": 0.4445, "step": 28120 }, { "epoch": 551.57, "learning_rate": 9.108456686700254e-05, "loss": 0.4451, "step": 28130 }, { "epoch": 551.76, "learning_rate": 9.104498124048729e-05, "loss": 0.4423, "step": 28140 }, { "epoch": 551.96, "learning_rate": 9.100539092919539e-05, "loss": 0.4473, "step": 28150 }, { "epoch": 552.0, "eval_loss": 0.45376914739608765, "eval_runtime": 2.2299, "eval_samples_per_second": 1022.003, "eval_steps_per_second": 4.036, "step": 28152 }, { "epoch": 552.16, "learning_rate": 9.096579594468628e-05, "loss": 0.4518, "step": 28160 }, { "epoch": 552.35, "learning_rate": 9.092619629852082e-05, "loss": 0.4467, "step": 28170 }, { "epoch": 552.55, "learning_rate": 9.088659200226123e-05, "loss": 0.4415, "step": 28180 }, { "epoch": 552.75, "learning_rate": 9.084698306747106e-05, "loss": 0.4454, "step": 28190 }, { "epoch": 552.94, "learning_rate": 9.080736950571528e-05, "loss": 0.4463, "step": 28200 }, { "epoch": 553.0, "eval_loss": 0.44718244671821594, "eval_runtime": 2.2312, "eval_samples_per_second": 1021.403, "eval_steps_per_second": 4.034, "step": 28203 }, { "epoch": 553.14, "learning_rate": 9.076775132856014e-05, "loss": 0.4421, "step": 28210 }, { "epoch": 553.33, "learning_rate": 9.072812854757326e-05, "loss": 0.4447, "step": 28220 }, { "epoch": 553.53, "learning_rate": 9.068850117432362e-05, "loss": 0.4417, "step": 28230 }, { "epoch": 553.73, "learning_rate": 9.064886922038155e-05, "loss": 0.4433, "step": 28240 }, { "epoch": 553.92, "learning_rate": 9.060923269731863e-05, "loss": 0.4459, "step": 28250 }, { "epoch": 554.0, "eval_loss": 0.44858473539352417, "eval_runtime": 2.2955, "eval_samples_per_second": 992.806, "eval_steps_per_second": 3.921, "step": 28254 }, { "epoch": 554.12, "learning_rate": 9.056959161670789e-05, "loss": 0.4465, "step": 28260 }, { "epoch": 554.31, "learning_rate": 9.052994599012364e-05, "loss": 0.4401, "step": 28270 }, { "epoch": 554.51, "learning_rate": 9.049029582914152e-05, "loss": 0.4439, "step": 28280 }, { "epoch": 554.71, "learning_rate": 9.045064114533851e-05, "loss": 0.4463, "step": 28290 }, { "epoch": 554.9, "learning_rate": 9.041098195029282e-05, "loss": 0.4432, "step": 28300 }, { "epoch": 555.0, "eval_loss": 0.4470457136631012, "eval_runtime": 2.3026, "eval_samples_per_second": 989.739, "eval_steps_per_second": 3.909, "step": 28305 }, { "epoch": 555.1, "learning_rate": 9.037131825558412e-05, "loss": 0.4453, "step": 28310 }, { "epoch": 555.29, "learning_rate": 9.033165007279328e-05, "loss": 0.4443, "step": 28320 }, { "epoch": 555.49, "learning_rate": 9.029197741350259e-05, "loss": 0.4401, "step": 28330 }, { "epoch": 555.69, "learning_rate": 9.025230028929551e-05, "loss": 0.4378, "step": 28340 }, { "epoch": 555.88, "learning_rate": 9.021261871175689e-05, "loss": 0.4448, "step": 28350 }, { "epoch": 556.0, "eval_loss": 0.45223483443260193, "eval_runtime": 2.292, "eval_samples_per_second": 994.322, "eval_steps_per_second": 3.927, "step": 28356 }, { "epoch": 556.08, "learning_rate": 9.017293269247294e-05, "loss": 0.4481, "step": 28360 }, { "epoch": 556.27, "learning_rate": 9.0133242243031e-05, "loss": 0.448, "step": 28370 }, { "epoch": 556.47, "learning_rate": 9.009354737501981e-05, "loss": 0.4442, "step": 28380 }, { "epoch": 556.67, "learning_rate": 9.005384810002943e-05, "loss": 0.4417, "step": 28390 }, { "epoch": 556.86, "learning_rate": 9.001414442965111e-05, "loss": 0.4406, "step": 28400 }, { "epoch": 557.0, "eval_loss": 0.45280978083610535, "eval_runtime": 2.1811, "eval_samples_per_second": 1044.869, "eval_steps_per_second": 4.126, "step": 28407 }, { "epoch": 557.06, "learning_rate": 8.997443637547749e-05, "loss": 0.4386, "step": 28410 }, { "epoch": 557.25, "learning_rate": 8.99347239491024e-05, "loss": 0.4453, "step": 28420 }, { "epoch": 557.45, "learning_rate": 8.9895007162121e-05, "loss": 0.445, "step": 28430 }, { "epoch": 557.65, "learning_rate": 8.98552860261297e-05, "loss": 0.4407, "step": 28440 }, { "epoch": 557.84, "learning_rate": 8.981556055272618e-05, "loss": 0.4433, "step": 28450 }, { "epoch": 558.0, "eval_loss": 0.45024630427360535, "eval_runtime": 2.2208, "eval_samples_per_second": 1026.227, "eval_steps_per_second": 4.053, "step": 28458 }, { "epoch": 558.04, "learning_rate": 8.97758307535094e-05, "loss": 0.4461, "step": 28460 }, { "epoch": 558.24, "learning_rate": 8.973609664007956e-05, "loss": 0.4451, "step": 28470 }, { "epoch": 558.43, "learning_rate": 8.969635822403816e-05, "loss": 0.4448, "step": 28480 }, { "epoch": 558.63, "learning_rate": 8.965661551698793e-05, "loss": 0.4462, "step": 28490 }, { "epoch": 558.82, "learning_rate": 8.961686853053284e-05, "loss": 0.4447, "step": 28500 }, { "epoch": 559.0, "eval_loss": 0.44705361127853394, "eval_runtime": 2.3532, "eval_samples_per_second": 968.453, "eval_steps_per_second": 3.825, "step": 28509 }, { "epoch": 559.02, "learning_rate": 8.95771172762781e-05, "loss": 0.4436, "step": 28510 }, { "epoch": 559.22, "learning_rate": 8.953736176583024e-05, "loss": 0.4435, "step": 28520 }, { "epoch": 559.41, "learning_rate": 8.949760201079695e-05, "loss": 0.4488, "step": 28530 }, { "epoch": 559.61, "learning_rate": 8.945783802278721e-05, "loss": 0.4413, "step": 28540 }, { "epoch": 559.8, "learning_rate": 8.941806981341121e-05, "loss": 0.4442, "step": 28550 }, { "epoch": 560.0, "learning_rate": 8.937829739428038e-05, "loss": 0.4438, "step": 28560 }, { "epoch": 560.0, "eval_loss": 0.44999274611473083, "eval_runtime": 2.2349, "eval_samples_per_second": 1019.738, "eval_steps_per_second": 4.027, "step": 28560 }, { "epoch": 560.2, "learning_rate": 8.933852077700738e-05, "loss": 0.4454, "step": 28570 }, { "epoch": 560.39, "learning_rate": 8.929873997320608e-05, "loss": 0.4419, "step": 28580 }, { "epoch": 560.59, "learning_rate": 8.92589549944916e-05, "loss": 0.4426, "step": 28590 }, { "epoch": 560.78, "learning_rate": 8.921916585248027e-05, "loss": 0.4401, "step": 28600 }, { "epoch": 560.98, "learning_rate": 8.917937255878963e-05, "loss": 0.4433, "step": 28610 }, { "epoch": 561.0, "eval_loss": 0.4470755159854889, "eval_runtime": 2.354, "eval_samples_per_second": 968.157, "eval_steps_per_second": 3.823, "step": 28611 }, { "epoch": 561.18, "learning_rate": 8.913957512503844e-05, "loss": 0.4507, "step": 28620 }, { "epoch": 561.37, "learning_rate": 8.909977356284665e-05, "loss": 0.4438, "step": 28630 }, { "epoch": 561.57, "learning_rate": 8.905996788383543e-05, "loss": 0.4419, "step": 28640 }, { "epoch": 561.76, "learning_rate": 8.902015809962717e-05, "loss": 0.4417, "step": 28650 }, { "epoch": 561.96, "learning_rate": 8.898034422184542e-05, "loss": 0.4412, "step": 28660 }, { "epoch": 562.0, "eval_loss": 0.44909417629241943, "eval_runtime": 2.2227, "eval_samples_per_second": 1025.318, "eval_steps_per_second": 4.049, "step": 28662 }, { "epoch": 562.16, "learning_rate": 8.894052626211494e-05, "loss": 0.4466, "step": 28670 }, { "epoch": 562.35, "learning_rate": 8.890070423206171e-05, "loss": 0.4483, "step": 28680 }, { "epoch": 562.55, "learning_rate": 8.886087814331283e-05, "loss": 0.4412, "step": 28690 }, { "epoch": 562.75, "learning_rate": 8.882104800749671e-05, "loss": 0.4419, "step": 28700 }, { "epoch": 562.94, "learning_rate": 8.878121383624278e-05, "loss": 0.4357, "step": 28710 }, { "epoch": 563.0, "eval_loss": 0.4474387466907501, "eval_runtime": 2.317, "eval_samples_per_second": 983.597, "eval_steps_per_second": 3.884, "step": 28713 }, { "epoch": 563.14, "learning_rate": 8.874137564118174e-05, "loss": 0.4407, "step": 28720 }, { "epoch": 563.33, "learning_rate": 8.870153343394552e-05, "loss": 0.4416, "step": 28730 }, { "epoch": 563.53, "learning_rate": 8.866168722616707e-05, "loss": 0.4397, "step": 28740 }, { "epoch": 563.73, "learning_rate": 8.862183702948066e-05, "loss": 0.4414, "step": 28750 }, { "epoch": 563.92, "learning_rate": 8.858198285552164e-05, "loss": 0.4424, "step": 28760 }, { "epoch": 564.0, "eval_loss": 0.44805294275283813, "eval_runtime": 2.3439, "eval_samples_per_second": 972.294, "eval_steps_per_second": 3.84, "step": 28764 }, { "epoch": 564.12, "learning_rate": 8.854212471592652e-05, "loss": 0.4423, "step": 28770 }, { "epoch": 564.31, "learning_rate": 8.850226262233302e-05, "loss": 0.4466, "step": 28780 }, { "epoch": 564.51, "learning_rate": 8.846239658637994e-05, "loss": 0.4365, "step": 28790 }, { "epoch": 564.71, "learning_rate": 8.84225266197073e-05, "loss": 0.4449, "step": 28800 }, { "epoch": 564.9, "learning_rate": 8.838265273395625e-05, "loss": 0.4412, "step": 28810 }, { "epoch": 565.0, "eval_loss": 0.4479809105396271, "eval_runtime": 2.2726, "eval_samples_per_second": 1002.819, "eval_steps_per_second": 3.96, "step": 28815 }, { "epoch": 565.1, "learning_rate": 8.834277494076904e-05, "loss": 0.4408, "step": 28820 }, { "epoch": 565.29, "learning_rate": 8.830289325178915e-05, "loss": 0.4453, "step": 28830 }, { "epoch": 565.49, "learning_rate": 8.826300767866111e-05, "loss": 0.4493, "step": 28840 }, { "epoch": 565.69, "learning_rate": 8.822311823303061e-05, "loss": 0.4414, "step": 28850 }, { "epoch": 565.88, "learning_rate": 8.818322492654448e-05, "loss": 0.4483, "step": 28860 }, { "epoch": 566.0, "eval_loss": 0.4453369081020355, "eval_runtime": 2.2664, "eval_samples_per_second": 1005.562, "eval_steps_per_second": 3.971, "step": 28866 }, { "epoch": 566.08, "learning_rate": 8.81433277708507e-05, "loss": 0.4432, "step": 28870 }, { "epoch": 566.27, "learning_rate": 8.810342677759833e-05, "loss": 0.4364, "step": 28880 }, { "epoch": 566.47, "learning_rate": 8.80635219584376e-05, "loss": 0.439, "step": 28890 }, { "epoch": 566.67, "learning_rate": 8.802361332501978e-05, "loss": 0.4412, "step": 28900 }, { "epoch": 566.86, "learning_rate": 8.798370088899733e-05, "loss": 0.4397, "step": 28910 }, { "epoch": 567.0, "eval_loss": 0.4435195326805115, "eval_runtime": 2.2184, "eval_samples_per_second": 1027.336, "eval_steps_per_second": 4.057, "step": 28917 }, { "epoch": 567.06, "learning_rate": 8.794378466202377e-05, "loss": 0.4363, "step": 28920 }, { "epoch": 567.25, "learning_rate": 8.790386465575376e-05, "loss": 0.4437, "step": 28930 }, { "epoch": 567.45, "learning_rate": 8.78639408818431e-05, "loss": 0.4402, "step": 28940 }, { "epoch": 567.65, "learning_rate": 8.782401335194854e-05, "loss": 0.4403, "step": 28950 }, { "epoch": 567.84, "learning_rate": 8.778408207772813e-05, "loss": 0.4377, "step": 28960 }, { "epoch": 568.0, "eval_loss": 0.446013480424881, "eval_runtime": 2.2216, "eval_samples_per_second": 1025.817, "eval_steps_per_second": 4.051, "step": 28968 }, { "epoch": 568.04, "learning_rate": 8.774414707084085e-05, "loss": 0.4416, "step": 28970 }, { "epoch": 568.24, "learning_rate": 8.770420834294683e-05, "loss": 0.435, "step": 28980 }, { "epoch": 568.43, "learning_rate": 8.766426590570733e-05, "loss": 0.4426, "step": 28990 }, { "epoch": 568.63, "learning_rate": 8.762431977078461e-05, "loss": 0.4417, "step": 29000 }, { "epoch": 568.82, "learning_rate": 8.758436994984206e-05, "loss": 0.4424, "step": 29010 }, { "epoch": 569.0, "eval_loss": 0.4475310444831848, "eval_runtime": 2.2115, "eval_samples_per_second": 1030.531, "eval_steps_per_second": 4.07, "step": 29019 }, { "epoch": 569.02, "learning_rate": 8.754441645454416e-05, "loss": 0.4454, "step": 29020 }, { "epoch": 569.22, "learning_rate": 8.750445929655642e-05, "loss": 0.4375, "step": 29030 }, { "epoch": 569.41, "learning_rate": 8.746449848754546e-05, "loss": 0.44, "step": 29040 }, { "epoch": 569.61, "learning_rate": 8.74245340391789e-05, "loss": 0.4443, "step": 29050 }, { "epoch": 569.8, "learning_rate": 8.738456596312549e-05, "loss": 0.44, "step": 29060 }, { "epoch": 570.0, "learning_rate": 8.734459427105504e-05, "loss": 0.4412, "step": 29070 }, { "epoch": 570.0, "eval_loss": 0.44452720880508423, "eval_runtime": 2.2457, "eval_samples_per_second": 1014.851, "eval_steps_per_second": 4.008, "step": 29070 }, { "epoch": 570.2, "learning_rate": 8.730461897463838e-05, "loss": 0.4409, "step": 29080 }, { "epoch": 570.39, "learning_rate": 8.726464008554736e-05, "loss": 0.4418, "step": 29090 }, { "epoch": 570.59, "learning_rate": 8.7224657615455e-05, "loss": 0.4427, "step": 29100 }, { "epoch": 570.78, "learning_rate": 8.718467157603525e-05, "loss": 0.4463, "step": 29110 }, { "epoch": 570.98, "learning_rate": 8.714468197896313e-05, "loss": 0.4435, "step": 29120 }, { "epoch": 571.0, "eval_loss": 0.441842257976532, "eval_runtime": 2.1954, "eval_samples_per_second": 1038.08, "eval_steps_per_second": 4.099, "step": 29121 }, { "epoch": 571.18, "learning_rate": 8.710468883591474e-05, "loss": 0.44, "step": 29130 }, { "epoch": 571.37, "learning_rate": 8.706469215856715e-05, "loss": 0.4434, "step": 29140 }, { "epoch": 571.57, "learning_rate": 8.702469195859853e-05, "loss": 0.4397, "step": 29150 }, { "epoch": 571.76, "learning_rate": 8.698468824768803e-05, "loss": 0.4398, "step": 29160 }, { "epoch": 571.96, "learning_rate": 8.694468103751586e-05, "loss": 0.4398, "step": 29170 }, { "epoch": 572.0, "eval_loss": 0.4434479773044586, "eval_runtime": 2.2832, "eval_samples_per_second": 998.16, "eval_steps_per_second": 3.942, "step": 29172 }, { "epoch": 572.16, "learning_rate": 8.690467033976322e-05, "loss": 0.4408, "step": 29180 }, { "epoch": 572.35, "learning_rate": 8.686465616611232e-05, "loss": 0.4413, "step": 29190 }, { "epoch": 572.55, "learning_rate": 8.682463852824644e-05, "loss": 0.4385, "step": 29200 }, { "epoch": 572.75, "learning_rate": 8.678461743784983e-05, "loss": 0.4384, "step": 29210 }, { "epoch": 572.94, "learning_rate": 8.674459290660773e-05, "loss": 0.4427, "step": 29220 }, { "epoch": 573.0, "eval_loss": 0.44168439507484436, "eval_runtime": 2.2332, "eval_samples_per_second": 1020.522, "eval_steps_per_second": 4.03, "step": 29223 }, { "epoch": 573.14, "learning_rate": 8.670456494620645e-05, "loss": 0.4434, "step": 29230 }, { "epoch": 573.33, "learning_rate": 8.666453356833323e-05, "loss": 0.4396, "step": 29240 }, { "epoch": 573.53, "learning_rate": 8.662449878467637e-05, "loss": 0.4406, "step": 29250 }, { "epoch": 573.73, "learning_rate": 8.658446060692512e-05, "loss": 0.4434, "step": 29260 }, { "epoch": 573.92, "learning_rate": 8.65444190467697e-05, "loss": 0.4409, "step": 29270 }, { "epoch": 574.0, "eval_loss": 0.44099777936935425, "eval_runtime": 2.2519, "eval_samples_per_second": 1012.023, "eval_steps_per_second": 3.997, "step": 29274 }, { "epoch": 574.12, "learning_rate": 8.650437411590141e-05, "loss": 0.4399, "step": 29280 }, { "epoch": 574.31, "learning_rate": 8.646432582601244e-05, "loss": 0.4385, "step": 29290 }, { "epoch": 574.51, "learning_rate": 8.6424274188796e-05, "loss": 0.4378, "step": 29300 }, { "epoch": 574.71, "learning_rate": 8.63842192159463e-05, "loss": 0.4413, "step": 29310 }, { "epoch": 574.9, "learning_rate": 8.634416091915846e-05, "loss": 0.4425, "step": 29320 }, { "epoch": 575.0, "eval_loss": 0.4433988630771637, "eval_runtime": 2.3093, "eval_samples_per_second": 986.89, "eval_steps_per_second": 3.897, "step": 29325 }, { "epoch": 575.1, "learning_rate": 8.630409931012866e-05, "loss": 0.445, "step": 29330 }, { "epoch": 575.29, "learning_rate": 8.626403440055395e-05, "loss": 0.439, "step": 29340 }, { "epoch": 575.49, "learning_rate": 8.622396620213241e-05, "loss": 0.4458, "step": 29350 }, { "epoch": 575.69, "learning_rate": 8.618389472656305e-05, "loss": 0.4361, "step": 29360 }, { "epoch": 575.88, "learning_rate": 8.614381998554585e-05, "loss": 0.4402, "step": 29370 }, { "epoch": 576.0, "eval_loss": 0.4489006996154785, "eval_runtime": 2.2375, "eval_samples_per_second": 1018.567, "eval_steps_per_second": 4.022, "step": 29376 }, { "epoch": 576.08, "learning_rate": 8.610374199078179e-05, "loss": 0.4417, "step": 29380 }, { "epoch": 576.27, "learning_rate": 8.606366075397266e-05, "loss": 0.4436, "step": 29390 }, { "epoch": 576.47, "learning_rate": 8.602357628682135e-05, "loss": 0.4445, "step": 29400 }, { "epoch": 576.67, "learning_rate": 8.598348860103162e-05, "loss": 0.4367, "step": 29410 }, { "epoch": 576.86, "learning_rate": 8.594339770830815e-05, "loss": 0.4394, "step": 29420 }, { "epoch": 577.0, "eval_loss": 0.4435146450996399, "eval_runtime": 2.2075, "eval_samples_per_second": 1032.38, "eval_steps_per_second": 4.077, "step": 29427 }, { "epoch": 577.06, "learning_rate": 8.590330362035663e-05, "loss": 0.4387, "step": 29430 }, { "epoch": 577.25, "learning_rate": 8.58632063488836e-05, "loss": 0.4438, "step": 29440 }, { "epoch": 577.45, "learning_rate": 8.582310590559662e-05, "loss": 0.4413, "step": 29450 }, { "epoch": 577.65, "learning_rate": 8.578300230220408e-05, "loss": 0.4334, "step": 29460 }, { "epoch": 577.84, "learning_rate": 8.574289555041537e-05, "loss": 0.4379, "step": 29470 }, { "epoch": 578.0, "eval_loss": 0.4446564018726349, "eval_runtime": 2.2644, "eval_samples_per_second": 1006.439, "eval_steps_per_second": 3.975, "step": 29478 }, { "epoch": 578.04, "learning_rate": 8.570278566194071e-05, "loss": 0.4394, "step": 29480 }, { "epoch": 578.24, "learning_rate": 8.566267264849137e-05, "loss": 0.4376, "step": 29490 }, { "epoch": 578.43, "learning_rate": 8.56225565217794e-05, "loss": 0.4346, "step": 29500 }, { "epoch": 578.63, "learning_rate": 8.558243729351784e-05, "loss": 0.442, "step": 29510 }, { "epoch": 578.82, "learning_rate": 8.554231497542058e-05, "loss": 0.4391, "step": 29520 }, { "epoch": 579.0, "eval_loss": 0.44711729884147644, "eval_runtime": 2.3296, "eval_samples_per_second": 978.296, "eval_steps_per_second": 3.863, "step": 29529 }, { "epoch": 579.02, "learning_rate": 8.550218957920247e-05, "loss": 0.4372, "step": 29530 }, { "epoch": 579.22, "learning_rate": 8.546206111657923e-05, "loss": 0.4419, "step": 29540 }, { "epoch": 579.41, "learning_rate": 8.542192959926748e-05, "loss": 0.4369, "step": 29550 }, { "epoch": 579.61, "learning_rate": 8.538179503898471e-05, "loss": 0.4351, "step": 29560 }, { "epoch": 579.8, "learning_rate": 8.534165744744933e-05, "loss": 0.4397, "step": 29570 }, { "epoch": 580.0, "learning_rate": 8.530151683638061e-05, "loss": 0.4404, "step": 29580 }, { "epoch": 580.0, "eval_loss": 0.44352006912231445, "eval_runtime": 2.2363, "eval_samples_per_second": 1019.074, "eval_steps_per_second": 4.024, "step": 29580 }, { "epoch": 580.2, "learning_rate": 8.526137321749872e-05, "loss": 0.4387, "step": 29590 }, { "epoch": 580.39, "learning_rate": 8.522122660252471e-05, "loss": 0.4392, "step": 29600 }, { "epoch": 580.59, "learning_rate": 8.518107700318048e-05, "loss": 0.4391, "step": 29610 }, { "epoch": 580.78, "learning_rate": 8.514092443118883e-05, "loss": 0.4382, "step": 29620 }, { "epoch": 580.98, "learning_rate": 8.51007688982734e-05, "loss": 0.4399, "step": 29630 }, { "epoch": 581.0, "eval_loss": 0.4410766363143921, "eval_runtime": 2.2013, "eval_samples_per_second": 1035.315, "eval_steps_per_second": 4.089, "step": 29631 }, { "epoch": 581.18, "learning_rate": 8.506061041615872e-05, "loss": 0.4412, "step": 29640 }, { "epoch": 581.37, "learning_rate": 8.50204489965702e-05, "loss": 0.4364, "step": 29650 }, { "epoch": 581.57, "learning_rate": 8.498028465123402e-05, "loss": 0.439, "step": 29660 }, { "epoch": 581.76, "learning_rate": 8.494011739187732e-05, "loss": 0.4371, "step": 29670 }, { "epoch": 581.96, "learning_rate": 8.489994723022801e-05, "loss": 0.4353, "step": 29680 }, { "epoch": 582.0, "eval_loss": 0.4415852725505829, "eval_runtime": 2.2155, "eval_samples_per_second": 1028.655, "eval_steps_per_second": 4.062, "step": 29682 }, { "epoch": 582.16, "learning_rate": 8.485977417801492e-05, "loss": 0.4406, "step": 29690 }, { "epoch": 582.35, "learning_rate": 8.481959824696765e-05, "loss": 0.4348, "step": 29700 }, { "epoch": 582.55, "learning_rate": 8.47794194488167e-05, "loss": 0.4389, "step": 29710 }, { "epoch": 582.75, "learning_rate": 8.473923779529337e-05, "loss": 0.4417, "step": 29720 }, { "epoch": 582.94, "learning_rate": 8.469905329812981e-05, "loss": 0.4417, "step": 29730 }, { "epoch": 583.0, "eval_loss": 0.4416983425617218, "eval_runtime": 2.1672, "eval_samples_per_second": 1051.592, "eval_steps_per_second": 4.153, "step": 29733 }, { "epoch": 583.14, "learning_rate": 8.4658865969059e-05, "loss": 0.4421, "step": 29740 }, { "epoch": 583.33, "learning_rate": 8.461867581981472e-05, "loss": 0.4421, "step": 29750 }, { "epoch": 583.53, "learning_rate": 8.457848286213166e-05, "loss": 0.435, "step": 29760 }, { "epoch": 583.73, "learning_rate": 8.453828710774517e-05, "loss": 0.4389, "step": 29770 }, { "epoch": 583.92, "learning_rate": 8.44980885683916e-05, "loss": 0.4389, "step": 29780 }, { "epoch": 584.0, "eval_loss": 0.4399338662624359, "eval_runtime": 2.2366, "eval_samples_per_second": 1018.961, "eval_steps_per_second": 4.024, "step": 29784 }, { "epoch": 584.12, "learning_rate": 8.4457887255808e-05, "loss": 0.4362, "step": 29790 }, { "epoch": 584.31, "learning_rate": 8.441768318173226e-05, "loss": 0.4391, "step": 29800 }, { "epoch": 584.51, "learning_rate": 8.437747635790304e-05, "loss": 0.4367, "step": 29810 }, { "epoch": 584.71, "learning_rate": 8.433726679605987e-05, "loss": 0.4409, "step": 29820 }, { "epoch": 584.9, "learning_rate": 8.429705450794304e-05, "loss": 0.4378, "step": 29830 }, { "epoch": 585.0, "eval_loss": 0.44315850734710693, "eval_runtime": 2.3103, "eval_samples_per_second": 986.471, "eval_steps_per_second": 3.896, "step": 29835 }, { "epoch": 585.1, "learning_rate": 8.425683950529364e-05, "loss": 0.4343, "step": 29840 }, { "epoch": 585.29, "learning_rate": 8.421662179985356e-05, "loss": 0.4368, "step": 29850 }, { "epoch": 585.49, "learning_rate": 8.417640140336546e-05, "loss": 0.4365, "step": 29860 }, { "epoch": 585.69, "learning_rate": 8.413617832757278e-05, "loss": 0.4407, "step": 29870 }, { "epoch": 585.88, "learning_rate": 8.409595258421981e-05, "loss": 0.439, "step": 29880 }, { "epoch": 586.0, "eval_loss": 0.44265684485435486, "eval_runtime": 2.3136, "eval_samples_per_second": 985.054, "eval_steps_per_second": 3.89, "step": 29886 }, { "epoch": 586.08, "learning_rate": 8.405572418505156e-05, "loss": 0.4379, "step": 29890 }, { "epoch": 586.27, "learning_rate": 8.401549314181376e-05, "loss": 0.4392, "step": 29900 }, { "epoch": 586.47, "learning_rate": 8.397525946625307e-05, "loss": 0.438, "step": 29910 }, { "epoch": 586.67, "learning_rate": 8.393502317011676e-05, "loss": 0.4313, "step": 29920 }, { "epoch": 586.86, "learning_rate": 8.389478426515299e-05, "loss": 0.431, "step": 29930 }, { "epoch": 587.0, "eval_loss": 0.4403259754180908, "eval_runtime": 2.3194, "eval_samples_per_second": 982.582, "eval_steps_per_second": 3.88, "step": 29937 }, { "epoch": 587.06, "learning_rate": 8.385454276311057e-05, "loss": 0.4365, "step": 29940 }, { "epoch": 587.25, "learning_rate": 8.381429867573911e-05, "loss": 0.4384, "step": 29950 }, { "epoch": 587.45, "learning_rate": 8.377405201478904e-05, "loss": 0.433, "step": 29960 }, { "epoch": 587.65, "learning_rate": 8.373380279201146e-05, "loss": 0.4378, "step": 29970 }, { "epoch": 587.84, "learning_rate": 8.369355101915824e-05, "loss": 0.4348, "step": 29980 }, { "epoch": 588.0, "eval_loss": 0.4408820867538452, "eval_runtime": 2.3452, "eval_samples_per_second": 971.782, "eval_steps_per_second": 3.838, "step": 29988 }, { "epoch": 588.04, "learning_rate": 8.365329670798203e-05, "loss": 0.435, "step": 29990 }, { "epoch": 588.24, "learning_rate": 8.361303987023614e-05, "loss": 0.4375, "step": 30000 }, { "epoch": 588.43, "learning_rate": 8.357278051767472e-05, "loss": 0.4412, "step": 30010 }, { "epoch": 588.63, "learning_rate": 8.353251866205257e-05, "loss": 0.4345, "step": 30020 }, { "epoch": 588.82, "learning_rate": 8.349225431512524e-05, "loss": 0.4363, "step": 30030 }, { "epoch": 589.0, "eval_loss": 0.44250038266181946, "eval_runtime": 2.2027, "eval_samples_per_second": 1034.662, "eval_steps_per_second": 4.086, "step": 30039 }, { "epoch": 589.02, "learning_rate": 8.345198748864909e-05, "loss": 0.4344, "step": 30040 }, { "epoch": 589.22, "learning_rate": 8.341171819438106e-05, "loss": 0.4384, "step": 30050 }, { "epoch": 589.41, "learning_rate": 8.337144644407893e-05, "loss": 0.4389, "step": 30060 }, { "epoch": 589.61, "learning_rate": 8.333117224950114e-05, "loss": 0.4378, "step": 30070 }, { "epoch": 589.8, "learning_rate": 8.329089562240686e-05, "loss": 0.4367, "step": 30080 }, { "epoch": 590.0, "learning_rate": 8.325061657455594e-05, "loss": 0.4399, "step": 30090 }, { "epoch": 590.0, "eval_loss": 0.4393501877784729, "eval_runtime": 2.3569, "eval_samples_per_second": 966.933, "eval_steps_per_second": 3.819, "step": 30090 }, { "epoch": 590.2, "learning_rate": 8.3210335117709e-05, "loss": 0.4309, "step": 30100 }, { "epoch": 590.39, "learning_rate": 8.317005126362731e-05, "loss": 0.4375, "step": 30110 }, { "epoch": 590.59, "learning_rate": 8.312976502407288e-05, "loss": 0.435, "step": 30120 }, { "epoch": 590.78, "learning_rate": 8.308947641080836e-05, "loss": 0.4352, "step": 30130 }, { "epoch": 590.98, "learning_rate": 8.304918543559715e-05, "loss": 0.4342, "step": 30140 }, { "epoch": 591.0, "eval_loss": 0.4411936104297638, "eval_runtime": 2.2081, "eval_samples_per_second": 1032.125, "eval_steps_per_second": 4.076, "step": 30141 }, { "epoch": 591.18, "learning_rate": 8.300889211020331e-05, "loss": 0.4317, "step": 30150 }, { "epoch": 591.37, "learning_rate": 8.296859644639157e-05, "loss": 0.4366, "step": 30160 }, { "epoch": 591.57, "learning_rate": 8.292829845592739e-05, "loss": 0.4366, "step": 30170 }, { "epoch": 591.76, "learning_rate": 8.288799815057689e-05, "loss": 0.4329, "step": 30180 }, { "epoch": 591.96, "learning_rate": 8.284769554210685e-05, "loss": 0.4342, "step": 30190 }, { "epoch": 592.0, "eval_loss": 0.4399246275424957, "eval_runtime": 2.2423, "eval_samples_per_second": 1016.359, "eval_steps_per_second": 4.014, "step": 30192 }, { "epoch": 592.16, "learning_rate": 8.280739064228471e-05, "loss": 0.4357, "step": 30200 }, { "epoch": 592.35, "learning_rate": 8.276708346287865e-05, "loss": 0.4347, "step": 30210 }, { "epoch": 592.55, "learning_rate": 8.272677401565742e-05, "loss": 0.4355, "step": 30220 }, { "epoch": 592.75, "learning_rate": 8.268646231239052e-05, "loss": 0.4405, "step": 30230 }, { "epoch": 592.94, "learning_rate": 8.264614836484803e-05, "loss": 0.4348, "step": 30240 }, { "epoch": 593.0, "eval_loss": 0.441998690366745, "eval_runtime": 2.2767, "eval_samples_per_second": 1001.018, "eval_steps_per_second": 3.953, "step": 30243 }, { "epoch": 593.14, "learning_rate": 8.260583218480075e-05, "loss": 0.4352, "step": 30250 }, { "epoch": 593.33, "learning_rate": 8.256551378402012e-05, "loss": 0.4402, "step": 30260 }, { "epoch": 593.53, "learning_rate": 8.252519317427817e-05, "loss": 0.436, "step": 30270 }, { "epoch": 593.73, "learning_rate": 8.248487036734766e-05, "loss": 0.4357, "step": 30280 }, { "epoch": 593.92, "learning_rate": 8.244454537500189e-05, "loss": 0.4326, "step": 30290 }, { "epoch": 594.0, "eval_loss": 0.44458866119384766, "eval_runtime": 2.2919, "eval_samples_per_second": 994.37, "eval_steps_per_second": 3.927, "step": 30294 }, { "epoch": 594.12, "learning_rate": 8.240421820901495e-05, "loss": 0.4314, "step": 30300 }, { "epoch": 594.31, "learning_rate": 8.23638888811614e-05, "loss": 0.4389, "step": 30310 }, { "epoch": 594.51, "learning_rate": 8.232355740321651e-05, "loss": 0.4343, "step": 30320 }, { "epoch": 594.71, "learning_rate": 8.228322378695622e-05, "loss": 0.433, "step": 30330 }, { "epoch": 594.9, "learning_rate": 8.2242888044157e-05, "loss": 0.4333, "step": 30340 }, { "epoch": 595.0, "eval_loss": 0.44296392798423767, "eval_runtime": 2.1971, "eval_samples_per_second": 1037.271, "eval_steps_per_second": 4.096, "step": 30345 }, { "epoch": 595.1, "learning_rate": 8.220255018659601e-05, "loss": 0.4384, "step": 30350 }, { "epoch": 595.29, "learning_rate": 8.2162210226051e-05, "loss": 0.4359, "step": 30360 }, { "epoch": 595.49, "learning_rate": 8.212186817430031e-05, "loss": 0.4307, "step": 30370 }, { "epoch": 595.69, "learning_rate": 8.208152404312299e-05, "loss": 0.4335, "step": 30380 }, { "epoch": 595.88, "learning_rate": 8.204117784429856e-05, "loss": 0.4336, "step": 30390 }, { "epoch": 596.0, "eval_loss": 0.4396732747554779, "eval_runtime": 2.2713, "eval_samples_per_second": 1003.396, "eval_steps_per_second": 3.963, "step": 30396 }, { "epoch": 596.08, "learning_rate": 8.200082958960723e-05, "loss": 0.4356, "step": 30400 }, { "epoch": 596.27, "learning_rate": 8.196047929082981e-05, "loss": 0.4374, "step": 30410 }, { "epoch": 596.47, "learning_rate": 8.192012695974765e-05, "loss": 0.4351, "step": 30420 }, { "epoch": 596.67, "learning_rate": 8.187977260814275e-05, "loss": 0.431, "step": 30430 }, { "epoch": 596.86, "learning_rate": 8.183941624779769e-05, "loss": 0.4314, "step": 30440 }, { "epoch": 597.0, "eval_loss": 0.44181305170059204, "eval_runtime": 2.2614, "eval_samples_per_second": 1007.787, "eval_steps_per_second": 3.98, "step": 30447 }, { "epoch": 597.06, "learning_rate": 8.179905789049561e-05, "loss": 0.4307, "step": 30450 }, { "epoch": 597.25, "learning_rate": 8.175869754802028e-05, "loss": 0.4337, "step": 30460 }, { "epoch": 597.45, "learning_rate": 8.1718335232156e-05, "loss": 0.4334, "step": 30470 }, { "epoch": 597.65, "learning_rate": 8.167797095468766e-05, "loss": 0.4348, "step": 30480 }, { "epoch": 597.84, "learning_rate": 8.163760472740073e-05, "loss": 0.4371, "step": 30490 }, { "epoch": 598.0, "eval_loss": 0.441135048866272, "eval_runtime": 2.2413, "eval_samples_per_second": 1016.834, "eval_steps_per_second": 4.016, "step": 30498 }, { "epoch": 598.04, "learning_rate": 8.159723656208126e-05, "loss": 0.436, "step": 30500 }, { "epoch": 598.24, "learning_rate": 8.155686647051584e-05, "loss": 0.4382, "step": 30510 }, { "epoch": 598.43, "learning_rate": 8.151649446449163e-05, "loss": 0.4335, "step": 30520 }, { "epoch": 598.63, "learning_rate": 8.147612055579639e-05, "loss": 0.434, "step": 30530 }, { "epoch": 598.82, "learning_rate": 8.143574475621837e-05, "loss": 0.4333, "step": 30540 }, { "epoch": 599.0, "eval_loss": 0.4385489225387573, "eval_runtime": 2.2156, "eval_samples_per_second": 1028.626, "eval_steps_per_second": 4.062, "step": 30549 }, { "epoch": 599.02, "learning_rate": 8.139536707754641e-05, "loss": 0.4423, "step": 30550 }, { "epoch": 599.22, "learning_rate": 8.13549875315699e-05, "loss": 0.4333, "step": 30560 }, { "epoch": 599.41, "learning_rate": 8.131460613007875e-05, "loss": 0.4335, "step": 30570 }, { "epoch": 599.61, "learning_rate": 8.127422288486345e-05, "loss": 0.432, "step": 30580 }, { "epoch": 599.8, "learning_rate": 8.123383780771498e-05, "loss": 0.4281, "step": 30590 }, { "epoch": 600.0, "learning_rate": 8.119345091042493e-05, "loss": 0.4337, "step": 30600 }, { "epoch": 600.0, "eval_loss": 0.43944406509399414, "eval_runtime": 2.3431, "eval_samples_per_second": 972.654, "eval_steps_per_second": 3.841, "step": 30600 }, { "epoch": 600.2, "learning_rate": 8.115306220478532e-05, "loss": 0.4308, "step": 30610 }, { "epoch": 600.39, "learning_rate": 8.111267170258878e-05, "loss": 0.4389, "step": 30620 }, { "epoch": 600.59, "learning_rate": 8.107227941562841e-05, "loss": 0.4416, "step": 30630 }, { "epoch": 600.78, "learning_rate": 8.103188535569788e-05, "loss": 0.4374, "step": 30640 }, { "epoch": 600.98, "learning_rate": 8.099148953459137e-05, "loss": 0.4371, "step": 30650 }, { "epoch": 601.0, "eval_loss": 0.44066575169563293, "eval_runtime": 2.2914, "eval_samples_per_second": 994.578, "eval_steps_per_second": 3.928, "step": 30651 }, { "epoch": 601.18, "learning_rate": 8.095109196410353e-05, "loss": 0.4357, "step": 30660 }, { "epoch": 601.37, "learning_rate": 8.091069265602957e-05, "loss": 0.4378, "step": 30670 }, { "epoch": 601.57, "learning_rate": 8.087029162216514e-05, "loss": 0.435, "step": 30680 }, { "epoch": 601.76, "learning_rate": 8.082988887430652e-05, "loss": 0.4314, "step": 30690 }, { "epoch": 601.96, "learning_rate": 8.078948442425035e-05, "loss": 0.4294, "step": 30700 }, { "epoch": 602.0, "eval_loss": 0.43954363465309143, "eval_runtime": 2.1748, "eval_samples_per_second": 1047.926, "eval_steps_per_second": 4.138, "step": 30702 }, { "epoch": 602.16, "learning_rate": 8.074907828379383e-05, "loss": 0.4387, "step": 30710 }, { "epoch": 602.35, "learning_rate": 8.070867046473468e-05, "loss": 0.431, "step": 30720 }, { "epoch": 602.55, "learning_rate": 8.066826097887109e-05, "loss": 0.4358, "step": 30730 }, { "epoch": 602.75, "learning_rate": 8.062784983800169e-05, "loss": 0.437, "step": 30740 }, { "epoch": 602.94, "learning_rate": 8.058743705392566e-05, "loss": 0.4323, "step": 30750 }, { "epoch": 603.0, "eval_loss": 0.4404396712779999, "eval_runtime": 2.3305, "eval_samples_per_second": 977.881, "eval_steps_per_second": 3.862, "step": 30753 }, { "epoch": 603.14, "learning_rate": 8.054702263844258e-05, "loss": 0.4352, "step": 30760 }, { "epoch": 603.33, "learning_rate": 8.050660660335264e-05, "loss": 0.4357, "step": 30770 }, { "epoch": 603.53, "learning_rate": 8.046618896045638e-05, "loss": 0.4345, "step": 30780 }, { "epoch": 603.73, "learning_rate": 8.042576972155484e-05, "loss": 0.4309, "step": 30790 }, { "epoch": 603.92, "learning_rate": 8.038534889844957e-05, "loss": 0.4303, "step": 30800 }, { "epoch": 604.0, "eval_loss": 0.44217541813850403, "eval_runtime": 2.3094, "eval_samples_per_second": 986.824, "eval_steps_per_second": 3.897, "step": 30804 }, { "epoch": 604.12, "learning_rate": 8.03449265029425e-05, "loss": 0.4332, "step": 30810 }, { "epoch": 604.31, "learning_rate": 8.030450254683612e-05, "loss": 0.4369, "step": 30820 }, { "epoch": 604.51, "learning_rate": 8.026407704193327e-05, "loss": 0.4384, "step": 30830 }, { "epoch": 604.71, "learning_rate": 8.022365000003734e-05, "loss": 0.4343, "step": 30840 }, { "epoch": 604.9, "learning_rate": 8.01832214329521e-05, "loss": 0.4325, "step": 30850 }, { "epoch": 605.0, "eval_loss": 0.4375738203525543, "eval_runtime": 2.1954, "eval_samples_per_second": 1038.103, "eval_steps_per_second": 4.1, "step": 30855 }, { "epoch": 605.1, "learning_rate": 8.014279135248181e-05, "loss": 0.437, "step": 30860 }, { "epoch": 605.29, "learning_rate": 8.010235977043112e-05, "loss": 0.4353, "step": 30870 }, { "epoch": 605.49, "learning_rate": 8.006192669860521e-05, "loss": 0.4295, "step": 30880 }, { "epoch": 605.69, "learning_rate": 8.002149214880955e-05, "loss": 0.4342, "step": 30890 }, { "epoch": 605.88, "learning_rate": 7.99810561328502e-05, "loss": 0.44, "step": 30900 }, { "epoch": 606.0, "eval_loss": 0.4398665130138397, "eval_runtime": 2.2661, "eval_samples_per_second": 1005.707, "eval_steps_per_second": 3.972, "step": 30906 }, { "epoch": 606.08, "learning_rate": 7.994061866253355e-05, "loss": 0.436, "step": 30910 }, { "epoch": 606.27, "learning_rate": 7.990017974966642e-05, "loss": 0.433, "step": 30920 }, { "epoch": 606.47, "learning_rate": 7.98597394060561e-05, "loss": 0.4341, "step": 30930 }, { "epoch": 606.67, "learning_rate": 7.981929764351026e-05, "loss": 0.4318, "step": 30940 }, { "epoch": 606.86, "learning_rate": 7.977885447383698e-05, "loss": 0.4343, "step": 30950 }, { "epoch": 607.0, "eval_loss": 0.4403430223464966, "eval_runtime": 2.2274, "eval_samples_per_second": 1023.166, "eval_steps_per_second": 4.041, "step": 30957 }, { "epoch": 607.06, "learning_rate": 7.973840990884477e-05, "loss": 0.4345, "step": 30960 }, { "epoch": 607.25, "learning_rate": 7.969796396034253e-05, "loss": 0.4375, "step": 30970 }, { "epoch": 607.45, "learning_rate": 7.965751664013962e-05, "loss": 0.4318, "step": 30980 }, { "epoch": 607.65, "learning_rate": 7.961706796004572e-05, "loss": 0.4307, "step": 30990 }, { "epoch": 607.84, "learning_rate": 7.957661793187091e-05, "loss": 0.4313, "step": 31000 }, { "epoch": 608.0, "eval_loss": 0.43968504667282104, "eval_runtime": 2.2198, "eval_samples_per_second": 1026.684, "eval_steps_per_second": 4.054, "step": 31008 }, { "epoch": 608.04, "learning_rate": 7.953616656742579e-05, "loss": 0.4283, "step": 31010 }, { "epoch": 608.24, "learning_rate": 7.949571387852114e-05, "loss": 0.4336, "step": 31020 }, { "epoch": 608.43, "learning_rate": 7.945525987696835e-05, "loss": 0.4313, "step": 31030 }, { "epoch": 608.63, "learning_rate": 7.941480457457901e-05, "loss": 0.4299, "step": 31040 }, { "epoch": 608.82, "learning_rate": 7.937434798316518e-05, "loss": 0.4338, "step": 31050 }, { "epoch": 609.0, "eval_loss": 0.4378510117530823, "eval_runtime": 2.2222, "eval_samples_per_second": 1025.549, "eval_steps_per_second": 4.05, "step": 31059 }, { "epoch": 609.02, "learning_rate": 7.933389011453933e-05, "loss": 0.4263, "step": 31060 }, { "epoch": 609.22, "learning_rate": 7.929343098051422e-05, "loss": 0.4302, "step": 31070 }, { "epoch": 609.41, "learning_rate": 7.9252970592903e-05, "loss": 0.4299, "step": 31080 }, { "epoch": 609.61, "learning_rate": 7.921250896351922e-05, "loss": 0.4326, "step": 31090 }, { "epoch": 609.8, "learning_rate": 7.917204610417677e-05, "loss": 0.4275, "step": 31100 }, { "epoch": 610.0, "learning_rate": 7.91315820266899e-05, "loss": 0.4299, "step": 31110 }, { "epoch": 610.0, "eval_loss": 0.4349246919155121, "eval_runtime": 2.3617, "eval_samples_per_second": 964.973, "eval_steps_per_second": 3.811, "step": 31110 }, { "epoch": 610.2, "learning_rate": 7.909111674287323e-05, "loss": 0.4344, "step": 31120 }, { "epoch": 610.39, "learning_rate": 7.905065026454171e-05, "loss": 0.4368, "step": 31130 }, { "epoch": 610.59, "learning_rate": 7.901018260351064e-05, "loss": 0.4281, "step": 31140 }, { "epoch": 610.78, "learning_rate": 7.896971377159571e-05, "loss": 0.4305, "step": 31150 }, { "epoch": 610.98, "learning_rate": 7.892924378061289e-05, "loss": 0.4325, "step": 31160 }, { "epoch": 611.0, "eval_loss": 0.4369864761829376, "eval_runtime": 2.1774, "eval_samples_per_second": 1046.683, "eval_steps_per_second": 4.133, "step": 31161 }, { "epoch": 611.18, "learning_rate": 7.88887726423785e-05, "loss": 0.4301, "step": 31170 }, { "epoch": 611.37, "learning_rate": 7.884830036870921e-05, "loss": 0.427, "step": 31180 }, { "epoch": 611.57, "learning_rate": 7.880782697142207e-05, "loss": 0.4304, "step": 31190 }, { "epoch": 611.76, "learning_rate": 7.876735246233437e-05, "loss": 0.4378, "step": 31200 }, { "epoch": 611.96, "learning_rate": 7.872687685326375e-05, "loss": 0.429, "step": 31210 }, { "epoch": 612.0, "eval_loss": 0.43705418705940247, "eval_runtime": 2.3129, "eval_samples_per_second": 985.344, "eval_steps_per_second": 3.891, "step": 31212 }, { "epoch": 612.16, "learning_rate": 7.868640015602824e-05, "loss": 0.4303, "step": 31220 }, { "epoch": 612.35, "learning_rate": 7.864592238244607e-05, "loss": 0.4328, "step": 31230 }, { "epoch": 612.55, "learning_rate": 7.86054435443359e-05, "loss": 0.4279, "step": 31240 }, { "epoch": 612.75, "learning_rate": 7.85649636535166e-05, "loss": 0.4313, "step": 31250 }, { "epoch": 612.94, "learning_rate": 7.852448272180744e-05, "loss": 0.4291, "step": 31260 }, { "epoch": 613.0, "eval_loss": 0.42991194128990173, "eval_runtime": 2.234, "eval_samples_per_second": 1020.124, "eval_steps_per_second": 4.029, "step": 31263 }, { "epoch": 613.14, "learning_rate": 7.848400076102792e-05, "loss": 0.4317, "step": 31270 }, { "epoch": 613.33, "learning_rate": 7.844351778299788e-05, "loss": 0.4335, "step": 31280 }, { "epoch": 613.53, "learning_rate": 7.840303379953746e-05, "loss": 0.4325, "step": 31290 }, { "epoch": 613.73, "learning_rate": 7.836254882246704e-05, "loss": 0.4302, "step": 31300 }, { "epoch": 613.92, "learning_rate": 7.832206286360736e-05, "loss": 0.4349, "step": 31310 }, { "epoch": 614.0, "eval_loss": 0.43643268942832947, "eval_runtime": 2.2319, "eval_samples_per_second": 1021.095, "eval_steps_per_second": 4.032, "step": 31314 }, { "epoch": 614.12, "learning_rate": 7.828157593477942e-05, "loss": 0.4316, "step": 31320 }, { "epoch": 614.31, "learning_rate": 7.82410880478045e-05, "loss": 0.4332, "step": 31330 }, { "epoch": 614.51, "learning_rate": 7.820059921450414e-05, "loss": 0.431, "step": 31340 }, { "epoch": 614.71, "learning_rate": 7.816010944670021e-05, "loss": 0.435, "step": 31350 }, { "epoch": 614.9, "learning_rate": 7.811961875621478e-05, "loss": 0.4308, "step": 31360 }, { "epoch": 615.0, "eval_loss": 0.43355175852775574, "eval_runtime": 2.3224, "eval_samples_per_second": 981.333, "eval_steps_per_second": 3.875, "step": 31365 }, { "epoch": 615.1, "learning_rate": 7.807912715487025e-05, "loss": 0.4322, "step": 31370 }, { "epoch": 615.29, "learning_rate": 7.803863465448927e-05, "loss": 0.4239, "step": 31380 }, { "epoch": 615.49, "learning_rate": 7.799814126689471e-05, "loss": 0.4337, "step": 31390 }, { "epoch": 615.69, "learning_rate": 7.79576470039098e-05, "loss": 0.4332, "step": 31400 }, { "epoch": 615.88, "learning_rate": 7.791715187735792e-05, "loss": 0.4305, "step": 31410 }, { "epoch": 616.0, "eval_loss": 0.4343326687812805, "eval_runtime": 2.3142, "eval_samples_per_second": 984.779, "eval_steps_per_second": 3.889, "step": 31416 }, { "epoch": 616.08, "learning_rate": 7.787665589906275e-05, "loss": 0.4302, "step": 31420 }, { "epoch": 616.27, "learning_rate": 7.783615908084822e-05, "loss": 0.4308, "step": 31430 }, { "epoch": 616.47, "learning_rate": 7.779566143453846e-05, "loss": 0.4339, "step": 31440 }, { "epoch": 616.67, "learning_rate": 7.775516297195794e-05, "loss": 0.4281, "step": 31450 }, { "epoch": 616.86, "learning_rate": 7.771466370493127e-05, "loss": 0.4267, "step": 31460 }, { "epoch": 617.0, "eval_loss": 0.4391220808029175, "eval_runtime": 2.3012, "eval_samples_per_second": 990.354, "eval_steps_per_second": 3.911, "step": 31467 }, { "epoch": 617.06, "learning_rate": 7.767416364528332e-05, "loss": 0.4367, "step": 31470 }, { "epoch": 617.25, "learning_rate": 7.763366280483926e-05, "loss": 0.4325, "step": 31480 }, { "epoch": 617.45, "learning_rate": 7.759316119542437e-05, "loss": 0.4279, "step": 31490 }, { "epoch": 617.65, "learning_rate": 7.755265882886426e-05, "loss": 0.4301, "step": 31500 }, { "epoch": 617.84, "learning_rate": 7.75121557169847e-05, "loss": 0.4329, "step": 31510 }, { "epoch": 618.0, "eval_loss": 0.43645963072776794, "eval_runtime": 2.1947, "eval_samples_per_second": 1038.398, "eval_steps_per_second": 4.101, "step": 31518 }, { "epoch": 618.04, "learning_rate": 7.747165187161168e-05, "loss": 0.4271, "step": 31520 }, { "epoch": 618.24, "learning_rate": 7.743114730457145e-05, "loss": 0.428, "step": 31530 }, { "epoch": 618.43, "learning_rate": 7.739064202769044e-05, "loss": 0.4333, "step": 31540 }, { "epoch": 618.63, "learning_rate": 7.735013605279525e-05, "loss": 0.4311, "step": 31550 }, { "epoch": 618.82, "learning_rate": 7.730962939171278e-05, "loss": 0.4269, "step": 31560 }, { "epoch": 619.0, "eval_loss": 0.43330323696136475, "eval_runtime": 2.2191, "eval_samples_per_second": 1026.991, "eval_steps_per_second": 4.056, "step": 31569 }, { "epoch": 619.02, "learning_rate": 7.726912205627e-05, "loss": 0.4339, "step": 31570 }, { "epoch": 619.22, "learning_rate": 7.722861405829422e-05, "loss": 0.4303, "step": 31580 }, { "epoch": 619.41, "learning_rate": 7.718810540961281e-05, "loss": 0.43, "step": 31590 }, { "epoch": 619.61, "learning_rate": 7.714759612205342e-05, "loss": 0.4249, "step": 31600 }, { "epoch": 619.8, "learning_rate": 7.710708620744387e-05, "loss": 0.4292, "step": 31610 }, { "epoch": 620.0, "learning_rate": 7.706657567761216e-05, "loss": 0.4251, "step": 31620 }, { "epoch": 620.0, "eval_loss": 0.4343318045139313, "eval_runtime": 2.1695, "eval_samples_per_second": 1050.467, "eval_steps_per_second": 4.148, "step": 31620 }, { "epoch": 620.2, "learning_rate": 7.702606454438641e-05, "loss": 0.4296, "step": 31630 }, { "epoch": 620.39, "learning_rate": 7.698555281959501e-05, "loss": 0.4276, "step": 31640 }, { "epoch": 620.59, "learning_rate": 7.694504051506647e-05, "loss": 0.4316, "step": 31650 }, { "epoch": 620.78, "learning_rate": 7.690452764262947e-05, "loss": 0.4313, "step": 31660 }, { "epoch": 620.98, "learning_rate": 7.686401421411288e-05, "loss": 0.427, "step": 31670 }, { "epoch": 621.0, "eval_loss": 0.4344363510608673, "eval_runtime": 2.2851, "eval_samples_per_second": 997.328, "eval_steps_per_second": 3.939, "step": 31671 }, { "epoch": 621.18, "learning_rate": 7.68235002413457e-05, "loss": 0.427, "step": 31680 }, { "epoch": 621.37, "learning_rate": 7.678298573615714e-05, "loss": 0.4335, "step": 31690 }, { "epoch": 621.57, "learning_rate": 7.67424707103765e-05, "loss": 0.4321, "step": 31700 }, { "epoch": 621.76, "learning_rate": 7.670195517583325e-05, "loss": 0.4297, "step": 31710 }, { "epoch": 621.96, "learning_rate": 7.666143914435709e-05, "loss": 0.4327, "step": 31720 }, { "epoch": 622.0, "eval_loss": 0.434471994638443, "eval_runtime": 2.1731, "eval_samples_per_second": 1048.718, "eval_steps_per_second": 4.141, "step": 31722 }, { "epoch": 622.16, "learning_rate": 7.662092262777771e-05, "loss": 0.4279, "step": 31730 }, { "epoch": 622.35, "learning_rate": 7.658040563792508e-05, "loss": 0.4317, "step": 31740 }, { "epoch": 622.55, "learning_rate": 7.653988818662927e-05, "loss": 0.4361, "step": 31750 }, { "epoch": 622.75, "learning_rate": 7.649937028572046e-05, "loss": 0.4314, "step": 31760 }, { "epoch": 622.94, "learning_rate": 7.645885194702896e-05, "loss": 0.4263, "step": 31770 }, { "epoch": 623.0, "eval_loss": 0.4369990825653076, "eval_runtime": 2.2753, "eval_samples_per_second": 1001.637, "eval_steps_per_second": 3.956, "step": 31773 }, { "epoch": 623.14, "learning_rate": 7.641833318238519e-05, "loss": 0.429, "step": 31780 }, { "epoch": 623.33, "learning_rate": 7.63778140036198e-05, "loss": 0.4323, "step": 31790 }, { "epoch": 623.53, "learning_rate": 7.633729442256343e-05, "loss": 0.4243, "step": 31800 }, { "epoch": 623.73, "learning_rate": 7.629677445104691e-05, "loss": 0.427, "step": 31810 }, { "epoch": 623.92, "learning_rate": 7.62562541009012e-05, "loss": 0.4288, "step": 31820 }, { "epoch": 624.0, "eval_loss": 0.43234148621559143, "eval_runtime": 2.3479, "eval_samples_per_second": 970.674, "eval_steps_per_second": 3.833, "step": 31824 }, { "epoch": 624.12, "learning_rate": 7.621573338395731e-05, "loss": 0.4321, "step": 31830 }, { "epoch": 624.31, "learning_rate": 7.617521231204636e-05, "loss": 0.4283, "step": 31840 }, { "epoch": 624.51, "learning_rate": 7.613469089699965e-05, "loss": 0.433, "step": 31850 }, { "epoch": 624.71, "learning_rate": 7.609416915064846e-05, "loss": 0.4244, "step": 31860 }, { "epoch": 624.9, "learning_rate": 7.605364708482432e-05, "loss": 0.4316, "step": 31870 }, { "epoch": 625.0, "eval_loss": 0.4324721395969391, "eval_runtime": 2.2438, "eval_samples_per_second": 1015.687, "eval_steps_per_second": 4.011, "step": 31875 }, { "epoch": 625.1, "learning_rate": 7.60131247113587e-05, "loss": 0.4274, "step": 31880 }, { "epoch": 625.29, "learning_rate": 7.597260204208328e-05, "loss": 0.4307, "step": 31890 }, { "epoch": 625.49, "learning_rate": 7.593207908882977e-05, "loss": 0.4314, "step": 31900 }, { "epoch": 625.69, "learning_rate": 7.589155586342992e-05, "loss": 0.4292, "step": 31910 }, { "epoch": 625.88, "learning_rate": 7.585103237771566e-05, "loss": 0.431, "step": 31920 }, { "epoch": 626.0, "eval_loss": 0.43281376361846924, "eval_runtime": 2.193, "eval_samples_per_second": 1039.194, "eval_steps_per_second": 4.104, "step": 31926 }, { "epoch": 626.08, "learning_rate": 7.581050864351893e-05, "loss": 0.4305, "step": 31930 }, { "epoch": 626.27, "learning_rate": 7.576998467267174e-05, "loss": 0.435, "step": 31940 }, { "epoch": 626.47, "learning_rate": 7.57294604770062e-05, "loss": 0.4336, "step": 31950 }, { "epoch": 626.67, "learning_rate": 7.568893606835449e-05, "loss": 0.4347, "step": 31960 }, { "epoch": 626.86, "learning_rate": 7.56484114585488e-05, "loss": 0.4316, "step": 31970 }, { "epoch": 627.0, "eval_loss": 0.4315592646598816, "eval_runtime": 2.2604, "eval_samples_per_second": 1008.212, "eval_steps_per_second": 3.982, "step": 31977 }, { "epoch": 627.06, "learning_rate": 7.56078866594214e-05, "loss": 0.4269, "step": 31980 }, { "epoch": 627.25, "learning_rate": 7.556736168280467e-05, "loss": 0.4258, "step": 31990 }, { "epoch": 627.45, "learning_rate": 7.552683654053099e-05, "loss": 0.431, "step": 32000 }, { "epoch": 627.65, "learning_rate": 7.548631124443279e-05, "loss": 0.4276, "step": 32010 }, { "epoch": 627.84, "learning_rate": 7.544578580634253e-05, "loss": 0.4325, "step": 32020 }, { "epoch": 628.0, "eval_loss": 0.43107348680496216, "eval_runtime": 2.2599, "eval_samples_per_second": 1008.442, "eval_steps_per_second": 3.982, "step": 32028 }, { "epoch": 628.04, "learning_rate": 7.54052602380928e-05, "loss": 0.4279, "step": 32030 }, { "epoch": 628.24, "learning_rate": 7.536473455151605e-05, "loss": 0.4284, "step": 32040 }, { "epoch": 628.43, "learning_rate": 7.532420875844502e-05, "loss": 0.4271, "step": 32050 }, { "epoch": 628.63, "learning_rate": 7.528368287071222e-05, "loss": 0.4283, "step": 32060 }, { "epoch": 628.82, "learning_rate": 7.524315690015034e-05, "loss": 0.4287, "step": 32070 }, { "epoch": 629.0, "eval_loss": 0.4322940707206726, "eval_runtime": 2.2832, "eval_samples_per_second": 998.171, "eval_steps_per_second": 3.942, "step": 32079 }, { "epoch": 629.02, "learning_rate": 7.52026308585921e-05, "loss": 0.4283, "step": 32080 }, { "epoch": 629.22, "learning_rate": 7.516210475787015e-05, "loss": 0.4231, "step": 32090 }, { "epoch": 629.41, "learning_rate": 7.512157860981725e-05, "loss": 0.4285, "step": 32100 }, { "epoch": 629.61, "learning_rate": 7.508105242626608e-05, "loss": 0.4219, "step": 32110 }, { "epoch": 629.8, "learning_rate": 7.504052621904941e-05, "loss": 0.4272, "step": 32120 }, { "epoch": 630.0, "learning_rate": 7.5e-05, "loss": 0.4267, "step": 32130 }, { "epoch": 630.0, "eval_loss": 0.4301910400390625, "eval_runtime": 2.3346, "eval_samples_per_second": 976.183, "eval_steps_per_second": 3.855, "step": 32130 }, { "epoch": 630.2, "learning_rate": 7.495947378095059e-05, "loss": 0.4302, "step": 32140 }, { "epoch": 630.39, "learning_rate": 7.49189475737339e-05, "loss": 0.4287, "step": 32150 }, { "epoch": 630.59, "learning_rate": 7.487842139018277e-05, "loss": 0.4281, "step": 32160 }, { "epoch": 630.78, "learning_rate": 7.483789524212983e-05, "loss": 0.4288, "step": 32170 }, { "epoch": 630.98, "learning_rate": 7.47973691414079e-05, "loss": 0.426, "step": 32180 }, { "epoch": 631.0, "eval_loss": 0.43417489528656006, "eval_runtime": 2.248, "eval_samples_per_second": 1013.791, "eval_steps_per_second": 4.004, "step": 32181 }, { "epoch": 631.18, "learning_rate": 7.475684309984963e-05, "loss": 0.428, "step": 32190 }, { "epoch": 631.37, "learning_rate": 7.471631712928778e-05, "loss": 0.4266, "step": 32200 }, { "epoch": 631.57, "learning_rate": 7.467579124155501e-05, "loss": 0.4296, "step": 32210 }, { "epoch": 631.76, "learning_rate": 7.463526544848393e-05, "loss": 0.4288, "step": 32220 }, { "epoch": 631.96, "learning_rate": 7.459473976190722e-05, "loss": 0.4259, "step": 32230 }, { "epoch": 632.0, "eval_loss": 0.4324003756046295, "eval_runtime": 2.2505, "eval_samples_per_second": 1012.666, "eval_steps_per_second": 3.999, "step": 32232 }, { "epoch": 632.16, "learning_rate": 7.455421419365746e-05, "loss": 0.4285, "step": 32240 }, { "epoch": 632.35, "learning_rate": 7.451368875556721e-05, "loss": 0.4285, "step": 32250 }, { "epoch": 632.55, "learning_rate": 7.4473163459469e-05, "loss": 0.4277, "step": 32260 }, { "epoch": 632.75, "learning_rate": 7.443263831719533e-05, "loss": 0.4292, "step": 32270 }, { "epoch": 632.94, "learning_rate": 7.439211334057861e-05, "loss": 0.427, "step": 32280 }, { "epoch": 633.0, "eval_loss": 0.4315228760242462, "eval_runtime": 2.2264, "eval_samples_per_second": 1023.622, "eval_steps_per_second": 4.042, "step": 32283 }, { "epoch": 633.14, "learning_rate": 7.435158854145122e-05, "loss": 0.4282, "step": 32290 }, { "epoch": 633.33, "learning_rate": 7.431106393164551e-05, "loss": 0.4249, "step": 32300 }, { "epoch": 633.53, "learning_rate": 7.427053952299378e-05, "loss": 0.4264, "step": 32310 }, { "epoch": 633.73, "learning_rate": 7.423001532732826e-05, "loss": 0.425, "step": 32320 }, { "epoch": 633.92, "learning_rate": 7.418949135648106e-05, "loss": 0.4268, "step": 32330 }, { "epoch": 634.0, "eval_loss": 0.4299897253513336, "eval_runtime": 2.1933, "eval_samples_per_second": 1039.059, "eval_steps_per_second": 4.103, "step": 32334 }, { "epoch": 634.12, "learning_rate": 7.414896762228434e-05, "loss": 0.4227, "step": 32340 }, { "epoch": 634.31, "learning_rate": 7.410844413657008e-05, "loss": 0.4277, "step": 32350 }, { "epoch": 634.51, "learning_rate": 7.406792091117022e-05, "loss": 0.4286, "step": 32360 }, { "epoch": 634.71, "learning_rate": 7.402739795791672e-05, "loss": 0.4228, "step": 32370 }, { "epoch": 634.9, "learning_rate": 7.398687528864128e-05, "loss": 0.4251, "step": 32380 }, { "epoch": 635.0, "eval_loss": 0.4384912848472595, "eval_runtime": 2.2966, "eval_samples_per_second": 992.338, "eval_steps_per_second": 3.919, "step": 32385 }, { "epoch": 635.1, "learning_rate": 7.394635291517568e-05, "loss": 0.4337, "step": 32390 }, { "epoch": 635.29, "learning_rate": 7.390583084935152e-05, "loss": 0.4361, "step": 32400 }, { "epoch": 635.49, "learning_rate": 7.386530910300036e-05, "loss": 0.4254, "step": 32410 }, { "epoch": 635.69, "learning_rate": 7.382478768795366e-05, "loss": 0.4283, "step": 32420 }, { "epoch": 635.88, "learning_rate": 7.37842666160427e-05, "loss": 0.4291, "step": 32430 }, { "epoch": 636.0, "eval_loss": 0.43578821420669556, "eval_runtime": 2.3566, "eval_samples_per_second": 967.062, "eval_steps_per_second": 3.819, "step": 32436 }, { "epoch": 636.08, "learning_rate": 7.37437458990988e-05, "loss": 0.4256, "step": 32440 }, { "epoch": 636.27, "learning_rate": 7.370322554895306e-05, "loss": 0.4282, "step": 32450 }, { "epoch": 636.47, "learning_rate": 7.366270557743655e-05, "loss": 0.4266, "step": 32460 }, { "epoch": 636.67, "learning_rate": 7.362218599638018e-05, "loss": 0.4216, "step": 32470 }, { "epoch": 636.86, "learning_rate": 7.35816668176148e-05, "loss": 0.4273, "step": 32480 }, { "epoch": 637.0, "eval_loss": 0.43420571088790894, "eval_runtime": 2.2015, "eval_samples_per_second": 1035.206, "eval_steps_per_second": 4.088, "step": 32487 }, { "epoch": 637.06, "learning_rate": 7.354114805297107e-05, "loss": 0.4285, "step": 32490 }, { "epoch": 637.25, "learning_rate": 7.350062971427954e-05, "loss": 0.4278, "step": 32500 }, { "epoch": 637.45, "learning_rate": 7.346011181337071e-05, "loss": 0.424, "step": 32510 }, { "epoch": 637.65, "learning_rate": 7.341959436207488e-05, "loss": 0.4276, "step": 32520 }, { "epoch": 637.84, "learning_rate": 7.337907737222228e-05, "loss": 0.4238, "step": 32530 }, { "epoch": 638.0, "eval_loss": 0.4311440587043762, "eval_runtime": 2.2684, "eval_samples_per_second": 1004.661, "eval_steps_per_second": 3.968, "step": 32538 }, { "epoch": 638.04, "learning_rate": 7.333856085564293e-05, "loss": 0.4303, "step": 32540 }, { "epoch": 638.24, "learning_rate": 7.329804482416673e-05, "loss": 0.4302, "step": 32550 }, { "epoch": 638.43, "learning_rate": 7.325752928962352e-05, "loss": 0.4234, "step": 32560 }, { "epoch": 638.63, "learning_rate": 7.321701426384285e-05, "loss": 0.4266, "step": 32570 }, { "epoch": 638.82, "learning_rate": 7.31764997586543e-05, "loss": 0.4262, "step": 32580 }, { "epoch": 639.0, "eval_loss": 0.432700514793396, "eval_runtime": 2.3427, "eval_samples_per_second": 972.821, "eval_steps_per_second": 3.842, "step": 32589 }, { "epoch": 639.02, "learning_rate": 7.313598578588712e-05, "loss": 0.4235, "step": 32590 }, { "epoch": 639.22, "learning_rate": 7.309547235737053e-05, "loss": 0.4276, "step": 32600 }, { "epoch": 639.41, "learning_rate": 7.305495948493354e-05, "loss": 0.4363, "step": 32610 }, { "epoch": 639.61, "learning_rate": 7.301444718040499e-05, "loss": 0.4284, "step": 32620 }, { "epoch": 639.8, "learning_rate": 7.29739354556136e-05, "loss": 0.4243, "step": 32630 }, { "epoch": 640.0, "learning_rate": 7.293342432238786e-05, "loss": 0.4251, "step": 32640 }, { "epoch": 640.0, "eval_loss": 0.43292704224586487, "eval_runtime": 2.3486, "eval_samples_per_second": 970.382, "eval_steps_per_second": 3.832, "step": 32640 }, { "epoch": 640.2, "learning_rate": 7.289291379255611e-05, "loss": 0.4293, "step": 32650 }, { "epoch": 640.39, "learning_rate": 7.285240387794655e-05, "loss": 0.433, "step": 32660 }, { "epoch": 640.59, "learning_rate": 7.281189459038718e-05, "loss": 0.4284, "step": 32670 }, { "epoch": 640.78, "learning_rate": 7.27713859417058e-05, "loss": 0.4247, "step": 32680 }, { "epoch": 640.98, "learning_rate": 7.273087794373e-05, "loss": 0.4276, "step": 32690 }, { "epoch": 641.0, "eval_loss": 0.43443429470062256, "eval_runtime": 2.2905, "eval_samples_per_second": 994.976, "eval_steps_per_second": 3.929, "step": 32691 }, { "epoch": 641.18, "learning_rate": 7.269037060828723e-05, "loss": 0.4231, "step": 32700 }, { "epoch": 641.37, "learning_rate": 7.264986394720473e-05, "loss": 0.4244, "step": 32710 }, { "epoch": 641.57, "learning_rate": 7.260935797230956e-05, "loss": 0.4202, "step": 32720 }, { "epoch": 641.76, "learning_rate": 7.256885269542851e-05, "loss": 0.4239, "step": 32730 }, { "epoch": 641.96, "learning_rate": 7.252834812838831e-05, "loss": 0.4274, "step": 32740 }, { "epoch": 642.0, "eval_loss": 0.43040502071380615, "eval_runtime": 2.223, "eval_samples_per_second": 1025.208, "eval_steps_per_second": 4.049, "step": 32742 }, { "epoch": 642.16, "learning_rate": 7.248784428301531e-05, "loss": 0.4222, "step": 32750 }, { "epoch": 642.35, "learning_rate": 7.244734117113573e-05, "loss": 0.4268, "step": 32760 }, { "epoch": 642.55, "learning_rate": 7.240683880457563e-05, "loss": 0.4229, "step": 32770 }, { "epoch": 642.75, "learning_rate": 7.236633719516073e-05, "loss": 0.425, "step": 32780 }, { "epoch": 642.94, "learning_rate": 7.232583635471668e-05, "loss": 0.4269, "step": 32790 }, { "epoch": 643.0, "eval_loss": 0.42628300189971924, "eval_runtime": 2.2073, "eval_samples_per_second": 1032.467, "eval_steps_per_second": 4.077, "step": 32793 }, { "epoch": 643.14, "learning_rate": 7.228533629506874e-05, "loss": 0.4247, "step": 32800 }, { "epoch": 643.33, "learning_rate": 7.224483702804207e-05, "loss": 0.4222, "step": 32810 }, { "epoch": 643.53, "learning_rate": 7.220433856546153e-05, "loss": 0.4268, "step": 32820 }, { "epoch": 643.73, "learning_rate": 7.216384091915178e-05, "loss": 0.4273, "step": 32830 }, { "epoch": 643.92, "learning_rate": 7.212334410093727e-05, "loss": 0.4217, "step": 32840 }, { "epoch": 644.0, "eval_loss": 0.43053871393203735, "eval_runtime": 2.4095, "eval_samples_per_second": 945.824, "eval_steps_per_second": 3.735, "step": 32844 }, { "epoch": 644.12, "learning_rate": 7.208284812264208e-05, "loss": 0.43, "step": 32850 }, { "epoch": 644.31, "learning_rate": 7.20423529960902e-05, "loss": 0.424, "step": 32860 }, { "epoch": 644.51, "learning_rate": 7.200185873310526e-05, "loss": 0.4227, "step": 32870 }, { "epoch": 644.71, "learning_rate": 7.196136534551073e-05, "loss": 0.4279, "step": 32880 }, { "epoch": 644.9, "learning_rate": 7.192087284512977e-05, "loss": 0.4204, "step": 32890 }, { "epoch": 645.0, "eval_loss": 0.431392639875412, "eval_runtime": 2.3078, "eval_samples_per_second": 987.517, "eval_steps_per_second": 3.9, "step": 32895 }, { "epoch": 645.1, "learning_rate": 7.188038124378522e-05, "loss": 0.4231, "step": 32900 }, { "epoch": 645.29, "learning_rate": 7.18398905532998e-05, "loss": 0.4287, "step": 32910 }, { "epoch": 645.49, "learning_rate": 7.179940078549585e-05, "loss": 0.4257, "step": 32920 }, { "epoch": 645.69, "learning_rate": 7.17589119521955e-05, "loss": 0.4217, "step": 32930 }, { "epoch": 645.88, "learning_rate": 7.171842406522055e-05, "loss": 0.4268, "step": 32940 }, { "epoch": 646.0, "eval_loss": 0.4283953607082367, "eval_runtime": 2.1783, "eval_samples_per_second": 1046.25, "eval_steps_per_second": 4.132, "step": 32946 }, { "epoch": 646.08, "learning_rate": 7.167793713639264e-05, "loss": 0.4245, "step": 32950 }, { "epoch": 646.27, "learning_rate": 7.163745117753296e-05, "loss": 0.4217, "step": 32960 }, { "epoch": 646.47, "learning_rate": 7.159696620046254e-05, "loss": 0.4245, "step": 32970 }, { "epoch": 646.67, "learning_rate": 7.15564822170021e-05, "loss": 0.4269, "step": 32980 }, { "epoch": 646.86, "learning_rate": 7.151599923897207e-05, "loss": 0.4227, "step": 32990 }, { "epoch": 647.0, "eval_loss": 0.42809709906578064, "eval_runtime": 2.3106, "eval_samples_per_second": 986.327, "eval_steps_per_second": 3.895, "step": 32997 }, { "epoch": 647.06, "learning_rate": 7.147551727819256e-05, "loss": 0.4247, "step": 33000 }, { "epoch": 647.25, "learning_rate": 7.143503634648338e-05, "loss": 0.4278, "step": 33010 }, { "epoch": 647.45, "learning_rate": 7.13945564556641e-05, "loss": 0.424, "step": 33020 }, { "epoch": 647.65, "learning_rate": 7.135407761755393e-05, "loss": 0.4287, "step": 33030 }, { "epoch": 647.84, "learning_rate": 7.131359984397175e-05, "loss": 0.4236, "step": 33040 }, { "epoch": 648.0, "eval_loss": 0.4320383071899414, "eval_runtime": 2.2798, "eval_samples_per_second": 999.632, "eval_steps_per_second": 3.948, "step": 33048 }, { "epoch": 648.04, "learning_rate": 7.127312314673624e-05, "loss": 0.4257, "step": 33050 }, { "epoch": 648.24, "learning_rate": 7.123264753766563e-05, "loss": 0.4239, "step": 33060 }, { "epoch": 648.43, "learning_rate": 7.119217302857792e-05, "loss": 0.4216, "step": 33070 }, { "epoch": 648.63, "learning_rate": 7.115169963129076e-05, "loss": 0.4278, "step": 33080 }, { "epoch": 648.82, "learning_rate": 7.11112273576215e-05, "loss": 0.4245, "step": 33090 }, { "epoch": 649.0, "eval_loss": 0.42947202920913696, "eval_runtime": 2.2983, "eval_samples_per_second": 991.598, "eval_steps_per_second": 3.916, "step": 33099 }, { "epoch": 649.02, "learning_rate": 7.107075621938714e-05, "loss": 0.4214, "step": 33100 }, { "epoch": 649.22, "learning_rate": 7.103028622840429e-05, "loss": 0.4207, "step": 33110 }, { "epoch": 649.41, "learning_rate": 7.098981739648934e-05, "loss": 0.4245, "step": 33120 }, { "epoch": 649.61, "learning_rate": 7.094934973545827e-05, "loss": 0.4224, "step": 33130 }, { "epoch": 649.8, "learning_rate": 7.090888325712676e-05, "loss": 0.4268, "step": 33140 }, { "epoch": 650.0, "learning_rate": 7.086841797331007e-05, "loss": 0.4229, "step": 33150 }, { "epoch": 650.0, "eval_loss": 0.42622441053390503, "eval_runtime": 2.3563, "eval_samples_per_second": 967.185, "eval_steps_per_second": 3.82, "step": 33150 }, { "epoch": 650.2, "learning_rate": 7.082795389582323e-05, "loss": 0.4253, "step": 33160 }, { "epoch": 650.39, "learning_rate": 7.078749103648079e-05, "loss": 0.4191, "step": 33170 }, { "epoch": 650.59, "learning_rate": 7.074702940709699e-05, "loss": 0.4206, "step": 33180 }, { "epoch": 650.78, "learning_rate": 7.070656901948578e-05, "loss": 0.4246, "step": 33190 }, { "epoch": 650.98, "learning_rate": 7.066610988546065e-05, "loss": 0.423, "step": 33200 }, { "epoch": 651.0, "eval_loss": 0.4238925576210022, "eval_runtime": 2.2734, "eval_samples_per_second": 1002.449, "eval_steps_per_second": 3.959, "step": 33201 }, { "epoch": 651.18, "learning_rate": 7.06256520168348e-05, "loss": 0.4184, "step": 33210 }, { "epoch": 651.37, "learning_rate": 7.0585195425421e-05, "loss": 0.421, "step": 33220 }, { "epoch": 651.57, "learning_rate": 7.054474012303166e-05, "loss": 0.4231, "step": 33230 }, { "epoch": 651.76, "learning_rate": 7.050428612147885e-05, "loss": 0.4226, "step": 33240 }, { "epoch": 651.96, "learning_rate": 7.046383343257421e-05, "loss": 0.4209, "step": 33250 }, { "epoch": 652.0, "eval_loss": 0.42940622568130493, "eval_runtime": 2.2106, "eval_samples_per_second": 1030.937, "eval_steps_per_second": 4.071, "step": 33252 }, { "epoch": 652.16, "learning_rate": 7.042338206812907e-05, "loss": 0.4214, "step": 33260 }, { "epoch": 652.35, "learning_rate": 7.038293203995428e-05, "loss": 0.4221, "step": 33270 }, { "epoch": 652.55, "learning_rate": 7.034248335986037e-05, "loss": 0.4252, "step": 33280 }, { "epoch": 652.75, "learning_rate": 7.030203603965747e-05, "loss": 0.4217, "step": 33290 }, { "epoch": 652.94, "learning_rate": 7.026159009115522e-05, "loss": 0.4209, "step": 33300 }, { "epoch": 653.0, "eval_loss": 0.43150344491004944, "eval_runtime": 2.3003, "eval_samples_per_second": 990.759, "eval_steps_per_second": 3.913, "step": 33303 }, { "epoch": 653.14, "learning_rate": 7.022114552616304e-05, "loss": 0.4225, "step": 33310 }, { "epoch": 653.33, "learning_rate": 7.018070235648975e-05, "loss": 0.421, "step": 33320 }, { "epoch": 653.53, "learning_rate": 7.01402605939439e-05, "loss": 0.4221, "step": 33330 }, { "epoch": 653.73, "learning_rate": 7.009982025033356e-05, "loss": 0.4231, "step": 33340 }, { "epoch": 653.92, "learning_rate": 7.005938133746645e-05, "loss": 0.425, "step": 33350 }, { "epoch": 654.0, "eval_loss": 0.42987799644470215, "eval_runtime": 2.1499, "eval_samples_per_second": 1060.042, "eval_steps_per_second": 4.186, "step": 33354 }, { "epoch": 654.12, "learning_rate": 7.001894386714981e-05, "loss": 0.4214, "step": 33360 }, { "epoch": 654.31, "learning_rate": 6.997850785119044e-05, "loss": 0.4247, "step": 33370 }, { "epoch": 654.51, "learning_rate": 6.993807330139481e-05, "loss": 0.4203, "step": 33380 }, { "epoch": 654.71, "learning_rate": 6.989764022956885e-05, "loss": 0.4245, "step": 33390 }, { "epoch": 654.9, "learning_rate": 6.985720864751819e-05, "loss": 0.418, "step": 33400 }, { "epoch": 655.0, "eval_loss": 0.42820972204208374, "eval_runtime": 2.2377, "eval_samples_per_second": 1018.475, "eval_steps_per_second": 4.022, "step": 33405 }, { "epoch": 655.1, "learning_rate": 6.981677856704788e-05, "loss": 0.4256, "step": 33410 }, { "epoch": 655.29, "learning_rate": 6.977634999996266e-05, "loss": 0.4199, "step": 33420 }, { "epoch": 655.49, "learning_rate": 6.973592295806673e-05, "loss": 0.4251, "step": 33430 }, { "epoch": 655.69, "learning_rate": 6.96954974531639e-05, "loss": 0.4221, "step": 33440 }, { "epoch": 655.88, "learning_rate": 6.96550734970575e-05, "loss": 0.423, "step": 33450 }, { "epoch": 656.0, "eval_loss": 0.4263513386249542, "eval_runtime": 2.317, "eval_samples_per_second": 983.589, "eval_steps_per_second": 3.884, "step": 33456 }, { "epoch": 656.08, "learning_rate": 6.961465110155042e-05, "loss": 0.4227, "step": 33460 }, { "epoch": 656.27, "learning_rate": 6.957423027844515e-05, "loss": 0.425, "step": 33470 }, { "epoch": 656.47, "learning_rate": 6.95338110395436e-05, "loss": 0.419, "step": 33480 }, { "epoch": 656.67, "learning_rate": 6.949339339664735e-05, "loss": 0.4221, "step": 33490 }, { "epoch": 656.86, "learning_rate": 6.945297736155742e-05, "loss": 0.4267, "step": 33500 }, { "epoch": 657.0, "eval_loss": 0.42961037158966064, "eval_runtime": 2.3194, "eval_samples_per_second": 982.598, "eval_steps_per_second": 3.88, "step": 33507 }, { "epoch": 657.06, "learning_rate": 6.941256294607435e-05, "loss": 0.4198, "step": 33510 }, { "epoch": 657.25, "learning_rate": 6.937215016199833e-05, "loss": 0.4256, "step": 33520 }, { "epoch": 657.45, "learning_rate": 6.933173902112892e-05, "loss": 0.4255, "step": 33530 }, { "epoch": 657.65, "learning_rate": 6.929132953526531e-05, "loss": 0.424, "step": 33540 }, { "epoch": 657.84, "learning_rate": 6.925092171620616e-05, "loss": 0.4226, "step": 33550 }, { "epoch": 658.0, "eval_loss": 0.4268750548362732, "eval_runtime": 2.2466, "eval_samples_per_second": 1014.438, "eval_steps_per_second": 4.006, "step": 33558 }, { "epoch": 658.04, "learning_rate": 6.921051557574965e-05, "loss": 0.4151, "step": 33560 }, { "epoch": 658.24, "learning_rate": 6.91701111256935e-05, "loss": 0.4231, "step": 33570 }, { "epoch": 658.43, "learning_rate": 6.912970837783485e-05, "loss": 0.4223, "step": 33580 }, { "epoch": 658.63, "learning_rate": 6.908930734397044e-05, "loss": 0.4202, "step": 33590 }, { "epoch": 658.82, "learning_rate": 6.904890803589645e-05, "loss": 0.4213, "step": 33600 }, { "epoch": 659.0, "eval_loss": 0.42958545684814453, "eval_runtime": 2.3495, "eval_samples_per_second": 969.976, "eval_steps_per_second": 3.831, "step": 33609 }, { "epoch": 659.02, "learning_rate": 6.900851046540862e-05, "loss": 0.4146, "step": 33610 }, { "epoch": 659.22, "learning_rate": 6.896811464430209e-05, "loss": 0.4223, "step": 33620 }, { "epoch": 659.41, "learning_rate": 6.892772058437158e-05, "loss": 0.4202, "step": 33630 }, { "epoch": 659.61, "learning_rate": 6.888732829741124e-05, "loss": 0.4214, "step": 33640 }, { "epoch": 659.8, "learning_rate": 6.884693779521468e-05, "loss": 0.4244, "step": 33650 }, { "epoch": 660.0, "learning_rate": 6.880654908957507e-05, "loss": 0.4192, "step": 33660 }, { "epoch": 660.0, "eval_loss": 0.4259309470653534, "eval_runtime": 2.3163, "eval_samples_per_second": 983.884, "eval_steps_per_second": 3.885, "step": 33660 }, { "epoch": 660.2, "learning_rate": 6.876616219228499e-05, "loss": 0.4216, "step": 33670 }, { "epoch": 660.39, "learning_rate": 6.872577711513655e-05, "loss": 0.4201, "step": 33680 }, { "epoch": 660.59, "learning_rate": 6.868539386992124e-05, "loss": 0.4154, "step": 33690 }, { "epoch": 660.78, "learning_rate": 6.86450124684301e-05, "loss": 0.4229, "step": 33700 }, { "epoch": 660.98, "learning_rate": 6.860463292245359e-05, "loss": 0.4234, "step": 33710 }, { "epoch": 661.0, "eval_loss": 0.42434313893318176, "eval_runtime": 2.3499, "eval_samples_per_second": 969.824, "eval_steps_per_second": 3.83, "step": 33711 }, { "epoch": 661.18, "learning_rate": 6.856425524378163e-05, "loss": 0.4272, "step": 33720 }, { "epoch": 661.37, "learning_rate": 6.852387944420363e-05, "loss": 0.4188, "step": 33730 }, { "epoch": 661.57, "learning_rate": 6.848350553550837e-05, "loss": 0.4209, "step": 33740 }, { "epoch": 661.76, "learning_rate": 6.844313352948416e-05, "loss": 0.4176, "step": 33750 }, { "epoch": 661.96, "learning_rate": 6.840276343791873e-05, "loss": 0.4205, "step": 33760 }, { "epoch": 662.0, "eval_loss": 0.42560145258903503, "eval_runtime": 2.2243, "eval_samples_per_second": 1024.574, "eval_steps_per_second": 4.046, "step": 33762 }, { "epoch": 662.16, "learning_rate": 6.836239527259926e-05, "loss": 0.4257, "step": 33770 }, { "epoch": 662.35, "learning_rate": 6.832202904531235e-05, "loss": 0.4229, "step": 33780 }, { "epoch": 662.55, "learning_rate": 6.8281664767844e-05, "loss": 0.4202, "step": 33790 }, { "epoch": 662.75, "learning_rate": 6.824130245197971e-05, "loss": 0.4254, "step": 33800 }, { "epoch": 662.94, "learning_rate": 6.820094210950436e-05, "loss": 0.4185, "step": 33810 }, { "epoch": 663.0, "eval_loss": 0.4250730872154236, "eval_runtime": 2.2197, "eval_samples_per_second": 1026.736, "eval_steps_per_second": 4.055, "step": 33813 }, { "epoch": 663.14, "learning_rate": 6.81605837522023e-05, "loss": 0.424, "step": 33820 }, { "epoch": 663.33, "learning_rate": 6.812022739185722e-05, "loss": 0.4151, "step": 33830 }, { "epoch": 663.53, "learning_rate": 6.807987304025235e-05, "loss": 0.4183, "step": 33840 }, { "epoch": 663.73, "learning_rate": 6.80395207091702e-05, "loss": 0.4142, "step": 33850 }, { "epoch": 663.92, "learning_rate": 6.799917041039276e-05, "loss": 0.4212, "step": 33860 }, { "epoch": 664.0, "eval_loss": 0.42312219738960266, "eval_runtime": 2.2415, "eval_samples_per_second": 1016.727, "eval_steps_per_second": 4.015, "step": 33864 }, { "epoch": 664.12, "learning_rate": 6.795882215570143e-05, "loss": 0.4202, "step": 33870 }, { "epoch": 664.31, "learning_rate": 6.7918475956877e-05, "loss": 0.4145, "step": 33880 }, { "epoch": 664.51, "learning_rate": 6.787813182569968e-05, "loss": 0.4202, "step": 33890 }, { "epoch": 664.71, "learning_rate": 6.783778977394899e-05, "loss": 0.4203, "step": 33900 }, { "epoch": 664.9, "learning_rate": 6.779744981340399e-05, "loss": 0.4228, "step": 33910 }, { "epoch": 665.0, "eval_loss": 0.42498070001602173, "eval_runtime": 2.2609, "eval_samples_per_second": 1008.014, "eval_steps_per_second": 3.981, "step": 33915 }, { "epoch": 665.1, "learning_rate": 6.775711195584299e-05, "loss": 0.421, "step": 33920 }, { "epoch": 665.29, "learning_rate": 6.771677621304376e-05, "loss": 0.4228, "step": 33930 }, { "epoch": 665.49, "learning_rate": 6.767644259678348e-05, "loss": 0.4227, "step": 33940 }, { "epoch": 665.69, "learning_rate": 6.76361111188386e-05, "loss": 0.4181, "step": 33950 }, { "epoch": 665.88, "learning_rate": 6.759578179098505e-05, "loss": 0.421, "step": 33960 }, { "epoch": 666.0, "eval_loss": 0.42840486764907837, "eval_runtime": 2.2641, "eval_samples_per_second": 1006.588, "eval_steps_per_second": 3.975, "step": 33966 }, { "epoch": 666.08, "learning_rate": 6.755545462499812e-05, "loss": 0.4178, "step": 33970 }, { "epoch": 666.27, "learning_rate": 6.751512963265234e-05, "loss": 0.419, "step": 33980 }, { "epoch": 666.47, "learning_rate": 6.747480682572185e-05, "loss": 0.4198, "step": 33990 }, { "epoch": 666.67, "learning_rate": 6.743448621597988e-05, "loss": 0.4206, "step": 34000 }, { "epoch": 666.86, "learning_rate": 6.739416781519924e-05, "loss": 0.4226, "step": 34010 }, { "epoch": 667.0, "eval_loss": 0.4243127107620239, "eval_runtime": 2.2983, "eval_samples_per_second": 991.589, "eval_steps_per_second": 3.916, "step": 34017 }, { "epoch": 667.06, "learning_rate": 6.735385163515194e-05, "loss": 0.4209, "step": 34020 }, { "epoch": 667.25, "learning_rate": 6.731353768760947e-05, "loss": 0.4203, "step": 34030 }, { "epoch": 667.45, "learning_rate": 6.727322598434259e-05, "loss": 0.4172, "step": 34040 }, { "epoch": 667.65, "learning_rate": 6.723291653712135e-05, "loss": 0.4184, "step": 34050 }, { "epoch": 667.84, "learning_rate": 6.719260935771529e-05, "loss": 0.4201, "step": 34060 }, { "epoch": 668.0, "eval_loss": 0.4278631806373596, "eval_runtime": 2.1713, "eval_samples_per_second": 1049.583, "eval_steps_per_second": 4.145, "step": 34068 }, { "epoch": 668.04, "learning_rate": 6.715230445789315e-05, "loss": 0.424, "step": 34070 }, { "epoch": 668.24, "learning_rate": 6.711200184942311e-05, "loss": 0.4177, "step": 34080 }, { "epoch": 668.43, "learning_rate": 6.70717015440726e-05, "loss": 0.4189, "step": 34090 }, { "epoch": 668.63, "learning_rate": 6.703140355360843e-05, "loss": 0.4213, "step": 34100 }, { "epoch": 668.82, "learning_rate": 6.69911078897967e-05, "loss": 0.4213, "step": 34110 }, { "epoch": 669.0, "eval_loss": 0.4210264980792999, "eval_runtime": 2.3532, "eval_samples_per_second": 968.452, "eval_steps_per_second": 3.825, "step": 34119 }, { "epoch": 669.02, "learning_rate": 6.695081456440284e-05, "loss": 0.4204, "step": 34120 }, { "epoch": 669.22, "learning_rate": 6.691052358919162e-05, "loss": 0.4156, "step": 34130 }, { "epoch": 669.41, "learning_rate": 6.687023497592709e-05, "loss": 0.4221, "step": 34140 }, { "epoch": 669.61, "learning_rate": 6.682994873637267e-05, "loss": 0.42, "step": 34150 }, { "epoch": 669.8, "learning_rate": 6.678966488229099e-05, "loss": 0.4154, "step": 34160 }, { "epoch": 670.0, "learning_rate": 6.674938342544404e-05, "loss": 0.4237, "step": 34170 }, { "epoch": 670.0, "eval_loss": 0.4264044165611267, "eval_runtime": 2.2635, "eval_samples_per_second": 1006.853, "eval_steps_per_second": 3.976, "step": 34170 }, { "epoch": 670.2, "learning_rate": 6.670910437759317e-05, "loss": 0.4231, "step": 34180 }, { "epoch": 670.39, "learning_rate": 6.666882775049885e-05, "loss": 0.4167, "step": 34190 }, { "epoch": 670.59, "learning_rate": 6.662855355592109e-05, "loss": 0.4183, "step": 34200 }, { "epoch": 670.78, "learning_rate": 6.658828180561893e-05, "loss": 0.4166, "step": 34210 }, { "epoch": 670.98, "learning_rate": 6.654801251135092e-05, "loss": 0.4228, "step": 34220 }, { "epoch": 671.0, "eval_loss": 0.4236544668674469, "eval_runtime": 2.3232, "eval_samples_per_second": 980.983, "eval_steps_per_second": 3.874, "step": 34221 }, { "epoch": 671.18, "learning_rate": 6.650774568487473e-05, "loss": 0.4168, "step": 34230 }, { "epoch": 671.37, "learning_rate": 6.646748133794743e-05, "loss": 0.4202, "step": 34240 }, { "epoch": 671.57, "learning_rate": 6.64272194823253e-05, "loss": 0.4232, "step": 34250 }, { "epoch": 671.76, "learning_rate": 6.638696012976386e-05, "loss": 0.4189, "step": 34260 }, { "epoch": 671.96, "learning_rate": 6.634670329201798e-05, "loss": 0.4181, "step": 34270 }, { "epoch": 672.0, "eval_loss": 0.4245344400405884, "eval_runtime": 2.3276, "eval_samples_per_second": 979.099, "eval_steps_per_second": 3.867, "step": 34272 }, { "epoch": 672.16, "learning_rate": 6.630644898084175e-05, "loss": 0.4182, "step": 34280 }, { "epoch": 672.35, "learning_rate": 6.626619720798854e-05, "loss": 0.4228, "step": 34290 }, { "epoch": 672.55, "learning_rate": 6.622594798521094e-05, "loss": 0.4226, "step": 34300 }, { "epoch": 672.75, "learning_rate": 6.618570132426088e-05, "loss": 0.4187, "step": 34310 }, { "epoch": 672.94, "learning_rate": 6.614545723688945e-05, "loss": 0.4242, "step": 34320 }, { "epoch": 673.0, "eval_loss": 0.42444732785224915, "eval_runtime": 2.1962, "eval_samples_per_second": 1037.713, "eval_steps_per_second": 4.098, "step": 34323 }, { "epoch": 673.14, "learning_rate": 6.610521573484701e-05, "loss": 0.4215, "step": 34330 }, { "epoch": 673.33, "learning_rate": 6.606497682988323e-05, "loss": 0.4206, "step": 34340 }, { "epoch": 673.53, "learning_rate": 6.60247405337469e-05, "loss": 0.4224, "step": 34350 }, { "epoch": 673.73, "learning_rate": 6.598450685818622e-05, "loss": 0.4226, "step": 34360 }, { "epoch": 673.92, "learning_rate": 6.594427581494844e-05, "loss": 0.4178, "step": 34370 }, { "epoch": 674.0, "eval_loss": 0.424979567527771, "eval_runtime": 2.2868, "eval_samples_per_second": 996.581, "eval_steps_per_second": 3.936, "step": 34374 }, { "epoch": 674.12, "learning_rate": 6.590404741578018e-05, "loss": 0.4136, "step": 34380 }, { "epoch": 674.31, "learning_rate": 6.586382167242722e-05, "loss": 0.414, "step": 34390 }, { "epoch": 674.51, "learning_rate": 6.582359859663454e-05, "loss": 0.4187, "step": 34400 }, { "epoch": 674.71, "learning_rate": 6.578337820014644e-05, "loss": 0.4224, "step": 34410 }, { "epoch": 674.9, "learning_rate": 6.574316049470635e-05, "loss": 0.4184, "step": 34420 }, { "epoch": 675.0, "eval_loss": 0.427442342042923, "eval_runtime": 2.3158, "eval_samples_per_second": 984.096, "eval_steps_per_second": 3.886, "step": 34425 }, { "epoch": 675.1, "learning_rate": 6.570294549205695e-05, "loss": 0.4225, "step": 34430 }, { "epoch": 675.29, "learning_rate": 6.56627332039401e-05, "loss": 0.4172, "step": 34440 }, { "epoch": 675.49, "learning_rate": 6.562252364209694e-05, "loss": 0.4148, "step": 34450 }, { "epoch": 675.69, "learning_rate": 6.558231681826776e-05, "loss": 0.418, "step": 34460 }, { "epoch": 675.88, "learning_rate": 6.5542112744192e-05, "loss": 0.4163, "step": 34470 }, { "epoch": 676.0, "eval_loss": 0.4221233129501343, "eval_runtime": 2.3487, "eval_samples_per_second": 970.314, "eval_steps_per_second": 3.832, "step": 34476 }, { "epoch": 676.08, "learning_rate": 6.550191143160839e-05, "loss": 0.4191, "step": 34480 }, { "epoch": 676.27, "learning_rate": 6.54617128922548e-05, "loss": 0.4216, "step": 34490 }, { "epoch": 676.47, "learning_rate": 6.542151713786834e-05, "loss": 0.4202, "step": 34500 }, { "epoch": 676.67, "learning_rate": 6.538132418018525e-05, "loss": 0.4188, "step": 34510 }, { "epoch": 676.86, "learning_rate": 6.5341134030941e-05, "loss": 0.4288, "step": 34520 }, { "epoch": 677.0, "eval_loss": 0.42452743649482727, "eval_runtime": 2.2039, "eval_samples_per_second": 1034.062, "eval_steps_per_second": 4.084, "step": 34527 }, { "epoch": 677.06, "learning_rate": 6.530094670187019e-05, "loss": 0.4153, "step": 34530 }, { "epoch": 677.25, "learning_rate": 6.526076220470661e-05, "loss": 0.423, "step": 34540 }, { "epoch": 677.45, "learning_rate": 6.52205805511833e-05, "loss": 0.4241, "step": 34550 }, { "epoch": 677.65, "learning_rate": 6.518040175303233e-05, "loss": 0.4211, "step": 34560 }, { "epoch": 677.84, "learning_rate": 6.514022582198508e-05, "loss": 0.4205, "step": 34570 }, { "epoch": 678.0, "eval_loss": 0.42583590745925903, "eval_runtime": 2.3161, "eval_samples_per_second": 983.968, "eval_steps_per_second": 3.886, "step": 34578 }, { "epoch": 678.04, "learning_rate": 6.510005276977197e-05, "loss": 0.425, "step": 34580 }, { "epoch": 678.24, "learning_rate": 6.505988260812268e-05, "loss": 0.4236, "step": 34590 }, { "epoch": 678.43, "learning_rate": 6.501971534876599e-05, "loss": 0.4227, "step": 34600 }, { "epoch": 678.63, "learning_rate": 6.497955100342979e-05, "loss": 0.4136, "step": 34610 }, { "epoch": 678.82, "learning_rate": 6.493938958384127e-05, "loss": 0.4167, "step": 34620 }, { "epoch": 679.0, "eval_loss": 0.4242996275424957, "eval_runtime": 2.1858, "eval_samples_per_second": 1042.625, "eval_steps_per_second": 4.117, "step": 34629 }, { "epoch": 679.02, "learning_rate": 6.489923110172659e-05, "loss": 0.4198, "step": 34630 }, { "epoch": 679.22, "learning_rate": 6.485907556881117e-05, "loss": 0.4204, "step": 34640 }, { "epoch": 679.41, "learning_rate": 6.481892299681953e-05, "loss": 0.4209, "step": 34650 }, { "epoch": 679.61, "learning_rate": 6.477877339747528e-05, "loss": 0.4188, "step": 34660 }, { "epoch": 679.8, "learning_rate": 6.473862678250128e-05, "loss": 0.4228, "step": 34670 }, { "epoch": 680.0, "learning_rate": 6.469848316361938e-05, "loss": 0.4172, "step": 34680 }, { "epoch": 680.0, "eval_loss": 0.4240746796131134, "eval_runtime": 2.2674, "eval_samples_per_second": 1005.137, "eval_steps_per_second": 3.969, "step": 34680 }, { "epoch": 680.2, "learning_rate": 6.465834255255067e-05, "loss": 0.4173, "step": 34690 }, { "epoch": 680.39, "learning_rate": 6.461820496101528e-05, "loss": 0.4136, "step": 34700 }, { "epoch": 680.59, "learning_rate": 6.45780704007325e-05, "loss": 0.4175, "step": 34710 }, { "epoch": 680.78, "learning_rate": 6.453793888342077e-05, "loss": 0.4162, "step": 34720 }, { "epoch": 680.98, "learning_rate": 6.449781042079752e-05, "loss": 0.4212, "step": 34730 }, { "epoch": 681.0, "eval_loss": 0.4216473698616028, "eval_runtime": 2.2937, "eval_samples_per_second": 993.581, "eval_steps_per_second": 3.924, "step": 34731 }, { "epoch": 681.18, "learning_rate": 6.445768502457942e-05, "loss": 0.4212, "step": 34740 }, { "epoch": 681.37, "learning_rate": 6.441756270648216e-05, "loss": 0.4171, "step": 34750 }, { "epoch": 681.57, "learning_rate": 6.43774434782206e-05, "loss": 0.4208, "step": 34760 }, { "epoch": 681.76, "learning_rate": 6.433732735150862e-05, "loss": 0.4218, "step": 34770 }, { "epoch": 681.96, "learning_rate": 6.429721433805928e-05, "loss": 0.4164, "step": 34780 }, { "epoch": 682.0, "eval_loss": 0.42144080996513367, "eval_runtime": 2.2344, "eval_samples_per_second": 1019.974, "eval_steps_per_second": 4.028, "step": 34782 }, { "epoch": 682.16, "learning_rate": 6.425710444958465e-05, "loss": 0.4188, "step": 34790 }, { "epoch": 682.35, "learning_rate": 6.42169976977959e-05, "loss": 0.4141, "step": 34800 }, { "epoch": 682.55, "learning_rate": 6.417689409440339e-05, "loss": 0.4203, "step": 34810 }, { "epoch": 682.75, "learning_rate": 6.413679365111635e-05, "loss": 0.4194, "step": 34820 }, { "epoch": 682.94, "learning_rate": 6.409669637964337e-05, "loss": 0.4171, "step": 34830 }, { "epoch": 683.0, "eval_loss": 0.42304977774620056, "eval_runtime": 2.2703, "eval_samples_per_second": 1003.818, "eval_steps_per_second": 3.964, "step": 34833 }, { "epoch": 683.14, "learning_rate": 6.405660229169183e-05, "loss": 0.4168, "step": 34840 }, { "epoch": 683.33, "learning_rate": 6.401651139896838e-05, "loss": 0.4201, "step": 34850 }, { "epoch": 683.53, "learning_rate": 6.397642371317866e-05, "loss": 0.4153, "step": 34860 }, { "epoch": 683.73, "learning_rate": 6.393633924602733e-05, "loss": 0.4181, "step": 34870 }, { "epoch": 683.92, "learning_rate": 6.389625800921824e-05, "loss": 0.4166, "step": 34880 }, { "epoch": 684.0, "eval_loss": 0.42609888315200806, "eval_runtime": 2.3337, "eval_samples_per_second": 976.565, "eval_steps_per_second": 3.857, "step": 34884 }, { "epoch": 684.12, "learning_rate": 6.385618001445413e-05, "loss": 0.4207, "step": 34890 }, { "epoch": 684.31, "learning_rate": 6.381610527343694e-05, "loss": 0.4198, "step": 34900 }, { "epoch": 684.51, "learning_rate": 6.377603379786757e-05, "loss": 0.415, "step": 34910 }, { "epoch": 684.71, "learning_rate": 6.373596559944604e-05, "loss": 0.4151, "step": 34920 }, { "epoch": 684.9, "learning_rate": 6.369590068987135e-05, "loss": 0.4172, "step": 34930 }, { "epoch": 685.0, "eval_loss": 0.4224391579627991, "eval_runtime": 2.3437, "eval_samples_per_second": 972.39, "eval_steps_per_second": 3.84, "step": 34935 }, { "epoch": 685.1, "learning_rate": 6.365583908084152e-05, "loss": 0.4098, "step": 34940 }, { "epoch": 685.29, "learning_rate": 6.361578078405371e-05, "loss": 0.4191, "step": 34950 }, { "epoch": 685.49, "learning_rate": 6.357572581120399e-05, "loss": 0.4186, "step": 34960 }, { "epoch": 685.69, "learning_rate": 6.353567417398756e-05, "loss": 0.4173, "step": 34970 }, { "epoch": 685.88, "learning_rate": 6.349562588409858e-05, "loss": 0.4188, "step": 34980 }, { "epoch": 686.0, "eval_loss": 0.42092400789260864, "eval_runtime": 2.2152, "eval_samples_per_second": 1028.793, "eval_steps_per_second": 4.063, "step": 34986 }, { "epoch": 686.08, "learning_rate": 6.34555809532303e-05, "loss": 0.422, "step": 34990 }, { "epoch": 686.27, "learning_rate": 6.34155393930749e-05, "loss": 0.416, "step": 35000 }, { "epoch": 686.47, "learning_rate": 6.337550121532362e-05, "loss": 0.4176, "step": 35010 }, { "epoch": 686.67, "learning_rate": 6.333546643166678e-05, "loss": 0.4183, "step": 35020 }, { "epoch": 686.86, "learning_rate": 6.329543505379354e-05, "loss": 0.4187, "step": 35030 }, { "epoch": 687.0, "eval_loss": 0.41680261492729187, "eval_runtime": 2.3457, "eval_samples_per_second": 971.556, "eval_steps_per_second": 3.837, "step": 35037 }, { "epoch": 687.06, "learning_rate": 6.325540709339227e-05, "loss": 0.4162, "step": 35040 }, { "epoch": 687.25, "learning_rate": 6.321538256215017e-05, "loss": 0.4152, "step": 35050 }, { "epoch": 687.45, "learning_rate": 6.317536147175356e-05, "loss": 0.4199, "step": 35060 }, { "epoch": 687.65, "learning_rate": 6.31353438338877e-05, "loss": 0.4167, "step": 35070 }, { "epoch": 687.84, "learning_rate": 6.309532966023678e-05, "loss": 0.4174, "step": 35080 }, { "epoch": 688.0, "eval_loss": 0.4200821816921234, "eval_runtime": 2.3042, "eval_samples_per_second": 989.077, "eval_steps_per_second": 3.906, "step": 35088 }, { "epoch": 688.04, "learning_rate": 6.305531896248415e-05, "loss": 0.4156, "step": 35090 }, { "epoch": 688.24, "learning_rate": 6.301531175231196e-05, "loss": 0.4127, "step": 35100 }, { "epoch": 688.43, "learning_rate": 6.297530804140147e-05, "loss": 0.4149, "step": 35110 }, { "epoch": 688.63, "learning_rate": 6.293530784143284e-05, "loss": 0.4141, "step": 35120 }, { "epoch": 688.82, "learning_rate": 6.289531116408526e-05, "loss": 0.4184, "step": 35130 }, { "epoch": 689.0, "eval_loss": 0.41768765449523926, "eval_runtime": 2.2735, "eval_samples_per_second": 1002.432, "eval_steps_per_second": 3.959, "step": 35139 }, { "epoch": 689.02, "learning_rate": 6.285531802103688e-05, "loss": 0.4164, "step": 35140 }, { "epoch": 689.22, "learning_rate": 6.281532842396476e-05, "loss": 0.4215, "step": 35150 }, { "epoch": 689.41, "learning_rate": 6.2775342384545e-05, "loss": 0.4152, "step": 35160 }, { "epoch": 689.61, "learning_rate": 6.273535991445261e-05, "loss": 0.4231, "step": 35170 }, { "epoch": 689.8, "learning_rate": 6.269538102536163e-05, "loss": 0.4135, "step": 35180 }, { "epoch": 690.0, "learning_rate": 6.265540572894494e-05, "loss": 0.4126, "step": 35190 }, { "epoch": 690.0, "eval_loss": 0.41916319727897644, "eval_runtime": 2.3438, "eval_samples_per_second": 972.34, "eval_steps_per_second": 3.84, "step": 35190 }, { "epoch": 690.2, "learning_rate": 6.26154340368745e-05, "loss": 0.4184, "step": 35200 }, { "epoch": 690.39, "learning_rate": 6.25754659608211e-05, "loss": 0.416, "step": 35210 }, { "epoch": 690.59, "learning_rate": 6.253550151245454e-05, "loss": 0.4136, "step": 35220 }, { "epoch": 690.78, "learning_rate": 6.249554070344358e-05, "loss": 0.4141, "step": 35230 }, { "epoch": 690.98, "learning_rate": 6.245558354545582e-05, "loss": 0.4168, "step": 35240 }, { "epoch": 691.0, "eval_loss": 0.4171481728553772, "eval_runtime": 2.3458, "eval_samples_per_second": 971.51, "eval_steps_per_second": 3.837, "step": 35241 }, { "epoch": 691.18, "learning_rate": 6.241563005015792e-05, "loss": 0.4149, "step": 35250 }, { "epoch": 691.37, "learning_rate": 6.237568022921537e-05, "loss": 0.4149, "step": 35260 }, { "epoch": 691.57, "learning_rate": 6.233573409429267e-05, "loss": 0.4113, "step": 35270 }, { "epoch": 691.76, "learning_rate": 6.229579165705317e-05, "loss": 0.4205, "step": 35280 }, { "epoch": 691.96, "learning_rate": 6.225585292915914e-05, "loss": 0.4152, "step": 35290 }, { "epoch": 692.0, "eval_loss": 0.4202278256416321, "eval_runtime": 2.3116, "eval_samples_per_second": 985.918, "eval_steps_per_second": 3.893, "step": 35292 }, { "epoch": 692.16, "learning_rate": 6.221591792227188e-05, "loss": 0.4188, "step": 35300 }, { "epoch": 692.35, "learning_rate": 6.217598664805143e-05, "loss": 0.4182, "step": 35310 }, { "epoch": 692.55, "learning_rate": 6.21360591181569e-05, "loss": 0.4183, "step": 35320 }, { "epoch": 692.75, "learning_rate": 6.209613534424624e-05, "loss": 0.413, "step": 35330 }, { "epoch": 692.94, "learning_rate": 6.205621533797621e-05, "loss": 0.4137, "step": 35340 }, { "epoch": 693.0, "eval_loss": 0.42095068097114563, "eval_runtime": 2.1897, "eval_samples_per_second": 1040.785, "eval_steps_per_second": 4.11, "step": 35343 }, { "epoch": 693.14, "learning_rate": 6.201629911100269e-05, "loss": 0.4146, "step": 35350 }, { "epoch": 693.33, "learning_rate": 6.197638667498022e-05, "loss": 0.4122, "step": 35360 }, { "epoch": 693.53, "learning_rate": 6.193647804156241e-05, "loss": 0.4166, "step": 35370 }, { "epoch": 693.73, "learning_rate": 6.189657322240165e-05, "loss": 0.4164, "step": 35380 }, { "epoch": 693.92, "learning_rate": 6.185667222914928e-05, "loss": 0.4139, "step": 35390 }, { "epoch": 694.0, "eval_loss": 0.4143226146697998, "eval_runtime": 2.205, "eval_samples_per_second": 1033.539, "eval_steps_per_second": 4.082, "step": 35394 }, { "epoch": 694.12, "learning_rate": 6.181677507345552e-05, "loss": 0.4172, "step": 35400 }, { "epoch": 694.31, "learning_rate": 6.17768817669694e-05, "loss": 0.4172, "step": 35410 }, { "epoch": 694.51, "learning_rate": 6.17369923213389e-05, "loss": 0.4159, "step": 35420 }, { "epoch": 694.71, "learning_rate": 6.169710674821085e-05, "loss": 0.4136, "step": 35430 }, { "epoch": 694.9, "learning_rate": 6.165722505923096e-05, "loss": 0.418, "step": 35440 }, { "epoch": 695.0, "eval_loss": 0.4250470697879791, "eval_runtime": 2.1916, "eval_samples_per_second": 1039.863, "eval_steps_per_second": 4.107, "step": 35445 }, { "epoch": 695.1, "learning_rate": 6.161734726604374e-05, "loss": 0.4154, "step": 35450 }, { "epoch": 695.29, "learning_rate": 6.15774733802927e-05, "loss": 0.4189, "step": 35460 }, { "epoch": 695.49, "learning_rate": 6.153760341362007e-05, "loss": 0.412, "step": 35470 }, { "epoch": 695.69, "learning_rate": 6.1497737377667e-05, "loss": 0.4208, "step": 35480 }, { "epoch": 695.88, "learning_rate": 6.145787528407348e-05, "loss": 0.4116, "step": 35490 }, { "epoch": 696.0, "eval_loss": 0.4236893951892853, "eval_runtime": 2.2843, "eval_samples_per_second": 997.695, "eval_steps_per_second": 3.94, "step": 35496 }, { "epoch": 696.08, "learning_rate": 6.141801714447834e-05, "loss": 0.4162, "step": 35500 }, { "epoch": 696.27, "learning_rate": 6.137816297051934e-05, "loss": 0.4133, "step": 35510 }, { "epoch": 696.47, "learning_rate": 6.13383127738329e-05, "loss": 0.4182, "step": 35520 }, { "epoch": 696.67, "learning_rate": 6.129846656605448e-05, "loss": 0.4202, "step": 35530 }, { "epoch": 696.86, "learning_rate": 6.125862435881826e-05, "loss": 0.4113, "step": 35540 }, { "epoch": 697.0, "eval_loss": 0.41717728972435, "eval_runtime": 2.2931, "eval_samples_per_second": 993.869, "eval_steps_per_second": 3.925, "step": 35547 }, { "epoch": 697.06, "learning_rate": 6.12187861637572e-05, "loss": 0.4114, "step": 35550 }, { "epoch": 697.25, "learning_rate": 6.11789519925033e-05, "loss": 0.4151, "step": 35560 }, { "epoch": 697.45, "learning_rate": 6.113912185668715e-05, "loss": 0.4122, "step": 35570 }, { "epoch": 697.65, "learning_rate": 6.109929576793829e-05, "loss": 0.4172, "step": 35580 }, { "epoch": 697.84, "learning_rate": 6.105947373788505e-05, "loss": 0.4131, "step": 35590 }, { "epoch": 698.0, "eval_loss": 0.4218674898147583, "eval_runtime": 2.2896, "eval_samples_per_second": 995.388, "eval_steps_per_second": 3.931, "step": 35598 }, { "epoch": 698.04, "learning_rate": 6.101965577815458e-05, "loss": 0.4105, "step": 35600 }, { "epoch": 698.24, "learning_rate": 6.097984190037284e-05, "loss": 0.4159, "step": 35610 }, { "epoch": 698.43, "learning_rate": 6.0940032116164555e-05, "loss": 0.4124, "step": 35620 }, { "epoch": 698.63, "learning_rate": 6.090022643715335e-05, "loss": 0.4153, "step": 35630 }, { "epoch": 698.82, "learning_rate": 6.086042487496155e-05, "loss": 0.4148, "step": 35640 }, { "epoch": 699.0, "eval_loss": 0.4179209768772125, "eval_runtime": 2.2257, "eval_samples_per_second": 1023.944, "eval_steps_per_second": 4.044, "step": 35649 }, { "epoch": 699.02, "learning_rate": 6.082062744121038e-05, "loss": 0.4175, "step": 35650 }, { "epoch": 699.22, "learning_rate": 6.0780834147519704e-05, "loss": 0.4176, "step": 35660 }, { "epoch": 699.41, "learning_rate": 6.07410450055084e-05, "loss": 0.4121, "step": 35670 }, { "epoch": 699.61, "learning_rate": 6.070126002679393e-05, "loss": 0.4108, "step": 35680 }, { "epoch": 699.8, "learning_rate": 6.066147922299262e-05, "loss": 0.4144, "step": 35690 }, { "epoch": 700.0, "learning_rate": 6.062170260571963e-05, "loss": 0.4117, "step": 35700 }, { "epoch": 700.0, "eval_loss": 0.4264349341392517, "eval_runtime": 2.3638, "eval_samples_per_second": 964.145, "eval_steps_per_second": 3.808, "step": 35700 }, { "epoch": 700.2, "learning_rate": 6.058193018658876e-05, "loss": 0.4168, "step": 35710 }, { "epoch": 700.39, "learning_rate": 6.0542161977212775e-05, "loss": 0.4138, "step": 35720 }, { "epoch": 700.59, "learning_rate": 6.050239798920303e-05, "loss": 0.4155, "step": 35730 }, { "epoch": 700.78, "learning_rate": 6.046263823416974e-05, "loss": 0.4106, "step": 35740 }, { "epoch": 700.98, "learning_rate": 6.04228827237219e-05, "loss": 0.4115, "step": 35750 }, { "epoch": 701.0, "eval_loss": 0.4244195818901062, "eval_runtime": 2.2262, "eval_samples_per_second": 1023.724, "eval_steps_per_second": 4.043, "step": 35751 }, { "epoch": 701.18, "learning_rate": 6.0383131469467157e-05, "loss": 0.4141, "step": 35760 }, { "epoch": 701.37, "learning_rate": 6.034338448301207e-05, "loss": 0.4116, "step": 35770 }, { "epoch": 701.57, "learning_rate": 6.030364177596182e-05, "loss": 0.4154, "step": 35780 }, { "epoch": 701.76, "learning_rate": 6.0263903359920426e-05, "loss": 0.4175, "step": 35790 }, { "epoch": 701.96, "learning_rate": 6.0224169246490586e-05, "loss": 0.4149, "step": 35800 }, { "epoch": 702.0, "eval_loss": 0.42228615283966064, "eval_runtime": 2.217, "eval_samples_per_second": 1027.969, "eval_steps_per_second": 4.06, "step": 35802 }, { "epoch": 702.16, "learning_rate": 6.018443944727381e-05, "loss": 0.416, "step": 35810 }, { "epoch": 702.35, "learning_rate": 6.014471397387032e-05, "loss": 0.4147, "step": 35820 }, { "epoch": 702.55, "learning_rate": 6.0104992837879e-05, "loss": 0.4142, "step": 35830 }, { "epoch": 702.75, "learning_rate": 6.0065276050897597e-05, "loss": 0.4148, "step": 35840 }, { "epoch": 702.94, "learning_rate": 6.00255636245225e-05, "loss": 0.4129, "step": 35850 }, { "epoch": 703.0, "eval_loss": 0.4189659357070923, "eval_runtime": 2.3057, "eval_samples_per_second": 988.411, "eval_steps_per_second": 3.903, "step": 35853 }, { "epoch": 703.14, "learning_rate": 5.998585557034889e-05, "loss": 0.4139, "step": 35860 }, { "epoch": 703.33, "learning_rate": 5.994615189997056e-05, "loss": 0.4141, "step": 35870 }, { "epoch": 703.53, "learning_rate": 5.990645262498019e-05, "loss": 0.41, "step": 35880 }, { "epoch": 703.73, "learning_rate": 5.986675775696903e-05, "loss": 0.4108, "step": 35890 }, { "epoch": 703.92, "learning_rate": 5.9827067307527067e-05, "loss": 0.4134, "step": 35900 }, { "epoch": 704.0, "eval_loss": 0.41974902153015137, "eval_runtime": 2.3539, "eval_samples_per_second": 968.194, "eval_steps_per_second": 3.823, "step": 35904 }, { "epoch": 704.12, "learning_rate": 5.9787381288243094e-05, "loss": 0.4149, "step": 35910 }, { "epoch": 704.31, "learning_rate": 5.9747699710704466e-05, "loss": 0.4169, "step": 35920 }, { "epoch": 704.51, "learning_rate": 5.970802258649742e-05, "loss": 0.4134, "step": 35930 }, { "epoch": 704.71, "learning_rate": 5.9668349927206696e-05, "loss": 0.4128, "step": 35940 }, { "epoch": 704.9, "learning_rate": 5.9628681744415877e-05, "loss": 0.4155, "step": 35950 }, { "epoch": 705.0, "eval_loss": 0.4202663004398346, "eval_runtime": 2.2216, "eval_samples_per_second": 1025.818, "eval_steps_per_second": 4.051, "step": 35955 }, { "epoch": 705.1, "learning_rate": 5.95890180497072e-05, "loss": 0.4137, "step": 35960 }, { "epoch": 705.29, "learning_rate": 5.95493588546615e-05, "loss": 0.4165, "step": 35970 }, { "epoch": 705.49, "learning_rate": 5.950970417085848e-05, "loss": 0.4115, "step": 35980 }, { "epoch": 705.69, "learning_rate": 5.9470054009876336e-05, "loss": 0.4094, "step": 35990 }, { "epoch": 705.88, "learning_rate": 5.943040838329209e-05, "loss": 0.4112, "step": 36000 }, { "epoch": 706.0, "eval_loss": 0.42057114839553833, "eval_runtime": 2.2541, "eval_samples_per_second": 1011.051, "eval_steps_per_second": 3.993, "step": 36006 }, { "epoch": 706.08, "learning_rate": 5.939076730268135e-05, "loss": 0.4104, "step": 36010 }, { "epoch": 706.27, "learning_rate": 5.935113077961845e-05, "loss": 0.4129, "step": 36020 }, { "epoch": 706.47, "learning_rate": 5.931149882567638e-05, "loss": 0.4138, "step": 36030 }, { "epoch": 706.67, "learning_rate": 5.927187145242672e-05, "loss": 0.4106, "step": 36040 }, { "epoch": 706.86, "learning_rate": 5.923224867143985e-05, "loss": 0.4113, "step": 36050 }, { "epoch": 707.0, "eval_loss": 0.41755741834640503, "eval_runtime": 2.2519, "eval_samples_per_second": 1012.054, "eval_steps_per_second": 3.997, "step": 36057 }, { "epoch": 707.06, "learning_rate": 5.9192630494284696e-05, "loss": 0.4082, "step": 36060 }, { "epoch": 707.25, "learning_rate": 5.915301693252894e-05, "loss": 0.4138, "step": 36070 }, { "epoch": 707.45, "learning_rate": 5.911340799773879e-05, "loss": 0.4161, "step": 36080 }, { "epoch": 707.65, "learning_rate": 5.907380370147919e-05, "loss": 0.412, "step": 36090 }, { "epoch": 707.84, "learning_rate": 5.9034204055313734e-05, "loss": 0.4117, "step": 36100 }, { "epoch": 708.0, "eval_loss": 0.42017531394958496, "eval_runtime": 2.2028, "eval_samples_per_second": 1034.59, "eval_steps_per_second": 4.086, "step": 36108 }, { "epoch": 708.04, "learning_rate": 5.89946090708046e-05, "loss": 0.4043, "step": 36110 }, { "epoch": 708.24, "learning_rate": 5.895501875951271e-05, "loss": 0.4111, "step": 36120 }, { "epoch": 708.43, "learning_rate": 5.891543313299744e-05, "loss": 0.4106, "step": 36130 }, { "epoch": 708.63, "learning_rate": 5.887585220281703e-05, "loss": 0.4154, "step": 36140 }, { "epoch": 708.82, "learning_rate": 5.8836275980528155e-05, "loss": 0.4128, "step": 36150 }, { "epoch": 709.0, "eval_loss": 0.4185832738876343, "eval_runtime": 2.1993, "eval_samples_per_second": 1036.236, "eval_steps_per_second": 4.092, "step": 36159 }, { "epoch": 709.02, "learning_rate": 5.879670447768619e-05, "loss": 0.4109, "step": 36160 }, { "epoch": 709.22, "learning_rate": 5.875713770584518e-05, "loss": 0.4148, "step": 36170 }, { "epoch": 709.41, "learning_rate": 5.8717575676557666e-05, "loss": 0.4093, "step": 36180 }, { "epoch": 709.61, "learning_rate": 5.867801840137497e-05, "loss": 0.4075, "step": 36190 }, { "epoch": 709.8, "learning_rate": 5.8638465891846854e-05, "loss": 0.4155, "step": 36200 }, { "epoch": 710.0, "learning_rate": 5.859891815952181e-05, "loss": 0.4111, "step": 36210 }, { "epoch": 710.0, "eval_loss": 0.41964903473854065, "eval_runtime": 2.3088, "eval_samples_per_second": 987.106, "eval_steps_per_second": 3.898, "step": 36210 }, { "epoch": 710.2, "learning_rate": 5.85593752159469e-05, "loss": 0.4116, "step": 36220 }, { "epoch": 710.39, "learning_rate": 5.8519837072667725e-05, "loss": 0.4109, "step": 36230 }, { "epoch": 710.59, "learning_rate": 5.848030374122862e-05, "loss": 0.4143, "step": 36240 }, { "epoch": 710.78, "learning_rate": 5.844077523317238e-05, "loss": 0.4121, "step": 36250 }, { "epoch": 710.98, "learning_rate": 5.8401251560040463e-05, "loss": 0.4168, "step": 36260 }, { "epoch": 711.0, "eval_loss": 0.42247816920280457, "eval_runtime": 2.3042, "eval_samples_per_second": 989.074, "eval_steps_per_second": 3.906, "step": 36261 }, { "epoch": 711.18, "learning_rate": 5.83617327333729e-05, "loss": 0.4147, "step": 36270 }, { "epoch": 711.37, "learning_rate": 5.8322218764708336e-05, "loss": 0.41, "step": 36280 }, { "epoch": 711.57, "learning_rate": 5.828270966558392e-05, "loss": 0.4146, "step": 36290 }, { "epoch": 711.76, "learning_rate": 5.824320544753545e-05, "loss": 0.4069, "step": 36300 }, { "epoch": 711.96, "learning_rate": 5.8203706122097275e-05, "loss": 0.408, "step": 36310 }, { "epoch": 712.0, "eval_loss": 0.41460511088371277, "eval_runtime": 2.3599, "eval_samples_per_second": 965.725, "eval_steps_per_second": 3.814, "step": 36312 }, { "epoch": 712.16, "learning_rate": 5.8164211700802316e-05, "loss": 0.409, "step": 36320 }, { "epoch": 712.35, "learning_rate": 5.812472219518209e-05, "loss": 0.4118, "step": 36330 }, { "epoch": 712.55, "learning_rate": 5.808523761676658e-05, "loss": 0.4099, "step": 36340 }, { "epoch": 712.75, "learning_rate": 5.8045757977084504e-05, "loss": 0.4136, "step": 36350 }, { "epoch": 712.94, "learning_rate": 5.800628328766296e-05, "loss": 0.4117, "step": 36360 }, { "epoch": 713.0, "eval_loss": 0.4185248911380768, "eval_runtime": 2.2916, "eval_samples_per_second": 994.5, "eval_steps_per_second": 3.927, "step": 36363 }, { "epoch": 713.14, "learning_rate": 5.796681356002769e-05, "loss": 0.4059, "step": 36370 }, { "epoch": 713.33, "learning_rate": 5.792734880570301e-05, "loss": 0.408, "step": 36380 }, { "epoch": 713.53, "learning_rate": 5.788788903621168e-05, "loss": 0.4113, "step": 36390 }, { "epoch": 713.73, "learning_rate": 5.784843426307516e-05, "loss": 0.4136, "step": 36400 }, { "epoch": 713.92, "learning_rate": 5.780898449781328e-05, "loss": 0.4089, "step": 36410 }, { "epoch": 714.0, "eval_loss": 0.4214355945587158, "eval_runtime": 2.3419, "eval_samples_per_second": 973.124, "eval_steps_per_second": 3.843, "step": 36414 }, { "epoch": 714.12, "learning_rate": 5.7769539751944544e-05, "loss": 0.4134, "step": 36420 }, { "epoch": 714.31, "learning_rate": 5.773010003698595e-05, "loss": 0.4166, "step": 36430 }, { "epoch": 714.51, "learning_rate": 5.769066536445294e-05, "loss": 0.4101, "step": 36440 }, { "epoch": 714.71, "learning_rate": 5.765123574585965e-05, "loss": 0.4171, "step": 36450 }, { "epoch": 714.9, "learning_rate": 5.7611811192718576e-05, "loss": 0.408, "step": 36460 }, { "epoch": 715.0, "eval_loss": 0.4195824861526489, "eval_runtime": 2.2721, "eval_samples_per_second": 1003.05, "eval_steps_per_second": 3.961, "step": 36465 }, { "epoch": 715.1, "learning_rate": 5.757239171654086e-05, "loss": 0.4114, "step": 36470 }, { "epoch": 715.29, "learning_rate": 5.753297732883607e-05, "loss": 0.4121, "step": 36480 }, { "epoch": 715.49, "learning_rate": 5.749356804111238e-05, "loss": 0.411, "step": 36490 }, { "epoch": 715.69, "learning_rate": 5.7454163864876376e-05, "loss": 0.4138, "step": 36500 }, { "epoch": 715.88, "learning_rate": 5.741476481163319e-05, "loss": 0.4126, "step": 36510 }, { "epoch": 716.0, "eval_loss": 0.41745418310165405, "eval_runtime": 2.1957, "eval_samples_per_second": 1037.945, "eval_steps_per_second": 4.099, "step": 36516 }, { "epoch": 716.08, "learning_rate": 5.737537089288652e-05, "loss": 0.4114, "step": 36520 }, { "epoch": 716.27, "learning_rate": 5.7335982120138456e-05, "loss": 0.4128, "step": 36530 }, { "epoch": 716.47, "learning_rate": 5.72965985048897e-05, "loss": 0.4117, "step": 36540 }, { "epoch": 716.67, "learning_rate": 5.725722005863931e-05, "loss": 0.4086, "step": 36550 }, { "epoch": 716.86, "learning_rate": 5.7217846792885e-05, "loss": 0.4106, "step": 36560 }, { "epoch": 717.0, "eval_loss": 0.4145427942276001, "eval_runtime": 2.3265, "eval_samples_per_second": 979.583, "eval_steps_per_second": 3.868, "step": 36567 }, { "epoch": 717.06, "learning_rate": 5.717847871912284e-05, "loss": 0.4051, "step": 36570 }, { "epoch": 717.25, "learning_rate": 5.7139115848847425e-05, "loss": 0.409, "step": 36580 }, { "epoch": 717.45, "learning_rate": 5.709975819355187e-05, "loss": 0.4174, "step": 36590 }, { "epoch": 717.65, "learning_rate": 5.706040576472766e-05, "loss": 0.4098, "step": 36600 }, { "epoch": 717.84, "learning_rate": 5.7021058573864924e-05, "loss": 0.4112, "step": 36610 }, { "epoch": 718.0, "eval_loss": 0.41604405641555786, "eval_runtime": 2.2861, "eval_samples_per_second": 996.902, "eval_steps_per_second": 3.937, "step": 36618 }, { "epoch": 718.04, "learning_rate": 5.6981716632452086e-05, "loss": 0.4096, "step": 36620 }, { "epoch": 718.24, "learning_rate": 5.694237995197615e-05, "loss": 0.4088, "step": 36630 }, { "epoch": 718.43, "learning_rate": 5.690304854392257e-05, "loss": 0.41, "step": 36640 }, { "epoch": 718.63, "learning_rate": 5.6863722419775166e-05, "loss": 0.4145, "step": 36650 }, { "epoch": 718.82, "learning_rate": 5.6824401591016385e-05, "loss": 0.4064, "step": 36660 }, { "epoch": 719.0, "eval_loss": 0.41746461391448975, "eval_runtime": 2.2192, "eval_samples_per_second": 1026.951, "eval_steps_per_second": 4.056, "step": 36669 }, { "epoch": 719.02, "learning_rate": 5.678508606912694e-05, "loss": 0.409, "step": 36670 }, { "epoch": 719.22, "learning_rate": 5.674577586558616e-05, "loss": 0.4115, "step": 36680 }, { "epoch": 719.41, "learning_rate": 5.6706470991871706e-05, "loss": 0.4076, "step": 36690 }, { "epoch": 719.61, "learning_rate": 5.666717145945976e-05, "loss": 0.4139, "step": 36700 }, { "epoch": 719.8, "learning_rate": 5.662787727982487e-05, "loss": 0.4089, "step": 36710 }, { "epoch": 720.0, "learning_rate": 5.658858846444006e-05, "loss": 0.41, "step": 36720 }, { "epoch": 720.0, "eval_loss": 0.4181264638900757, "eval_runtime": 2.2542, "eval_samples_per_second": 1011.006, "eval_steps_per_second": 3.993, "step": 36720 }, { "epoch": 720.2, "learning_rate": 5.654930502477682e-05, "loss": 0.4102, "step": 36730 }, { "epoch": 720.39, "learning_rate": 5.651002697230501e-05, "loss": 0.4134, "step": 36740 }, { "epoch": 720.59, "learning_rate": 5.647075431849299e-05, "loss": 0.4091, "step": 36750 }, { "epoch": 720.78, "learning_rate": 5.643148707480745e-05, "loss": 0.4132, "step": 36760 }, { "epoch": 720.98, "learning_rate": 5.639222525271355e-05, "loss": 0.4046, "step": 36770 }, { "epoch": 721.0, "eval_loss": 0.4158802032470703, "eval_runtime": 2.327, "eval_samples_per_second": 979.383, "eval_steps_per_second": 3.868, "step": 36771 }, { "epoch": 721.18, "learning_rate": 5.63529688636749e-05, "loss": 0.4074, "step": 36780 }, { "epoch": 721.37, "learning_rate": 5.631371791915345e-05, "loss": 0.4129, "step": 36790 }, { "epoch": 721.57, "learning_rate": 5.627447243060967e-05, "loss": 0.4086, "step": 36800 }, { "epoch": 721.76, "learning_rate": 5.623523240950225e-05, "loss": 0.4106, "step": 36810 }, { "epoch": 721.96, "learning_rate": 5.6195997867288534e-05, "loss": 0.4141, "step": 36820 }, { "epoch": 722.0, "eval_loss": 0.41193127632141113, "eval_runtime": 2.2521, "eval_samples_per_second": 1011.933, "eval_steps_per_second": 3.996, "step": 36822 }, { "epoch": 722.16, "learning_rate": 5.615676881542405e-05, "loss": 0.4093, "step": 36830 }, { "epoch": 722.35, "learning_rate": 5.611754526536282e-05, "loss": 0.4119, "step": 36840 }, { "epoch": 722.55, "learning_rate": 5.6078327228557274e-05, "loss": 0.4092, "step": 36850 }, { "epoch": 722.75, "learning_rate": 5.6039114716458145e-05, "loss": 0.4097, "step": 36860 }, { "epoch": 722.94, "learning_rate": 5.599990774051469e-05, "loss": 0.414, "step": 36870 }, { "epoch": 723.0, "eval_loss": 0.4167172610759735, "eval_runtime": 2.2632, "eval_samples_per_second": 1006.978, "eval_steps_per_second": 3.977, "step": 36873 }, { "epoch": 723.14, "learning_rate": 5.596070631217441e-05, "loss": 0.409, "step": 36880 }, { "epoch": 723.33, "learning_rate": 5.592151044288327e-05, "loss": 0.4143, "step": 36890 }, { "epoch": 723.53, "learning_rate": 5.588232014408561e-05, "loss": 0.4101, "step": 36900 }, { "epoch": 723.73, "learning_rate": 5.5843135427224076e-05, "loss": 0.4125, "step": 36910 }, { "epoch": 723.92, "learning_rate": 5.580395630373977e-05, "loss": 0.4118, "step": 36920 }, { "epoch": 724.0, "eval_loss": 0.4165693521499634, "eval_runtime": 2.1966, "eval_samples_per_second": 1037.5, "eval_steps_per_second": 4.097, "step": 36924 }, { "epoch": 724.12, "learning_rate": 5.57647827850721e-05, "loss": 0.4082, "step": 36930 }, { "epoch": 724.31, "learning_rate": 5.5725614882658874e-05, "loss": 0.4093, "step": 36940 }, { "epoch": 724.51, "learning_rate": 5.5686452607936226e-05, "loss": 0.4138, "step": 36950 }, { "epoch": 724.71, "learning_rate": 5.564729597233873e-05, "loss": 0.4079, "step": 36960 }, { "epoch": 724.9, "learning_rate": 5.560814498729918e-05, "loss": 0.4106, "step": 36970 }, { "epoch": 725.0, "eval_loss": 0.41572317481040955, "eval_runtime": 2.3431, "eval_samples_per_second": 972.633, "eval_steps_per_second": 3.841, "step": 36975 }, { "epoch": 725.1, "learning_rate": 5.556899966424879e-05, "loss": 0.4097, "step": 36980 }, { "epoch": 725.29, "learning_rate": 5.552986001461716e-05, "loss": 0.4088, "step": 36990 }, { "epoch": 725.49, "learning_rate": 5.549072604983218e-05, "loss": 0.4092, "step": 37000 }, { "epoch": 725.69, "learning_rate": 5.545159778132011e-05, "loss": 0.4109, "step": 37010 }, { "epoch": 725.88, "learning_rate": 5.5412475220505475e-05, "loss": 0.4079, "step": 37020 }, { "epoch": 726.0, "eval_loss": 0.4175969958305359, "eval_runtime": 2.3512, "eval_samples_per_second": 969.275, "eval_steps_per_second": 3.828, "step": 37026 }, { "epoch": 726.08, "learning_rate": 5.537335837881127e-05, "loss": 0.4021, "step": 37030 }, { "epoch": 726.27, "learning_rate": 5.53342472676587e-05, "loss": 0.4103, "step": 37040 }, { "epoch": 726.47, "learning_rate": 5.529514189846732e-05, "loss": 0.4125, "step": 37050 }, { "epoch": 726.67, "learning_rate": 5.525604228265507e-05, "loss": 0.4095, "step": 37060 }, { "epoch": 726.86, "learning_rate": 5.521694843163809e-05, "loss": 0.4114, "step": 37070 }, { "epoch": 727.0, "eval_loss": 0.4107976257801056, "eval_runtime": 2.2678, "eval_samples_per_second": 1004.941, "eval_steps_per_second": 3.969, "step": 37077 }, { "epoch": 727.06, "learning_rate": 5.517786035683102e-05, "loss": 0.4111, "step": 37080 }, { "epoch": 727.25, "learning_rate": 5.5138778069646614e-05, "loss": 0.4112, "step": 37090 }, { "epoch": 727.45, "learning_rate": 5.509970158149608e-05, "loss": 0.4078, "step": 37100 }, { "epoch": 727.65, "learning_rate": 5.5060630903788886e-05, "loss": 0.4088, "step": 37110 }, { "epoch": 727.84, "learning_rate": 5.5021566047932736e-05, "loss": 0.4117, "step": 37120 }, { "epoch": 728.0, "eval_loss": 0.41353392601013184, "eval_runtime": 2.2102, "eval_samples_per_second": 1031.137, "eval_steps_per_second": 4.072, "step": 37128 }, { "epoch": 728.04, "learning_rate": 5.4982507025333756e-05, "loss": 0.4077, "step": 37130 }, { "epoch": 728.24, "learning_rate": 5.4943453847396275e-05, "loss": 0.4131, "step": 37140 }, { "epoch": 728.43, "learning_rate": 5.490440652552298e-05, "loss": 0.4127, "step": 37150 }, { "epoch": 728.63, "learning_rate": 5.48653650711148e-05, "loss": 0.4126, "step": 37160 }, { "epoch": 728.82, "learning_rate": 5.4826329495571e-05, "loss": 0.4155, "step": 37170 }, { "epoch": 729.0, "eval_loss": 0.4170650243759155, "eval_runtime": 2.2637, "eval_samples_per_second": 1006.775, "eval_steps_per_second": 3.976, "step": 37179 }, { "epoch": 729.02, "learning_rate": 5.478729981028905e-05, "loss": 0.4111, "step": 37180 }, { "epoch": 729.22, "learning_rate": 5.474827602666475e-05, "loss": 0.4102, "step": 37190 }, { "epoch": 729.41, "learning_rate": 5.4709258156092214e-05, "loss": 0.4115, "step": 37200 }, { "epoch": 729.61, "learning_rate": 5.467024620996375e-05, "loss": 0.4083, "step": 37210 }, { "epoch": 729.8, "learning_rate": 5.4631240199670036e-05, "loss": 0.4099, "step": 37220 }, { "epoch": 730.0, "learning_rate": 5.4592240136599856e-05, "loss": 0.4117, "step": 37230 }, { "epoch": 730.0, "eval_loss": 0.4147026836872101, "eval_runtime": 2.228, "eval_samples_per_second": 1022.869, "eval_steps_per_second": 4.039, "step": 37230 }, { "epoch": 730.2, "learning_rate": 5.455324603214047e-05, "loss": 0.4102, "step": 37240 }, { "epoch": 730.39, "learning_rate": 5.4514257897677227e-05, "loss": 0.4123, "step": 37250 }, { "epoch": 730.59, "learning_rate": 5.447527574459378e-05, "loss": 0.4088, "step": 37260 }, { "epoch": 730.78, "learning_rate": 5.44362995842721e-05, "loss": 0.4096, "step": 37270 }, { "epoch": 730.98, "learning_rate": 5.439732942809228e-05, "loss": 0.4092, "step": 37280 }, { "epoch": 731.0, "eval_loss": 0.4093756079673767, "eval_runtime": 2.3646, "eval_samples_per_second": 963.815, "eval_steps_per_second": 3.806, "step": 37281 }, { "epoch": 731.18, "learning_rate": 5.435836528743283e-05, "loss": 0.4079, "step": 37290 }, { "epoch": 731.37, "learning_rate": 5.431940717367033e-05, "loss": 0.4105, "step": 37300 }, { "epoch": 731.57, "learning_rate": 5.428045509817974e-05, "loss": 0.4073, "step": 37310 }, { "epoch": 731.76, "learning_rate": 5.424150907233418e-05, "loss": 0.403, "step": 37320 }, { "epoch": 731.96, "learning_rate": 5.420256910750497e-05, "loss": 0.4091, "step": 37330 }, { "epoch": 732.0, "eval_loss": 0.41333431005477905, "eval_runtime": 2.2202, "eval_samples_per_second": 1026.491, "eval_steps_per_second": 4.054, "step": 37332 }, { "epoch": 732.16, "learning_rate": 5.416363521506178e-05, "loss": 0.4111, "step": 37340 }, { "epoch": 732.35, "learning_rate": 5.4124707406372384e-05, "loss": 0.4095, "step": 37350 }, { "epoch": 732.55, "learning_rate": 5.408578569280289e-05, "loss": 0.4087, "step": 37360 }, { "epoch": 732.75, "learning_rate": 5.404687008571752e-05, "loss": 0.4072, "step": 37370 }, { "epoch": 732.94, "learning_rate": 5.400796059647882e-05, "loss": 0.4081, "step": 37380 }, { "epoch": 733.0, "eval_loss": 0.4142116606235504, "eval_runtime": 2.3382, "eval_samples_per_second": 974.674, "eval_steps_per_second": 3.849, "step": 37383 }, { "epoch": 733.14, "learning_rate": 5.396905723644744e-05, "loss": 0.4078, "step": 37390 }, { "epoch": 733.33, "learning_rate": 5.39301600169823e-05, "loss": 0.4053, "step": 37400 }, { "epoch": 733.53, "learning_rate": 5.389126894944054e-05, "loss": 0.4094, "step": 37410 }, { "epoch": 733.73, "learning_rate": 5.385238404517747e-05, "loss": 0.4079, "step": 37420 }, { "epoch": 733.92, "learning_rate": 5.381350531554664e-05, "loss": 0.4084, "step": 37430 }, { "epoch": 734.0, "eval_loss": 0.4169609546661377, "eval_runtime": 2.3612, "eval_samples_per_second": 965.176, "eval_steps_per_second": 3.812, "step": 37434 }, { "epoch": 734.12, "learning_rate": 5.377463277189971e-05, "loss": 0.4031, "step": 37440 }, { "epoch": 734.31, "learning_rate": 5.3735766425586685e-05, "loss": 0.4042, "step": 37450 }, { "epoch": 734.51, "learning_rate": 5.3696906287955614e-05, "loss": 0.408, "step": 37460 }, { "epoch": 734.71, "learning_rate": 5.365805237035279e-05, "loss": 0.4106, "step": 37470 }, { "epoch": 734.9, "learning_rate": 5.361920468412273e-05, "loss": 0.4082, "step": 37480 }, { "epoch": 735.0, "eval_loss": 0.41578948497772217, "eval_runtime": 2.3433, "eval_samples_per_second": 972.578, "eval_steps_per_second": 3.841, "step": 37485 }, { "epoch": 735.1, "learning_rate": 5.3580363240608015e-05, "loss": 0.4131, "step": 37490 }, { "epoch": 735.29, "learning_rate": 5.3541528051149574e-05, "loss": 0.4123, "step": 37500 }, { "epoch": 735.49, "learning_rate": 5.350269912708636e-05, "loss": 0.4099, "step": 37510 }, { "epoch": 735.69, "learning_rate": 5.346387647975555e-05, "loss": 0.4101, "step": 37520 }, { "epoch": 735.88, "learning_rate": 5.342506012049253e-05, "loss": 0.4097, "step": 37530 }, { "epoch": 736.0, "eval_loss": 0.41176244616508484, "eval_runtime": 2.2484, "eval_samples_per_second": 1013.627, "eval_steps_per_second": 4.003, "step": 37536 }, { "epoch": 736.08, "learning_rate": 5.3386250060630765e-05, "loss": 0.4079, "step": 37540 }, { "epoch": 736.27, "learning_rate": 5.334744631150196e-05, "loss": 0.4045, "step": 37550 }, { "epoch": 736.47, "learning_rate": 5.3308648884435914e-05, "loss": 0.4082, "step": 37560 }, { "epoch": 736.67, "learning_rate": 5.326985779076066e-05, "loss": 0.4075, "step": 37570 }, { "epoch": 736.86, "learning_rate": 5.32310730418023e-05, "loss": 0.4082, "step": 37580 }, { "epoch": 737.0, "eval_loss": 0.4105346202850342, "eval_runtime": 2.3444, "eval_samples_per_second": 972.103, "eval_steps_per_second": 3.839, "step": 37587 }, { "epoch": 737.06, "learning_rate": 5.3192294648885086e-05, "loss": 0.4049, "step": 37590 }, { "epoch": 737.25, "learning_rate": 5.3153522623331504e-05, "loss": 0.4119, "step": 37600 }, { "epoch": 737.45, "learning_rate": 5.311475697646207e-05, "loss": 0.4071, "step": 37610 }, { "epoch": 737.65, "learning_rate": 5.307599771959553e-05, "loss": 0.404, "step": 37620 }, { "epoch": 737.84, "learning_rate": 5.303724486404868e-05, "loss": 0.4043, "step": 37630 }, { "epoch": 738.0, "eval_loss": 0.4161665141582489, "eval_runtime": 2.3799, "eval_samples_per_second": 957.598, "eval_steps_per_second": 3.782, "step": 37638 }, { "epoch": 738.04, "learning_rate": 5.2998498421136554e-05, "loss": 0.4074, "step": 37640 }, { "epoch": 738.24, "learning_rate": 5.2959758402172184e-05, "loss": 0.4047, "step": 37650 }, { "epoch": 738.43, "learning_rate": 5.29210248184668e-05, "loss": 0.4049, "step": 37660 }, { "epoch": 738.63, "learning_rate": 5.288229768132978e-05, "loss": 0.4089, "step": 37670 }, { "epoch": 738.82, "learning_rate": 5.284357700206855e-05, "loss": 0.4011, "step": 37680 }, { "epoch": 739.0, "eval_loss": 0.4121991693973541, "eval_runtime": 2.3743, "eval_samples_per_second": 959.861, "eval_steps_per_second": 3.791, "step": 37689 }, { "epoch": 739.02, "learning_rate": 5.2804862791988724e-05, "loss": 0.4083, "step": 37690 }, { "epoch": 739.22, "learning_rate": 5.276615506239393e-05, "loss": 0.4038, "step": 37700 }, { "epoch": 739.41, "learning_rate": 5.272745382458602e-05, "loss": 0.4015, "step": 37710 }, { "epoch": 739.61, "learning_rate": 5.2688759089864874e-05, "loss": 0.4029, "step": 37720 }, { "epoch": 739.8, "learning_rate": 5.265007086952845e-05, "loss": 0.4126, "step": 37730 }, { "epoch": 740.0, "learning_rate": 5.2611389174872926e-05, "loss": 0.4082, "step": 37740 }, { "epoch": 740.0, "eval_loss": 0.4157661199569702, "eval_runtime": 2.2442, "eval_samples_per_second": 1015.507, "eval_steps_per_second": 4.01, "step": 37740 }, { "epoch": 740.2, "learning_rate": 5.25727140171924e-05, "loss": 0.4108, "step": 37750 }, { "epoch": 740.39, "learning_rate": 5.253404540777924e-05, "loss": 0.4039, "step": 37760 }, { "epoch": 740.59, "learning_rate": 5.249538335792376e-05, "loss": 0.4067, "step": 37770 }, { "epoch": 740.78, "learning_rate": 5.245672787891444e-05, "loss": 0.4043, "step": 37780 }, { "epoch": 740.98, "learning_rate": 5.241807898203785e-05, "loss": 0.4098, "step": 37790 }, { "epoch": 741.0, "eval_loss": 0.41529256105422974, "eval_runtime": 2.2331, "eval_samples_per_second": 1020.576, "eval_steps_per_second": 4.03, "step": 37791 }, { "epoch": 741.18, "learning_rate": 5.237943667857853e-05, "loss": 0.4056, "step": 37800 }, { "epoch": 741.37, "learning_rate": 5.234080097981923e-05, "loss": 0.407, "step": 37810 }, { "epoch": 741.57, "learning_rate": 5.230217189704068e-05, "loss": 0.4052, "step": 37820 }, { "epoch": 741.76, "learning_rate": 5.226354944152174e-05, "loss": 0.4068, "step": 37830 }, { "epoch": 741.96, "learning_rate": 5.222493362453928e-05, "loss": 0.4082, "step": 37840 }, { "epoch": 742.0, "eval_loss": 0.4107462167739868, "eval_runtime": 2.3772, "eval_samples_per_second": 958.707, "eval_steps_per_second": 3.786, "step": 37842 }, { "epoch": 742.16, "learning_rate": 5.218632445736829e-05, "loss": 0.4024, "step": 37850 }, { "epoch": 742.35, "learning_rate": 5.214772195128175e-05, "loss": 0.4072, "step": 37860 }, { "epoch": 742.55, "learning_rate": 5.2109126117550734e-05, "loss": 0.4071, "step": 37870 }, { "epoch": 742.75, "learning_rate": 5.207053696744439e-05, "loss": 0.4052, "step": 37880 }, { "epoch": 742.94, "learning_rate": 5.203195451222986e-05, "loss": 0.4073, "step": 37890 }, { "epoch": 743.0, "eval_loss": 0.4117482900619507, "eval_runtime": 2.3515, "eval_samples_per_second": 969.164, "eval_steps_per_second": 3.827, "step": 37893 }, { "epoch": 743.14, "learning_rate": 5.1993378763172405e-05, "loss": 0.4065, "step": 37900 }, { "epoch": 743.33, "learning_rate": 5.1954809731535205e-05, "loss": 0.4052, "step": 37910 }, { "epoch": 743.53, "learning_rate": 5.1916247428579655e-05, "loss": 0.4077, "step": 37920 }, { "epoch": 743.73, "learning_rate": 5.187769186556503e-05, "loss": 0.4065, "step": 37930 }, { "epoch": 743.92, "learning_rate": 5.183914305374867e-05, "loss": 0.403, "step": 37940 }, { "epoch": 744.0, "eval_loss": 0.4163016974925995, "eval_runtime": 2.3484, "eval_samples_per_second": 970.467, "eval_steps_per_second": 3.832, "step": 37944 }, { "epoch": 744.12, "learning_rate": 5.180060100438604e-05, "loss": 0.4029, "step": 37950 }, { "epoch": 744.31, "learning_rate": 5.176206572873049e-05, "loss": 0.4093, "step": 37960 }, { "epoch": 744.51, "learning_rate": 5.172353723803352e-05, "loss": 0.4116, "step": 37970 }, { "epoch": 744.71, "learning_rate": 5.1685015543544524e-05, "loss": 0.4061, "step": 37980 }, { "epoch": 744.9, "learning_rate": 5.164650065651104e-05, "loss": 0.4024, "step": 37990 }, { "epoch": 745.0, "eval_loss": 0.4079929292201996, "eval_runtime": 2.2235, "eval_samples_per_second": 1024.945, "eval_steps_per_second": 4.048, "step": 37995 }, { "epoch": 745.1, "learning_rate": 5.160799258817854e-05, "loss": 0.4086, "step": 38000 }, { "epoch": 745.29, "learning_rate": 5.156949134979049e-05, "loss": 0.4048, "step": 38010 }, { "epoch": 745.49, "learning_rate": 5.153099695258843e-05, "loss": 0.4021, "step": 38020 }, { "epoch": 745.69, "learning_rate": 5.149250940781183e-05, "loss": 0.4061, "step": 38030 }, { "epoch": 745.88, "learning_rate": 5.145402872669824e-05, "loss": 0.4098, "step": 38040 }, { "epoch": 746.0, "eval_loss": 0.4082351624965668, "eval_runtime": 2.2082, "eval_samples_per_second": 1032.048, "eval_steps_per_second": 4.076, "step": 38046 }, { "epoch": 746.08, "learning_rate": 5.141555492048311e-05, "loss": 0.4062, "step": 38050 }, { "epoch": 746.27, "learning_rate": 5.137708800039999e-05, "loss": 0.4041, "step": 38060 }, { "epoch": 746.47, "learning_rate": 5.1338627977680316e-05, "loss": 0.408, "step": 38070 }, { "epoch": 746.67, "learning_rate": 5.130017486355356e-05, "loss": 0.4061, "step": 38080 }, { "epoch": 746.86, "learning_rate": 5.1261728669247204e-05, "loss": 0.4072, "step": 38090 }, { "epoch": 747.0, "eval_loss": 0.4111115038394928, "eval_runtime": 2.3737, "eval_samples_per_second": 960.092, "eval_steps_per_second": 3.792, "step": 38097 }, { "epoch": 747.06, "learning_rate": 5.1223289405986644e-05, "loss": 0.4063, "step": 38100 }, { "epoch": 747.25, "learning_rate": 5.118485708499533e-05, "loss": 0.4049, "step": 38110 }, { "epoch": 747.45, "learning_rate": 5.114643171749458e-05, "loss": 0.4083, "step": 38120 }, { "epoch": 747.65, "learning_rate": 5.1108013314703824e-05, "loss": 0.4074, "step": 38130 }, { "epoch": 747.84, "learning_rate": 5.106960188784033e-05, "loss": 0.4065, "step": 38140 }, { "epoch": 748.0, "eval_loss": 0.4118553102016449, "eval_runtime": 2.1762, "eval_samples_per_second": 1047.261, "eval_steps_per_second": 4.136, "step": 38148 }, { "epoch": 748.04, "learning_rate": 5.103119744811936e-05, "loss": 0.4044, "step": 38150 }, { "epoch": 748.24, "learning_rate": 5.099280000675421e-05, "loss": 0.4071, "step": 38160 }, { "epoch": 748.43, "learning_rate": 5.095440957495602e-05, "loss": 0.4055, "step": 38170 }, { "epoch": 748.63, "learning_rate": 5.0916026163933973e-05, "loss": 0.4047, "step": 38180 }, { "epoch": 748.82, "learning_rate": 5.0877649784895176e-05, "loss": 0.404, "step": 38190 }, { "epoch": 749.0, "eval_loss": 0.40867891907691956, "eval_runtime": 2.2236, "eval_samples_per_second": 1024.932, "eval_steps_per_second": 4.048, "step": 38199 }, { "epoch": 749.02, "learning_rate": 5.083928044904464e-05, "loss": 0.4102, "step": 38200 }, { "epoch": 749.22, "learning_rate": 5.08009181675854e-05, "loss": 0.4052, "step": 38210 }, { "epoch": 749.41, "learning_rate": 5.0762562951718336e-05, "loss": 0.4052, "step": 38220 }, { "epoch": 749.61, "learning_rate": 5.0724214812642355e-05, "loss": 0.4073, "step": 38230 }, { "epoch": 749.8, "learning_rate": 5.068587376155423e-05, "loss": 0.4002, "step": 38240 }, { "epoch": 750.0, "learning_rate": 5.064753980964874e-05, "loss": 0.4024, "step": 38250 }, { "epoch": 750.0, "eval_loss": 0.40933549404144287, "eval_runtime": 2.3322, "eval_samples_per_second": 977.173, "eval_steps_per_second": 3.859, "step": 38250 }, { "epoch": 750.2, "learning_rate": 5.060921296811852e-05, "loss": 0.4045, "step": 38260 }, { "epoch": 750.39, "learning_rate": 5.0570893248154106e-05, "loss": 0.4013, "step": 38270 }, { "epoch": 750.59, "learning_rate": 5.053258066094407e-05, "loss": 0.4054, "step": 38280 }, { "epoch": 750.78, "learning_rate": 5.0494275217674776e-05, "loss": 0.4054, "step": 38290 }, { "epoch": 750.98, "learning_rate": 5.045597692953061e-05, "loss": 0.4054, "step": 38300 }, { "epoch": 751.0, "eval_loss": 0.41108617186546326, "eval_runtime": 2.3915, "eval_samples_per_second": 952.947, "eval_steps_per_second": 3.763, "step": 38301 }, { "epoch": 751.18, "learning_rate": 5.0417685807693785e-05, "loss": 0.4077, "step": 38310 }, { "epoch": 751.37, "learning_rate": 5.037940186334449e-05, "loss": 0.4014, "step": 38320 }, { "epoch": 751.57, "learning_rate": 5.034112510766074e-05, "loss": 0.4087, "step": 38330 }, { "epoch": 751.76, "learning_rate": 5.0302855551818505e-05, "loss": 0.4092, "step": 38340 }, { "epoch": 751.96, "learning_rate": 5.026459320699166e-05, "loss": 0.403, "step": 38350 }, { "epoch": 752.0, "eval_loss": 0.40934062004089355, "eval_runtime": 2.194, "eval_samples_per_second": 1038.731, "eval_steps_per_second": 4.102, "step": 38352 }, { "epoch": 752.16, "learning_rate": 5.022633808435193e-05, "loss": 0.403, "step": 38360 }, { "epoch": 752.35, "learning_rate": 5.0188090195069e-05, "loss": 0.4061, "step": 38370 }, { "epoch": 752.55, "learning_rate": 5.0149849550310346e-05, "loss": 0.4043, "step": 38380 }, { "epoch": 752.75, "learning_rate": 5.0111616161241436e-05, "loss": 0.4077, "step": 38390 }, { "epoch": 752.94, "learning_rate": 5.007339003902553e-05, "loss": 0.4042, "step": 38400 }, { "epoch": 753.0, "eval_loss": 0.4117041826248169, "eval_runtime": 2.3245, "eval_samples_per_second": 980.409, "eval_steps_per_second": 3.872, "step": 38403 }, { "epoch": 753.14, "learning_rate": 5.0035171194823804e-05, "loss": 0.4031, "step": 38410 }, { "epoch": 753.33, "learning_rate": 4.9996959639795356e-05, "loss": 0.4069, "step": 38420 }, { "epoch": 753.53, "learning_rate": 4.9958755385097035e-05, "loss": 0.4078, "step": 38430 }, { "epoch": 753.73, "learning_rate": 4.992055844188368e-05, "loss": 0.4035, "step": 38440 }, { "epoch": 753.92, "learning_rate": 4.988236882130792e-05, "loss": 0.4025, "step": 38450 }, { "epoch": 754.0, "eval_loss": 0.40878942608833313, "eval_runtime": 2.2485, "eval_samples_per_second": 1013.564, "eval_steps_per_second": 4.003, "step": 38454 }, { "epoch": 754.12, "learning_rate": 4.9844186534520305e-05, "loss": 0.4056, "step": 38460 }, { "epoch": 754.31, "learning_rate": 4.9806011592669205e-05, "loss": 0.4034, "step": 38470 }, { "epoch": 754.51, "learning_rate": 4.9767844006900806e-05, "loss": 0.4106, "step": 38480 }, { "epoch": 754.71, "learning_rate": 4.9729683788359245e-05, "loss": 0.4055, "step": 38490 }, { "epoch": 754.9, "learning_rate": 4.969153094818643e-05, "loss": 0.4025, "step": 38500 }, { "epoch": 755.0, "eval_loss": 0.41021928191185, "eval_runtime": 2.3051, "eval_samples_per_second": 988.664, "eval_steps_per_second": 3.904, "step": 38505 }, { "epoch": 755.1, "learning_rate": 4.965338549752215e-05, "loss": 0.4049, "step": 38510 }, { "epoch": 755.29, "learning_rate": 4.9615247447504e-05, "loss": 0.4027, "step": 38520 }, { "epoch": 755.49, "learning_rate": 4.9577116809267496e-05, "loss": 0.4066, "step": 38530 }, { "epoch": 755.69, "learning_rate": 4.9538993593945874e-05, "loss": 0.4049, "step": 38540 }, { "epoch": 755.88, "learning_rate": 4.9500877812670294e-05, "loss": 0.4056, "step": 38550 }, { "epoch": 756.0, "eval_loss": 0.41347235441207886, "eval_runtime": 2.3625, "eval_samples_per_second": 964.654, "eval_steps_per_second": 3.81, "step": 38556 }, { "epoch": 756.08, "learning_rate": 4.94627694765697e-05, "loss": 0.4062, "step": 38560 }, { "epoch": 756.27, "learning_rate": 4.942466859677087e-05, "loss": 0.4062, "step": 38570 }, { "epoch": 756.47, "learning_rate": 4.938657518439847e-05, "loss": 0.4028, "step": 38580 }, { "epoch": 756.67, "learning_rate": 4.934848925057484e-05, "loss": 0.4066, "step": 38590 }, { "epoch": 756.86, "learning_rate": 4.931041080642028e-05, "loss": 0.4025, "step": 38600 }, { "epoch": 757.0, "eval_loss": 0.4124828279018402, "eval_runtime": 2.3537, "eval_samples_per_second": 968.283, "eval_steps_per_second": 3.824, "step": 38607 }, { "epoch": 757.06, "learning_rate": 4.927233986305284e-05, "loss": 0.4103, "step": 38610 }, { "epoch": 757.25, "learning_rate": 4.923427643158835e-05, "loss": 0.4059, "step": 38620 }, { "epoch": 757.45, "learning_rate": 4.9196220523140555e-05, "loss": 0.4034, "step": 38630 }, { "epoch": 757.65, "learning_rate": 4.9158172148820846e-05, "loss": 0.4035, "step": 38640 }, { "epoch": 757.84, "learning_rate": 4.9120131319738555e-05, "loss": 0.4035, "step": 38650 }, { "epoch": 758.0, "eval_loss": 0.410969078540802, "eval_runtime": 2.213, "eval_samples_per_second": 1029.805, "eval_steps_per_second": 4.067, "step": 38658 }, { "epoch": 758.04, "learning_rate": 4.908209804700074e-05, "loss": 0.4063, "step": 38660 }, { "epoch": 758.24, "learning_rate": 4.904407234171227e-05, "loss": 0.4016, "step": 38670 }, { "epoch": 758.43, "learning_rate": 4.900605421497583e-05, "loss": 0.4021, "step": 38680 }, { "epoch": 758.63, "learning_rate": 4.896804367789179e-05, "loss": 0.3997, "step": 38690 }, { "epoch": 758.82, "learning_rate": 4.893004074155846e-05, "loss": 0.4026, "step": 38700 }, { "epoch": 759.0, "eval_loss": 0.41265445947647095, "eval_runtime": 2.3125, "eval_samples_per_second": 985.522, "eval_steps_per_second": 3.892, "step": 38709 }, { "epoch": 759.02, "learning_rate": 4.889204541707179e-05, "loss": 0.4016, "step": 38710 }, { "epoch": 759.22, "learning_rate": 4.885405771552561e-05, "loss": 0.3999, "step": 38720 }, { "epoch": 759.41, "learning_rate": 4.881607764801146e-05, "loss": 0.4046, "step": 38730 }, { "epoch": 759.61, "learning_rate": 4.8778105225618705e-05, "loss": 0.4063, "step": 38740 }, { "epoch": 759.8, "learning_rate": 4.8740140459434405e-05, "loss": 0.4072, "step": 38750 }, { "epoch": 760.0, "learning_rate": 4.8702183360543426e-05, "loss": 0.4028, "step": 38760 }, { "epoch": 760.0, "eval_loss": 0.4106810688972473, "eval_runtime": 2.3297, "eval_samples_per_second": 978.218, "eval_steps_per_second": 3.863, "step": 38760 }, { "epoch": 760.2, "learning_rate": 4.866423394002841e-05, "loss": 0.402, "step": 38770 }, { "epoch": 760.39, "learning_rate": 4.8626292208969734e-05, "loss": 0.4087, "step": 38780 }, { "epoch": 760.59, "learning_rate": 4.858835817844557e-05, "loss": 0.4029, "step": 38790 }, { "epoch": 760.78, "learning_rate": 4.855043185953175e-05, "loss": 0.4045, "step": 38800 }, { "epoch": 760.98, "learning_rate": 4.851251326330196e-05, "loss": 0.4007, "step": 38810 }, { "epoch": 761.0, "eval_loss": 0.4079250693321228, "eval_runtime": 2.2583, "eval_samples_per_second": 1009.151, "eval_steps_per_second": 3.985, "step": 38811 }, { "epoch": 761.18, "learning_rate": 4.8474602400827575e-05, "loss": 0.4066, "step": 38820 }, { "epoch": 761.37, "learning_rate": 4.843669928317769e-05, "loss": 0.4041, "step": 38830 }, { "epoch": 761.57, "learning_rate": 4.8398803921419235e-05, "loss": 0.4006, "step": 38840 }, { "epoch": 761.76, "learning_rate": 4.8360916326616735e-05, "loss": 0.4042, "step": 38850 }, { "epoch": 761.96, "learning_rate": 4.832303650983258e-05, "loss": 0.4043, "step": 38860 }, { "epoch": 762.0, "eval_loss": 0.4105600118637085, "eval_runtime": 2.2483, "eval_samples_per_second": 1013.643, "eval_steps_per_second": 4.003, "step": 38862 }, { "epoch": 762.16, "learning_rate": 4.82851644821268e-05, "loss": 0.407, "step": 38870 }, { "epoch": 762.35, "learning_rate": 4.824730025455719e-05, "loss": 0.4043, "step": 38880 }, { "epoch": 762.55, "learning_rate": 4.820944383817928e-05, "loss": 0.3991, "step": 38890 }, { "epoch": 762.75, "learning_rate": 4.8171595244046256e-05, "loss": 0.4011, "step": 38900 }, { "epoch": 762.94, "learning_rate": 4.8133754483209105e-05, "loss": 0.3979, "step": 38910 }, { "epoch": 763.0, "eval_loss": 0.40839433670043945, "eval_runtime": 2.2395, "eval_samples_per_second": 1017.652, "eval_steps_per_second": 4.019, "step": 38913 }, { "epoch": 763.14, "learning_rate": 4.809592156671645e-05, "loss": 0.4054, "step": 38920 }, { "epoch": 763.33, "learning_rate": 4.8058096505614704e-05, "loss": 0.4027, "step": 38930 }, { "epoch": 763.53, "learning_rate": 4.8020279310947924e-05, "loss": 0.401, "step": 38940 }, { "epoch": 763.73, "learning_rate": 4.798246999375785e-05, "loss": 0.4027, "step": 38950 }, { "epoch": 763.92, "learning_rate": 4.7944668565084e-05, "loss": 0.4071, "step": 38960 }, { "epoch": 764.0, "eval_loss": 0.4093049466609955, "eval_runtime": 2.2294, "eval_samples_per_second": 1022.268, "eval_steps_per_second": 4.037, "step": 38964 }, { "epoch": 764.12, "learning_rate": 4.790687503596353e-05, "loss": 0.4103, "step": 38970 }, { "epoch": 764.31, "learning_rate": 4.786908941743132e-05, "loss": 0.4023, "step": 38980 }, { "epoch": 764.51, "learning_rate": 4.783131172051991e-05, "loss": 0.4053, "step": 38990 }, { "epoch": 764.71, "learning_rate": 4.779354195625958e-05, "loss": 0.403, "step": 39000 }, { "epoch": 764.9, "learning_rate": 4.775578013567824e-05, "loss": 0.4097, "step": 39010 }, { "epoch": 765.0, "eval_loss": 0.41303664445877075, "eval_runtime": 2.3567, "eval_samples_per_second": 967.04, "eval_steps_per_second": 3.819, "step": 39015 }, { "epoch": 765.1, "learning_rate": 4.7718026269801465e-05, "loss": 0.4063, "step": 39020 }, { "epoch": 765.29, "learning_rate": 4.7680280369652595e-05, "loss": 0.4052, "step": 39030 }, { "epoch": 765.49, "learning_rate": 4.7642542446252544e-05, "loss": 0.4057, "step": 39040 }, { "epoch": 765.69, "learning_rate": 4.760481251062001e-05, "loss": 0.4035, "step": 39050 }, { "epoch": 765.88, "learning_rate": 4.756709057377121e-05, "loss": 0.4052, "step": 39060 }, { "epoch": 766.0, "eval_loss": 0.4117512106895447, "eval_runtime": 2.2303, "eval_samples_per_second": 1021.815, "eval_steps_per_second": 4.035, "step": 39066 }, { "epoch": 766.08, "learning_rate": 4.7529376646720166e-05, "loss": 0.4075, "step": 39070 }, { "epoch": 766.27, "learning_rate": 4.7491670740478496e-05, "loss": 0.4024, "step": 39080 }, { "epoch": 766.47, "learning_rate": 4.745397286605545e-05, "loss": 0.4008, "step": 39090 }, { "epoch": 766.67, "learning_rate": 4.741628303445802e-05, "loss": 0.3995, "step": 39100 }, { "epoch": 766.86, "learning_rate": 4.737860125669074e-05, "loss": 0.4063, "step": 39110 }, { "epoch": 767.0, "eval_loss": 0.4054567217826843, "eval_runtime": 2.1606, "eval_samples_per_second": 1054.78, "eval_steps_per_second": 4.165, "step": 39117 }, { "epoch": 767.06, "learning_rate": 4.73409275437559e-05, "loss": 0.4031, "step": 39120 }, { "epoch": 767.25, "learning_rate": 4.730326190665333e-05, "loss": 0.4016, "step": 39130 }, { "epoch": 767.45, "learning_rate": 4.726560435638061e-05, "loss": 0.4006, "step": 39140 }, { "epoch": 767.65, "learning_rate": 4.72279549039329e-05, "loss": 0.4071, "step": 39150 }, { "epoch": 767.84, "learning_rate": 4.719031356030294e-05, "loss": 0.4051, "step": 39160 }, { "epoch": 768.0, "eval_loss": 0.4055671691894531, "eval_runtime": 2.2198, "eval_samples_per_second": 1026.654, "eval_steps_per_second": 4.054, "step": 39168 }, { "epoch": 768.04, "learning_rate": 4.715268033648123e-05, "loss": 0.4019, "step": 39170 }, { "epoch": 768.24, "learning_rate": 4.711505524345578e-05, "loss": 0.4041, "step": 39180 }, { "epoch": 768.43, "learning_rate": 4.707743829221233e-05, "loss": 0.4001, "step": 39190 }, { "epoch": 768.63, "learning_rate": 4.703982949373414e-05, "loss": 0.4049, "step": 39200 }, { "epoch": 768.82, "learning_rate": 4.700222885900221e-05, "loss": 0.403, "step": 39210 }, { "epoch": 769.0, "eval_loss": 0.4054199457168579, "eval_runtime": 2.2681, "eval_samples_per_second": 1004.796, "eval_steps_per_second": 3.968, "step": 39219 }, { "epoch": 769.02, "learning_rate": 4.696463639899501e-05, "loss": 0.3987, "step": 39220 }, { "epoch": 769.22, "learning_rate": 4.692705212468873e-05, "loss": 0.3996, "step": 39230 }, { "epoch": 769.41, "learning_rate": 4.688947604705715e-05, "loss": 0.4056, "step": 39240 }, { "epoch": 769.61, "learning_rate": 4.685190817707163e-05, "loss": 0.4049, "step": 39250 }, { "epoch": 769.8, "learning_rate": 4.681434852570118e-05, "loss": 0.402, "step": 39260 }, { "epoch": 770.0, "learning_rate": 4.6776797103912336e-05, "loss": 0.4061, "step": 39270 }, { "epoch": 770.0, "eval_loss": 0.41016271710395813, "eval_runtime": 2.241, "eval_samples_per_second": 1016.966, "eval_steps_per_second": 4.016, "step": 39270 }, { "epoch": 770.2, "learning_rate": 4.673925392266931e-05, "loss": 0.403, "step": 39280 }, { "epoch": 770.39, "learning_rate": 4.670171899293387e-05, "loss": 0.4001, "step": 39290 }, { "epoch": 770.59, "learning_rate": 4.6664192325665355e-05, "loss": 0.3978, "step": 39300 }, { "epoch": 770.78, "learning_rate": 4.6626673931820754e-05, "loss": 0.4044, "step": 39310 }, { "epoch": 770.98, "learning_rate": 4.658916382235455e-05, "loss": 0.3989, "step": 39320 }, { "epoch": 771.0, "eval_loss": 0.4141434133052826, "eval_runtime": 2.3668, "eval_samples_per_second": 962.921, "eval_steps_per_second": 3.803, "step": 39321 }, { "epoch": 771.18, "learning_rate": 4.655166200821891e-05, "loss": 0.4017, "step": 39330 }, { "epoch": 771.37, "learning_rate": 4.651416850036347e-05, "loss": 0.4017, "step": 39340 }, { "epoch": 771.57, "learning_rate": 4.6476683309735577e-05, "loss": 0.4026, "step": 39350 }, { "epoch": 771.76, "learning_rate": 4.6439206447280014e-05, "loss": 0.3998, "step": 39360 }, { "epoch": 771.96, "learning_rate": 4.640173792393918e-05, "loss": 0.4022, "step": 39370 }, { "epoch": 772.0, "eval_loss": 0.4049689769744873, "eval_runtime": 2.3487, "eval_samples_per_second": 970.308, "eval_steps_per_second": 3.832, "step": 39372 }, { "epoch": 772.16, "learning_rate": 4.636427775065309e-05, "loss": 0.4021, "step": 39380 }, { "epoch": 772.35, "learning_rate": 4.632682593835923e-05, "loss": 0.3976, "step": 39390 }, { "epoch": 772.55, "learning_rate": 4.628938249799275e-05, "loss": 0.4025, "step": 39400 }, { "epoch": 772.75, "learning_rate": 4.6251947440486256e-05, "loss": 0.4031, "step": 39410 }, { "epoch": 772.94, "learning_rate": 4.621452077676999e-05, "loss": 0.4018, "step": 39420 }, { "epoch": 773.0, "eval_loss": 0.4097810685634613, "eval_runtime": 2.2555, "eval_samples_per_second": 1010.415, "eval_steps_per_second": 3.99, "step": 39423 }, { "epoch": 773.14, "learning_rate": 4.6177102517771665e-05, "loss": 0.4007, "step": 39430 }, { "epoch": 773.33, "learning_rate": 4.613969267441658e-05, "loss": 0.4034, "step": 39440 }, { "epoch": 773.53, "learning_rate": 4.6102291257627594e-05, "loss": 0.4008, "step": 39450 }, { "epoch": 773.73, "learning_rate": 4.606489827832507e-05, "loss": 0.3996, "step": 39460 }, { "epoch": 773.92, "learning_rate": 4.602751374742697e-05, "loss": 0.3993, "step": 39470 }, { "epoch": 774.0, "eval_loss": 0.40897953510284424, "eval_runtime": 2.2539, "eval_samples_per_second": 1011.143, "eval_steps_per_second": 3.993, "step": 39474 }, { "epoch": 774.12, "learning_rate": 4.5990137675848666e-05, "loss": 0.3995, "step": 39480 }, { "epoch": 774.31, "learning_rate": 4.595277007450319e-05, "loss": 0.4037, "step": 39490 }, { "epoch": 774.51, "learning_rate": 4.591541095430105e-05, "loss": 0.4052, "step": 39500 }, { "epoch": 774.71, "learning_rate": 4.5878060326150234e-05, "loss": 0.3992, "step": 39510 }, { "epoch": 774.9, "learning_rate": 4.584071820095636e-05, "loss": 0.3984, "step": 39520 }, { "epoch": 775.0, "eval_loss": 0.40743353962898254, "eval_runtime": 2.3001, "eval_samples_per_second": 990.816, "eval_steps_per_second": 3.913, "step": 39525 }, { "epoch": 775.1, "learning_rate": 4.580338458962242e-05, "loss": 0.4075, "step": 39530 }, { "epoch": 775.29, "learning_rate": 4.576605950304905e-05, "loss": 0.401, "step": 39540 }, { "epoch": 775.49, "learning_rate": 4.572874295213431e-05, "loss": 0.4026, "step": 39550 }, { "epoch": 775.69, "learning_rate": 4.569143494777383e-05, "loss": 0.4046, "step": 39560 }, { "epoch": 775.88, "learning_rate": 4.5654135500860715e-05, "loss": 0.4034, "step": 39570 }, { "epoch": 776.0, "eval_loss": 0.40677332878112793, "eval_runtime": 2.3793, "eval_samples_per_second": 957.851, "eval_steps_per_second": 3.783, "step": 39576 }, { "epoch": 776.08, "learning_rate": 4.561684462228553e-05, "loss": 0.4034, "step": 39580 }, { "epoch": 776.27, "learning_rate": 4.5579562322936416e-05, "loss": 0.3984, "step": 39590 }, { "epoch": 776.47, "learning_rate": 4.554228861369895e-05, "loss": 0.3992, "step": 39600 }, { "epoch": 776.67, "learning_rate": 4.550502350545626e-05, "loss": 0.4, "step": 39610 }, { "epoch": 776.86, "learning_rate": 4.546776700908892e-05, "loss": 0.4036, "step": 39620 }, { "epoch": 777.0, "eval_loss": 0.4042729437351227, "eval_runtime": 2.3533, "eval_samples_per_second": 968.434, "eval_steps_per_second": 3.824, "step": 39627 }, { "epoch": 777.06, "learning_rate": 4.543051913547495e-05, "loss": 0.4006, "step": 39630 }, { "epoch": 777.25, "learning_rate": 4.5393279895489934e-05, "loss": 0.403, "step": 39640 }, { "epoch": 777.45, "learning_rate": 4.535604930000689e-05, "loss": 0.4016, "step": 39650 }, { "epoch": 777.65, "learning_rate": 4.531882735989633e-05, "loss": 0.398, "step": 39660 }, { "epoch": 777.84, "learning_rate": 4.5281614086026227e-05, "loss": 0.4027, "step": 39670 }, { "epoch": 778.0, "eval_loss": 0.40563011169433594, "eval_runtime": 2.2438, "eval_samples_per_second": 1015.69, "eval_steps_per_second": 4.011, "step": 39678 }, { "epoch": 778.04, "learning_rate": 4.5244409489262054e-05, "loss": 0.3998, "step": 39680 }, { "epoch": 778.24, "learning_rate": 4.520721358046667e-05, "loss": 0.3989, "step": 39690 }, { "epoch": 778.43, "learning_rate": 4.5170026370500465e-05, "loss": 0.3968, "step": 39700 }, { "epoch": 778.63, "learning_rate": 4.51328478702213e-05, "loss": 0.4031, "step": 39710 }, { "epoch": 778.82, "learning_rate": 4.509567809048445e-05, "loss": 0.3999, "step": 39720 }, { "epoch": 779.0, "eval_loss": 0.410388708114624, "eval_runtime": 2.3154, "eval_samples_per_second": 984.289, "eval_steps_per_second": 3.887, "step": 39729 }, { "epoch": 779.02, "learning_rate": 4.505851704214269e-05, "loss": 0.4029, "step": 39730 }, { "epoch": 779.22, "learning_rate": 4.502136473604616e-05, "loss": 0.3985, "step": 39740 }, { "epoch": 779.41, "learning_rate": 4.4984221183042566e-05, "loss": 0.3996, "step": 39750 }, { "epoch": 779.61, "learning_rate": 4.494708639397696e-05, "loss": 0.4005, "step": 39760 }, { "epoch": 779.8, "learning_rate": 4.490996037969187e-05, "loss": 0.4032, "step": 39770 }, { "epoch": 780.0, "learning_rate": 4.48728431510273e-05, "loss": 0.401, "step": 39780 }, { "epoch": 780.0, "eval_loss": 0.4033023416996002, "eval_runtime": 2.1991, "eval_samples_per_second": 1036.345, "eval_steps_per_second": 4.093, "step": 39780 }, { "epoch": 780.2, "learning_rate": 4.483573471882061e-05, "loss": 0.3999, "step": 39790 }, { "epoch": 780.39, "learning_rate": 4.479863509390666e-05, "loss": 0.3978, "step": 39800 }, { "epoch": 780.59, "learning_rate": 4.4761544287117696e-05, "loss": 0.3999, "step": 39810 }, { "epoch": 780.78, "learning_rate": 4.472446230928343e-05, "loss": 0.4011, "step": 39820 }, { "epoch": 780.98, "learning_rate": 4.4687389171230975e-05, "loss": 0.4058, "step": 39830 }, { "epoch": 781.0, "eval_loss": 0.40577030181884766, "eval_runtime": 2.3092, "eval_samples_per_second": 986.928, "eval_steps_per_second": 3.897, "step": 39831 }, { "epoch": 781.18, "learning_rate": 4.465032488378481e-05, "loss": 0.3979, "step": 39840 }, { "epoch": 781.37, "learning_rate": 4.461326945776694e-05, "loss": 0.4013, "step": 39850 }, { "epoch": 781.57, "learning_rate": 4.457622290399668e-05, "loss": 0.4018, "step": 39860 }, { "epoch": 781.76, "learning_rate": 4.453918523329084e-05, "loss": 0.4026, "step": 39870 }, { "epoch": 781.96, "learning_rate": 4.4502156456463536e-05, "loss": 0.3977, "step": 39880 }, { "epoch": 782.0, "eval_loss": 0.40937620401382446, "eval_runtime": 2.3002, "eval_samples_per_second": 990.777, "eval_steps_per_second": 3.913, "step": 39882 }, { "epoch": 782.16, "learning_rate": 4.446513658432642e-05, "loss": 0.4016, "step": 39890 }, { "epoch": 782.35, "learning_rate": 4.44281256276884e-05, "loss": 0.3995, "step": 39900 }, { "epoch": 782.55, "learning_rate": 4.439112359735588e-05, "loss": 0.4021, "step": 39910 }, { "epoch": 782.75, "learning_rate": 4.4354130504132636e-05, "loss": 0.399, "step": 39920 }, { "epoch": 782.94, "learning_rate": 4.4317146358819794e-05, "loss": 0.402, "step": 39930 }, { "epoch": 783.0, "eval_loss": 0.4056869447231293, "eval_runtime": 2.1949, "eval_samples_per_second": 1038.316, "eval_steps_per_second": 4.1, "step": 39933 }, { "epoch": 783.14, "learning_rate": 4.428017117221596e-05, "loss": 0.3983, "step": 39940 }, { "epoch": 783.33, "learning_rate": 4.4243204955116995e-05, "loss": 0.3968, "step": 39950 }, { "epoch": 783.53, "learning_rate": 4.420624771831625e-05, "loss": 0.3991, "step": 39960 }, { "epoch": 783.73, "learning_rate": 4.41692994726044e-05, "loss": 0.3984, "step": 39970 }, { "epoch": 783.92, "learning_rate": 4.4132360228769506e-05, "loss": 0.3972, "step": 39980 }, { "epoch": 784.0, "eval_loss": 0.4044448733329773, "eval_runtime": 2.3046, "eval_samples_per_second": 988.877, "eval_steps_per_second": 3.905, "step": 39984 }, { "epoch": 784.12, "learning_rate": 4.409542999759703e-05, "loss": 0.404, "step": 39990 }, { "epoch": 784.31, "learning_rate": 4.4058508789869736e-05, "loss": 0.4004, "step": 40000 }, { "epoch": 784.51, "learning_rate": 4.4021596616367825e-05, "loss": 0.3969, "step": 40010 }, { "epoch": 784.71, "learning_rate": 4.3984693487868806e-05, "loss": 0.4001, "step": 40020 }, { "epoch": 784.9, "learning_rate": 4.394779941514759e-05, "loss": 0.3997, "step": 40030 }, { "epoch": 785.0, "eval_loss": 0.40749338269233704, "eval_runtime": 2.2723, "eval_samples_per_second": 1002.965, "eval_steps_per_second": 3.961, "step": 40035 }, { "epoch": 785.1, "learning_rate": 4.3910914408976426e-05, "loss": 0.3994, "step": 40040 }, { "epoch": 785.29, "learning_rate": 4.3874038480124876e-05, "loss": 0.4003, "step": 40050 }, { "epoch": 785.49, "learning_rate": 4.383717163935992e-05, "loss": 0.3993, "step": 40060 }, { "epoch": 785.69, "learning_rate": 4.380031389744584e-05, "loss": 0.4, "step": 40070 }, { "epoch": 785.88, "learning_rate": 4.376346526514429e-05, "loss": 0.4003, "step": 40080 }, { "epoch": 786.0, "eval_loss": 0.4073701798915863, "eval_runtime": 2.1998, "eval_samples_per_second": 1036.005, "eval_steps_per_second": 4.091, "step": 40086 }, { "epoch": 786.08, "learning_rate": 4.372662575321423e-05, "loss": 0.3957, "step": 40090 }, { "epoch": 786.27, "learning_rate": 4.368979537241202e-05, "loss": 0.3999, "step": 40100 }, { "epoch": 786.47, "learning_rate": 4.365297413349127e-05, "loss": 0.3975, "step": 40110 }, { "epoch": 786.67, "learning_rate": 4.3616162047202904e-05, "loss": 0.3998, "step": 40120 }, { "epoch": 786.86, "learning_rate": 4.3579359124295356e-05, "loss": 0.3973, "step": 40130 }, { "epoch": 787.0, "eval_loss": 0.4044763445854187, "eval_runtime": 2.3538, "eval_samples_per_second": 968.204, "eval_steps_per_second": 3.824, "step": 40137 }, { "epoch": 787.06, "learning_rate": 4.3542565375514164e-05, "loss": 0.3938, "step": 40140 }, { "epoch": 787.25, "learning_rate": 4.350578081160235e-05, "loss": 0.3976, "step": 40150 }, { "epoch": 787.45, "learning_rate": 4.346900544330011e-05, "loss": 0.3953, "step": 40160 }, { "epoch": 787.65, "learning_rate": 4.343223928134511e-05, "loss": 0.4008, "step": 40170 }, { "epoch": 787.84, "learning_rate": 4.3395482336472175e-05, "loss": 0.3989, "step": 40180 }, { "epoch": 788.0, "eval_loss": 0.4078381061553955, "eval_runtime": 2.2214, "eval_samples_per_second": 1025.951, "eval_steps_per_second": 4.052, "step": 40188 }, { "epoch": 788.04, "learning_rate": 4.335873461941355e-05, "loss": 0.3958, "step": 40190 }, { "epoch": 788.24, "learning_rate": 4.332199614089878e-05, "loss": 0.4011, "step": 40200 }, { "epoch": 788.43, "learning_rate": 4.328526691165462e-05, "loss": 0.398, "step": 40210 }, { "epoch": 788.63, "learning_rate": 4.3248546942405235e-05, "loss": 0.3984, "step": 40220 }, { "epoch": 788.82, "learning_rate": 4.321183624387196e-05, "loss": 0.4029, "step": 40230 }, { "epoch": 789.0, "eval_loss": 0.40923169255256653, "eval_runtime": 2.2528, "eval_samples_per_second": 1011.619, "eval_steps_per_second": 3.995, "step": 40239 }, { "epoch": 789.02, "learning_rate": 4.3175134826773626e-05, "loss": 0.3972, "step": 40240 }, { "epoch": 789.22, "learning_rate": 4.313844270182615e-05, "loss": 0.3998, "step": 40250 }, { "epoch": 789.41, "learning_rate": 4.31017598797428e-05, "loss": 0.4007, "step": 40260 }, { "epoch": 789.61, "learning_rate": 4.306508637123419e-05, "loss": 0.3986, "step": 40270 }, { "epoch": 789.8, "learning_rate": 4.302842218700808e-05, "loss": 0.396, "step": 40280 }, { "epoch": 790.0, "learning_rate": 4.299176733776972e-05, "loss": 0.4011, "step": 40290 }, { "epoch": 790.0, "eval_loss": 0.4051341116428375, "eval_runtime": 2.1983, "eval_samples_per_second": 1036.73, "eval_steps_per_second": 4.094, "step": 40290 }, { "epoch": 790.2, "learning_rate": 4.295512183422145e-05, "loss": 0.3981, "step": 40300 }, { "epoch": 790.39, "learning_rate": 4.291848568706289e-05, "loss": 0.401, "step": 40310 }, { "epoch": 790.59, "learning_rate": 4.288185890699107e-05, "loss": 0.4029, "step": 40320 }, { "epoch": 790.78, "learning_rate": 4.284524150470007e-05, "loss": 0.3987, "step": 40330 }, { "epoch": 790.98, "learning_rate": 4.28086334908815e-05, "loss": 0.3975, "step": 40340 }, { "epoch": 791.0, "eval_loss": 0.4008138179779053, "eval_runtime": 2.3676, "eval_samples_per_second": 962.596, "eval_steps_per_second": 3.801, "step": 40341 }, { "epoch": 791.18, "learning_rate": 4.277203487622397e-05, "loss": 0.4027, "step": 40350 }, { "epoch": 791.37, "learning_rate": 4.273544567141354e-05, "loss": 0.3978, "step": 40360 }, { "epoch": 791.57, "learning_rate": 4.2698865887133414e-05, "loss": 0.3928, "step": 40370 }, { "epoch": 791.76, "learning_rate": 4.266229553406403e-05, "loss": 0.3963, "step": 40380 }, { "epoch": 791.96, "learning_rate": 4.262573462288314e-05, "loss": 0.3952, "step": 40390 }, { "epoch": 792.0, "eval_loss": 0.4049001634120941, "eval_runtime": 2.1953, "eval_samples_per_second": 1038.115, "eval_steps_per_second": 4.1, "step": 40392 }, { "epoch": 792.16, "learning_rate": 4.258918316426573e-05, "loss": 0.3965, "step": 40400 }, { "epoch": 792.35, "learning_rate": 4.255264116888404e-05, "loss": 0.3969, "step": 40410 }, { "epoch": 792.55, "learning_rate": 4.251610864740744e-05, "loss": 0.3987, "step": 40420 }, { "epoch": 792.75, "learning_rate": 4.247958561050269e-05, "loss": 0.3956, "step": 40430 }, { "epoch": 792.94, "learning_rate": 4.244307206883364e-05, "loss": 0.4032, "step": 40440 }, { "epoch": 793.0, "eval_loss": 0.4053691029548645, "eval_runtime": 2.2536, "eval_samples_per_second": 1011.28, "eval_steps_per_second": 3.994, "step": 40443 }, { "epoch": 793.14, "learning_rate": 4.240656803306145e-05, "loss": 0.3987, "step": 40450 }, { "epoch": 793.33, "learning_rate": 4.2370073513844523e-05, "loss": 0.3955, "step": 40460 }, { "epoch": 793.53, "learning_rate": 4.233358852183838e-05, "loss": 0.3925, "step": 40470 }, { "epoch": 793.73, "learning_rate": 4.2297113067695884e-05, "loss": 0.3954, "step": 40480 }, { "epoch": 793.92, "learning_rate": 4.2260647162066976e-05, "loss": 0.4027, "step": 40490 }, { "epoch": 794.0, "eval_loss": 0.40342316031455994, "eval_runtime": 2.3542, "eval_samples_per_second": 968.056, "eval_steps_per_second": 3.823, "step": 40494 }, { "epoch": 794.12, "learning_rate": 4.222419081559899e-05, "loss": 0.4001, "step": 40500 }, { "epoch": 794.31, "learning_rate": 4.218774403893632e-05, "loss": 0.3975, "step": 40510 }, { "epoch": 794.51, "learning_rate": 4.2151306842720574e-05, "loss": 0.3954, "step": 40520 }, { "epoch": 794.71, "learning_rate": 4.211487923759066e-05, "loss": 0.3969, "step": 40530 }, { "epoch": 794.9, "learning_rate": 4.207846123418254e-05, "loss": 0.397, "step": 40540 }, { "epoch": 795.0, "eval_loss": 0.40420642495155334, "eval_runtime": 2.219, "eval_samples_per_second": 1027.021, "eval_steps_per_second": 4.056, "step": 40545 }, { "epoch": 795.1, "learning_rate": 4.2042052843129586e-05, "loss": 0.3991, "step": 40550 }, { "epoch": 795.29, "learning_rate": 4.200565407506214e-05, "loss": 0.3996, "step": 40560 }, { "epoch": 795.49, "learning_rate": 4.196926494060788e-05, "loss": 0.3975, "step": 40570 }, { "epoch": 795.69, "learning_rate": 4.1932885450391594e-05, "loss": 0.3958, "step": 40580 }, { "epoch": 795.88, "learning_rate": 4.189651561503527e-05, "loss": 0.3941, "step": 40590 }, { "epoch": 796.0, "eval_loss": 0.40304508805274963, "eval_runtime": 2.3229, "eval_samples_per_second": 981.082, "eval_steps_per_second": 3.874, "step": 40596 }, { "epoch": 796.08, "learning_rate": 4.1860155445158104e-05, "loss": 0.3953, "step": 40600 }, { "epoch": 796.27, "learning_rate": 4.182380495137646e-05, "loss": 0.3923, "step": 40610 }, { "epoch": 796.47, "learning_rate": 4.1787464144303895e-05, "loss": 0.4072, "step": 40620 }, { "epoch": 796.67, "learning_rate": 4.175113303455106e-05, "loss": 0.3953, "step": 40630 }, { "epoch": 796.86, "learning_rate": 4.1714811632725886e-05, "loss": 0.3929, "step": 40640 }, { "epoch": 797.0, "eval_loss": 0.4031014144420624, "eval_runtime": 2.2705, "eval_samples_per_second": 1003.756, "eval_steps_per_second": 3.964, "step": 40647 }, { "epoch": 797.06, "learning_rate": 4.167849994943336e-05, "loss": 0.4023, "step": 40650 }, { "epoch": 797.25, "learning_rate": 4.16421979952757e-05, "loss": 0.4006, "step": 40660 }, { "epoch": 797.45, "learning_rate": 4.16059057808523e-05, "loss": 0.399, "step": 40670 }, { "epoch": 797.65, "learning_rate": 4.156962331675963e-05, "loss": 0.3976, "step": 40680 }, { "epoch": 797.84, "learning_rate": 4.153335061359141e-05, "loss": 0.4016, "step": 40690 }, { "epoch": 798.0, "eval_loss": 0.4003075361251831, "eval_runtime": 2.2633, "eval_samples_per_second": 1006.944, "eval_steps_per_second": 3.977, "step": 40698 }, { "epoch": 798.04, "learning_rate": 4.149708768193837e-05, "loss": 0.4008, "step": 40700 }, { "epoch": 798.24, "learning_rate": 4.14608345323886e-05, "loss": 0.3992, "step": 40710 }, { "epoch": 798.43, "learning_rate": 4.142459117552715e-05, "loss": 0.4014, "step": 40720 }, { "epoch": 798.63, "learning_rate": 4.1388357621936246e-05, "loss": 0.3954, "step": 40730 }, { "epoch": 798.82, "learning_rate": 4.1352133882195335e-05, "loss": 0.3926, "step": 40740 }, { "epoch": 799.0, "eval_loss": 0.4025706648826599, "eval_runtime": 2.2185, "eval_samples_per_second": 1027.263, "eval_steps_per_second": 4.057, "step": 40749 }, { "epoch": 799.02, "learning_rate": 4.131591996688084e-05, "loss": 0.395, "step": 40750 }, { "epoch": 799.22, "learning_rate": 4.127971588656656e-05, "loss": 0.3989, "step": 40760 }, { "epoch": 799.41, "learning_rate": 4.124352165182317e-05, "loss": 0.3976, "step": 40770 }, { "epoch": 799.61, "learning_rate": 4.120733727321864e-05, "loss": 0.3955, "step": 40780 }, { "epoch": 799.8, "learning_rate": 4.117116276131798e-05, "loss": 0.3981, "step": 40790 }, { "epoch": 800.0, "learning_rate": 4.113499812668331e-05, "loss": 0.3985, "step": 40800 }, { "epoch": 800.0, "eval_loss": 0.4045719802379608, "eval_runtime": 2.3024, "eval_samples_per_second": 989.823, "eval_steps_per_second": 3.909, "step": 40800 }, { "epoch": 800.2, "learning_rate": 4.1098843379873926e-05, "loss": 0.3982, "step": 40810 }, { "epoch": 800.39, "learning_rate": 4.10626985314462e-05, "loss": 0.3994, "step": 40820 }, { "epoch": 800.59, "learning_rate": 4.102656359195366e-05, "loss": 0.3951, "step": 40830 }, { "epoch": 800.78, "learning_rate": 4.099043857194684e-05, "loss": 0.3987, "step": 40840 }, { "epoch": 800.98, "learning_rate": 4.09543234819735e-05, "loss": 0.3978, "step": 40850 }, { "epoch": 801.0, "eval_loss": 0.4002394676208496, "eval_runtime": 2.2534, "eval_samples_per_second": 1011.373, "eval_steps_per_second": 3.994, "step": 40851 }, { "epoch": 801.18, "learning_rate": 4.091821833257838e-05, "loss": 0.4006, "step": 40860 }, { "epoch": 801.37, "learning_rate": 4.088212313430342e-05, "loss": 0.3967, "step": 40870 }, { "epoch": 801.57, "learning_rate": 4.084603789768762e-05, "loss": 0.4034, "step": 40880 }, { "epoch": 801.76, "learning_rate": 4.080996263326702e-05, "loss": 0.3974, "step": 40890 }, { "epoch": 801.96, "learning_rate": 4.0773897351574846e-05, "loss": 0.3972, "step": 40900 }, { "epoch": 802.0, "eval_loss": 0.4057813584804535, "eval_runtime": 2.287, "eval_samples_per_second": 996.519, "eval_steps_per_second": 3.935, "step": 40902 }, { "epoch": 802.16, "learning_rate": 4.073784206314127e-05, "loss": 0.3923, "step": 40910 }, { "epoch": 802.35, "learning_rate": 4.070179677849375e-05, "loss": 0.3933, "step": 40920 }, { "epoch": 802.55, "learning_rate": 4.0665761508156654e-05, "loss": 0.3955, "step": 40930 }, { "epoch": 802.75, "learning_rate": 4.062973626265144e-05, "loss": 0.3995, "step": 40940 }, { "epoch": 802.94, "learning_rate": 4.0593721052496725e-05, "loss": 0.3993, "step": 40950 }, { "epoch": 803.0, "eval_loss": 0.4025868773460388, "eval_runtime": 2.3877, "eval_samples_per_second": 954.49, "eval_steps_per_second": 3.769, "step": 40953 }, { "epoch": 803.14, "learning_rate": 4.055771588820808e-05, "loss": 0.3984, "step": 40960 }, { "epoch": 803.33, "learning_rate": 4.0521720780298315e-05, "loss": 0.3989, "step": 40970 }, { "epoch": 803.53, "learning_rate": 4.04857357392771e-05, "loss": 0.3969, "step": 40980 }, { "epoch": 803.73, "learning_rate": 4.044976077565136e-05, "loss": 0.3943, "step": 40990 }, { "epoch": 803.92, "learning_rate": 4.041379589992491e-05, "loss": 0.3935, "step": 41000 }, { "epoch": 804.0, "eval_loss": 0.4048832058906555, "eval_runtime": 2.3357, "eval_samples_per_second": 975.736, "eval_steps_per_second": 3.853, "step": 41004 }, { "epoch": 804.12, "learning_rate": 4.037784112259868e-05, "loss": 0.3994, "step": 41010 }, { "epoch": 804.31, "learning_rate": 4.03418964541707e-05, "loss": 0.3917, "step": 41020 }, { "epoch": 804.51, "learning_rate": 4.0305961905135996e-05, "loss": 0.3989, "step": 41030 }, { "epoch": 804.71, "learning_rate": 4.0270037485986705e-05, "loss": 0.3977, "step": 41040 }, { "epoch": 804.9, "learning_rate": 4.023412320721191e-05, "loss": 0.3973, "step": 41050 }, { "epoch": 805.0, "eval_loss": 0.39889949560165405, "eval_runtime": 2.2245, "eval_samples_per_second": 1024.521, "eval_steps_per_second": 4.046, "step": 41055 }, { "epoch": 805.1, "learning_rate": 4.019821907929776e-05, "loss": 0.3906, "step": 41060 }, { "epoch": 805.29, "learning_rate": 4.016232511272747e-05, "loss": 0.3924, "step": 41070 }, { "epoch": 805.49, "learning_rate": 4.0126441317981306e-05, "loss": 0.3991, "step": 41080 }, { "epoch": 805.69, "learning_rate": 4.009056770553654e-05, "loss": 0.3966, "step": 41090 }, { "epoch": 805.88, "learning_rate": 4.0054704285867425e-05, "loss": 0.4002, "step": 41100 }, { "epoch": 806.0, "eval_loss": 0.40028414130210876, "eval_runtime": 2.2018, "eval_samples_per_second": 1035.077, "eval_steps_per_second": 4.088, "step": 41106 }, { "epoch": 806.08, "learning_rate": 4.0018851069445334e-05, "loss": 0.3994, "step": 41110 }, { "epoch": 806.27, "learning_rate": 3.9983008066738534e-05, "loss": 0.3967, "step": 41120 }, { "epoch": 806.47, "learning_rate": 3.9947175288212434e-05, "loss": 0.3946, "step": 41130 }, { "epoch": 806.67, "learning_rate": 3.9911352744329424e-05, "loss": 0.3947, "step": 41140 }, { "epoch": 806.86, "learning_rate": 3.9875540445548835e-05, "loss": 0.3918, "step": 41150 }, { "epoch": 807.0, "eval_loss": 0.4006493389606476, "eval_runtime": 2.2025, "eval_samples_per_second": 1034.733, "eval_steps_per_second": 4.086, "step": 41157 }, { "epoch": 807.06, "learning_rate": 3.9839738402327106e-05, "loss": 0.3991, "step": 41160 }, { "epoch": 807.25, "learning_rate": 3.980394662511756e-05, "loss": 0.3929, "step": 41170 }, { "epoch": 807.45, "learning_rate": 3.976816512437071e-05, "loss": 0.3981, "step": 41180 }, { "epoch": 807.65, "learning_rate": 3.973239391053389e-05, "loss": 0.3941, "step": 41190 }, { "epoch": 807.84, "learning_rate": 3.9696632994051476e-05, "loss": 0.4001, "step": 41200 }, { "epoch": 808.0, "eval_loss": 0.3997151553630829, "eval_runtime": 2.2178, "eval_samples_per_second": 1027.58, "eval_steps_per_second": 4.058, "step": 41208 }, { "epoch": 808.04, "learning_rate": 3.966088238536492e-05, "loss": 0.3915, "step": 41210 }, { "epoch": 808.24, "learning_rate": 3.962514209491254e-05, "loss": 0.3964, "step": 41220 }, { "epoch": 808.43, "learning_rate": 3.958941213312973e-05, "loss": 0.3962, "step": 41230 }, { "epoch": 808.63, "learning_rate": 3.955369251044884e-05, "loss": 0.3923, "step": 41240 }, { "epoch": 808.82, "learning_rate": 3.951798323729925e-05, "loss": 0.397, "step": 41250 }, { "epoch": 809.0, "eval_loss": 0.40183350443840027, "eval_runtime": 2.2856, "eval_samples_per_second": 997.107, "eval_steps_per_second": 3.938, "step": 41259 }, { "epoch": 809.02, "learning_rate": 3.948228432410722e-05, "loss": 0.3932, "step": 41260 }, { "epoch": 809.22, "learning_rate": 3.944659578129602e-05, "loss": 0.3923, "step": 41270 }, { "epoch": 809.41, "learning_rate": 3.9410917619285926e-05, "loss": 0.3966, "step": 41280 }, { "epoch": 809.61, "learning_rate": 3.9375249848494184e-05, "loss": 0.3941, "step": 41290 }, { "epoch": 809.8, "learning_rate": 3.9339592479335e-05, "loss": 0.3929, "step": 41300 }, { "epoch": 810.0, "learning_rate": 3.930394552221948e-05, "loss": 0.3984, "step": 41310 }, { "epoch": 810.0, "eval_loss": 0.4029523730278015, "eval_runtime": 2.225, "eval_samples_per_second": 1024.248, "eval_steps_per_second": 4.045, "step": 41310 }, { "epoch": 810.2, "learning_rate": 3.9268308987555794e-05, "loss": 0.3987, "step": 41320 }, { "epoch": 810.39, "learning_rate": 3.9232682885748965e-05, "loss": 0.3974, "step": 41330 }, { "epoch": 810.59, "learning_rate": 3.9197067227201044e-05, "loss": 0.3971, "step": 41340 }, { "epoch": 810.78, "learning_rate": 3.916146202231105e-05, "loss": 0.3933, "step": 41350 }, { "epoch": 810.98, "learning_rate": 3.912586728147482e-05, "loss": 0.3925, "step": 41360 }, { "epoch": 811.0, "eval_loss": 0.40738365054130554, "eval_runtime": 2.2715, "eval_samples_per_second": 1003.286, "eval_steps_per_second": 3.962, "step": 41361 }, { "epoch": 811.18, "learning_rate": 3.9090283015085305e-05, "loss": 0.4008, "step": 41370 }, { "epoch": 811.37, "learning_rate": 3.905470923353224e-05, "loss": 0.3922, "step": 41380 }, { "epoch": 811.57, "learning_rate": 3.901914594720247e-05, "loss": 0.3944, "step": 41390 }, { "epoch": 811.76, "learning_rate": 3.8983593166479635e-05, "loss": 0.3957, "step": 41400 }, { "epoch": 811.96, "learning_rate": 3.894805090174432e-05, "loss": 0.398, "step": 41410 }, { "epoch": 812.0, "eval_loss": 0.4031858444213867, "eval_runtime": 2.2163, "eval_samples_per_second": 1028.271, "eval_steps_per_second": 4.061, "step": 41412 }, { "epoch": 812.16, "learning_rate": 3.891251916337413e-05, "loss": 0.3923, "step": 41420 }, { "epoch": 812.35, "learning_rate": 3.8876997961743495e-05, "loss": 0.3998, "step": 41430 }, { "epoch": 812.55, "learning_rate": 3.8841487307223826e-05, "loss": 0.393, "step": 41440 }, { "epoch": 812.75, "learning_rate": 3.880598721018346e-05, "loss": 0.3957, "step": 41450 }, { "epoch": 812.94, "learning_rate": 3.8770497680987645e-05, "loss": 0.4, "step": 41460 }, { "epoch": 813.0, "eval_loss": 0.3986700773239136, "eval_runtime": 2.2742, "eval_samples_per_second": 1002.097, "eval_steps_per_second": 3.957, "step": 41463 }, { "epoch": 813.14, "learning_rate": 3.873501872999851e-05, "loss": 0.392, "step": 41470 }, { "epoch": 813.33, "learning_rate": 3.8699550367575105e-05, "loss": 0.3931, "step": 41480 }, { "epoch": 813.53, "learning_rate": 3.8664092604073404e-05, "loss": 0.3973, "step": 41490 }, { "epoch": 813.73, "learning_rate": 3.862864544984628e-05, "loss": 0.3959, "step": 41500 }, { "epoch": 813.92, "learning_rate": 3.8593208915243566e-05, "loss": 0.3943, "step": 41510 }, { "epoch": 814.0, "eval_loss": 0.40154093503952026, "eval_runtime": 2.3168, "eval_samples_per_second": 983.691, "eval_steps_per_second": 3.885, "step": 41514 }, { "epoch": 814.12, "learning_rate": 3.855778301061188e-05, "loss": 0.4014, "step": 41520 }, { "epoch": 814.31, "learning_rate": 3.852236774629483e-05, "loss": 0.3984, "step": 41530 }, { "epoch": 814.51, "learning_rate": 3.848696313263284e-05, "loss": 0.3954, "step": 41540 }, { "epoch": 814.71, "learning_rate": 3.8451569179963295e-05, "loss": 0.3955, "step": 41550 }, { "epoch": 814.9, "learning_rate": 3.8416185898620465e-05, "loss": 0.3973, "step": 41560 }, { "epoch": 815.0, "eval_loss": 0.3962329924106598, "eval_runtime": 2.2968, "eval_samples_per_second": 992.272, "eval_steps_per_second": 3.919, "step": 41565 }, { "epoch": 815.1, "learning_rate": 3.838081329893543e-05, "loss": 0.3943, "step": 41570 }, { "epoch": 815.29, "learning_rate": 3.834545139123626e-05, "loss": 0.3969, "step": 41580 }, { "epoch": 815.49, "learning_rate": 3.831010018584774e-05, "loss": 0.3928, "step": 41590 }, { "epoch": 815.69, "learning_rate": 3.827475969309177e-05, "loss": 0.3924, "step": 41600 }, { "epoch": 815.88, "learning_rate": 3.823942992328691e-05, "loss": 0.3922, "step": 41610 }, { "epoch": 816.0, "eval_loss": 0.403202623128891, "eval_runtime": 2.1921, "eval_samples_per_second": 1039.664, "eval_steps_per_second": 4.106, "step": 41616 }, { "epoch": 816.08, "learning_rate": 3.8204110886748645e-05, "loss": 0.3916, "step": 41620 }, { "epoch": 816.27, "learning_rate": 3.816880259378941e-05, "loss": 0.393, "step": 41630 }, { "epoch": 816.47, "learning_rate": 3.813350505471836e-05, "loss": 0.3974, "step": 41640 }, { "epoch": 816.67, "learning_rate": 3.809821827984164e-05, "loss": 0.3995, "step": 41650 }, { "epoch": 816.86, "learning_rate": 3.806294227946219e-05, "loss": 0.3902, "step": 41660 }, { "epoch": 817.0, "eval_loss": 0.3992672860622406, "eval_runtime": 2.2198, "eval_samples_per_second": 1026.655, "eval_steps_per_second": 4.054, "step": 41667 }, { "epoch": 817.06, "learning_rate": 3.8027677063879836e-05, "loss": 0.3939, "step": 41670 }, { "epoch": 817.25, "learning_rate": 3.799242264339123e-05, "loss": 0.3945, "step": 41680 }, { "epoch": 817.45, "learning_rate": 3.7957179028289835e-05, "loss": 0.3941, "step": 41690 }, { "epoch": 817.65, "learning_rate": 3.792194622886602e-05, "loss": 0.3934, "step": 41700 }, { "epoch": 817.84, "learning_rate": 3.788672425540699e-05, "loss": 0.3942, "step": 41710 }, { "epoch": 818.0, "eval_loss": 0.40182411670684814, "eval_runtime": 2.2756, "eval_samples_per_second": 1001.5, "eval_steps_per_second": 3.955, "step": 41718 }, { "epoch": 818.04, "learning_rate": 3.78515131181968e-05, "loss": 0.4016, "step": 41720 }, { "epoch": 818.24, "learning_rate": 3.781631282751629e-05, "loss": 0.3948, "step": 41730 }, { "epoch": 818.43, "learning_rate": 3.7781123393643125e-05, "loss": 0.3937, "step": 41740 }, { "epoch": 818.63, "learning_rate": 3.7745944826851866e-05, "loss": 0.3977, "step": 41750 }, { "epoch": 818.82, "learning_rate": 3.771077713741388e-05, "loss": 0.3994, "step": 41760 }, { "epoch": 819.0, "eval_loss": 0.40313833951950073, "eval_runtime": 2.2345, "eval_samples_per_second": 1019.911, "eval_steps_per_second": 4.028, "step": 41769 }, { "epoch": 819.02, "learning_rate": 3.767562033559736e-05, "loss": 0.3932, "step": 41770 }, { "epoch": 819.22, "learning_rate": 3.7640474431667264e-05, "loss": 0.391, "step": 41780 }, { "epoch": 819.41, "learning_rate": 3.760533943588546e-05, "loss": 0.3949, "step": 41790 }, { "epoch": 819.61, "learning_rate": 3.757021535851053e-05, "loss": 0.3966, "step": 41800 }, { "epoch": 819.8, "learning_rate": 3.753510220979795e-05, "loss": 0.3985, "step": 41810 }, { "epoch": 820.0, "learning_rate": 3.750000000000001e-05, "loss": 0.3959, "step": 41820 }, { "epoch": 820.0, "eval_loss": 0.4008371829986572, "eval_runtime": 2.3152, "eval_samples_per_second": 984.368, "eval_steps_per_second": 3.887, "step": 41820 }, { "epoch": 820.2, "learning_rate": 3.746490873936571e-05, "loss": 0.396, "step": 41830 }, { "epoch": 820.39, "learning_rate": 3.742982843814097e-05, "loss": 0.3978, "step": 41840 }, { "epoch": 820.59, "learning_rate": 3.73947591065684e-05, "loss": 0.3908, "step": 41850 }, { "epoch": 820.78, "learning_rate": 3.73597007548875e-05, "loss": 0.4008, "step": 41860 }, { "epoch": 820.98, "learning_rate": 3.732465339333454e-05, "loss": 0.3911, "step": 41870 }, { "epoch": 821.0, "eval_loss": 0.4035691022872925, "eval_runtime": 2.3143, "eval_samples_per_second": 984.737, "eval_steps_per_second": 3.889, "step": 41871 }, { "epoch": 821.18, "learning_rate": 3.728961703214252e-05, "loss": 0.3986, "step": 41880 }, { "epoch": 821.37, "learning_rate": 3.7254591681541327e-05, "loss": 0.3976, "step": 41890 }, { "epoch": 821.57, "learning_rate": 3.721957735175754e-05, "loss": 0.3909, "step": 41900 }, { "epoch": 821.76, "learning_rate": 3.7184574053014585e-05, "loss": 0.3977, "step": 41910 }, { "epoch": 821.96, "learning_rate": 3.714958179553263e-05, "loss": 0.3941, "step": 41920 }, { "epoch": 822.0, "eval_loss": 0.3997298777103424, "eval_runtime": 2.3656, "eval_samples_per_second": 963.412, "eval_steps_per_second": 3.805, "step": 41922 }, { "epoch": 822.16, "learning_rate": 3.7114600589528675e-05, "loss": 0.3965, "step": 41930 }, { "epoch": 822.35, "learning_rate": 3.707963044521642e-05, "loss": 0.3952, "step": 41940 }, { "epoch": 822.55, "learning_rate": 3.704467137280635e-05, "loss": 0.3936, "step": 41950 }, { "epoch": 822.75, "learning_rate": 3.700972338250574e-05, "loss": 0.3961, "step": 41960 }, { "epoch": 822.94, "learning_rate": 3.697478648451864e-05, "loss": 0.3936, "step": 41970 }, { "epoch": 823.0, "eval_loss": 0.3970935344696045, "eval_runtime": 2.2729, "eval_samples_per_second": 1002.69, "eval_steps_per_second": 3.96, "step": 41973 }, { "epoch": 823.14, "learning_rate": 3.693986068904588e-05, "loss": 0.3919, "step": 41980 }, { "epoch": 823.33, "learning_rate": 3.6904946006284936e-05, "loss": 0.3954, "step": 41990 }, { "epoch": 823.53, "learning_rate": 3.6870042446430185e-05, "loss": 0.3947, "step": 42000 }, { "epoch": 823.73, "learning_rate": 3.683515001967264e-05, "loss": 0.391, "step": 42010 }, { "epoch": 823.92, "learning_rate": 3.680026873620012e-05, "loss": 0.397, "step": 42020 }, { "epoch": 824.0, "eval_loss": 0.4010617733001709, "eval_runtime": 2.2259, "eval_samples_per_second": 1023.865, "eval_steps_per_second": 4.043, "step": 42024 }, { "epoch": 824.12, "learning_rate": 3.676539860619723e-05, "loss": 0.393, "step": 42030 }, { "epoch": 824.31, "learning_rate": 3.67305396398452e-05, "loss": 0.392, "step": 42040 }, { "epoch": 824.51, "learning_rate": 3.669569184732213e-05, "loss": 0.3962, "step": 42050 }, { "epoch": 824.71, "learning_rate": 3.666085523880274e-05, "loss": 0.3946, "step": 42060 }, { "epoch": 824.9, "learning_rate": 3.662602982445859e-05, "loss": 0.3974, "step": 42070 }, { "epoch": 825.0, "eval_loss": 0.3963702917098999, "eval_runtime": 2.2579, "eval_samples_per_second": 1009.332, "eval_steps_per_second": 3.986, "step": 42075 }, { "epoch": 825.1, "learning_rate": 3.659121561445792e-05, "loss": 0.3919, "step": 42080 }, { "epoch": 825.29, "learning_rate": 3.655641261896567e-05, "loss": 0.3924, "step": 42090 }, { "epoch": 825.49, "learning_rate": 3.6521620848143584e-05, "loss": 0.3921, "step": 42100 }, { "epoch": 825.69, "learning_rate": 3.648684031215004e-05, "loss": 0.398, "step": 42110 }, { "epoch": 825.88, "learning_rate": 3.6452071021140184e-05, "loss": 0.3921, "step": 42120 }, { "epoch": 826.0, "eval_loss": 0.4010373055934906, "eval_runtime": 2.3877, "eval_samples_per_second": 954.483, "eval_steps_per_second": 3.769, "step": 42126 }, { "epoch": 826.08, "learning_rate": 3.64173129852659e-05, "loss": 0.3915, "step": 42130 }, { "epoch": 826.27, "learning_rate": 3.638256621467577e-05, "loss": 0.3926, "step": 42140 }, { "epoch": 826.47, "learning_rate": 3.634783071951506e-05, "loss": 0.393, "step": 42150 }, { "epoch": 826.67, "learning_rate": 3.631310650992572e-05, "loss": 0.3911, "step": 42160 }, { "epoch": 826.86, "learning_rate": 3.6278393596046476e-05, "loss": 0.3961, "step": 42170 }, { "epoch": 827.0, "eval_loss": 0.401947021484375, "eval_runtime": 2.2833, "eval_samples_per_second": 998.113, "eval_steps_per_second": 3.942, "step": 42177 }, { "epoch": 827.06, "learning_rate": 3.624369198801272e-05, "loss": 0.3977, "step": 42180 }, { "epoch": 827.25, "learning_rate": 3.620900169595659e-05, "loss": 0.3955, "step": 42190 }, { "epoch": 827.45, "learning_rate": 3.617432273000681e-05, "loss": 0.3979, "step": 42200 }, { "epoch": 827.65, "learning_rate": 3.613965510028893e-05, "loss": 0.3954, "step": 42210 }, { "epoch": 827.84, "learning_rate": 3.610499881692506e-05, "loss": 0.3912, "step": 42220 }, { "epoch": 828.0, "eval_loss": 0.4004402756690979, "eval_runtime": 2.3254, "eval_samples_per_second": 980.067, "eval_steps_per_second": 3.87, "step": 42228 }, { "epoch": 828.04, "learning_rate": 3.607035389003409e-05, "loss": 0.391, "step": 42230 }, { "epoch": 828.24, "learning_rate": 3.60357203297316e-05, "loss": 0.3938, "step": 42240 }, { "epoch": 828.43, "learning_rate": 3.6001098146129756e-05, "loss": 0.3945, "step": 42250 }, { "epoch": 828.63, "learning_rate": 3.596648734933752e-05, "loss": 0.3921, "step": 42260 }, { "epoch": 828.82, "learning_rate": 3.5931887949460425e-05, "loss": 0.3939, "step": 42270 }, { "epoch": 829.0, "eval_loss": 0.39803311228752136, "eval_runtime": 2.2329, "eval_samples_per_second": 1020.649, "eval_steps_per_second": 4.031, "step": 42279 }, { "epoch": 829.02, "learning_rate": 3.5897299956600735e-05, "loss": 0.4018, "step": 42280 }, { "epoch": 829.22, "learning_rate": 3.586272338085742e-05, "loss": 0.3925, "step": 42290 }, { "epoch": 829.41, "learning_rate": 3.5828158232326e-05, "loss": 0.3942, "step": 42300 }, { "epoch": 829.61, "learning_rate": 3.5793604521098796e-05, "loss": 0.3919, "step": 42310 }, { "epoch": 829.8, "learning_rate": 3.5759062257264645e-05, "loss": 0.393, "step": 42320 }, { "epoch": 830.0, "learning_rate": 3.572453145090916e-05, "loss": 0.3917, "step": 42330 }, { "epoch": 830.0, "eval_loss": 0.40272367000579834, "eval_runtime": 2.3185, "eval_samples_per_second": 982.944, "eval_steps_per_second": 3.882, "step": 42330 }, { "epoch": 830.2, "learning_rate": 3.569001211211456e-05, "loss": 0.3938, "step": 42340 }, { "epoch": 830.39, "learning_rate": 3.565550425095976e-05, "loss": 0.3942, "step": 42350 }, { "epoch": 830.59, "learning_rate": 3.562100787752025e-05, "loss": 0.3922, "step": 42360 }, { "epoch": 830.78, "learning_rate": 3.558652300186817e-05, "loss": 0.3926, "step": 42370 }, { "epoch": 830.98, "learning_rate": 3.5552049634072366e-05, "loss": 0.3977, "step": 42380 }, { "epoch": 831.0, "eval_loss": 0.4004882574081421, "eval_runtime": 2.3806, "eval_samples_per_second": 957.34, "eval_steps_per_second": 3.781, "step": 42381 }, { "epoch": 831.18, "learning_rate": 3.55175877841983e-05, "loss": 0.3928, "step": 42390 }, { "epoch": 831.37, "learning_rate": 3.548313746230809e-05, "loss": 0.3944, "step": 42400 }, { "epoch": 831.57, "learning_rate": 3.544869867846039e-05, "loss": 0.3948, "step": 42410 }, { "epoch": 831.76, "learning_rate": 3.541427144271064e-05, "loss": 0.392, "step": 42420 }, { "epoch": 831.96, "learning_rate": 3.537985576511074e-05, "loss": 0.3881, "step": 42430 }, { "epoch": 832.0, "eval_loss": 0.39829736948013306, "eval_runtime": 2.2221, "eval_samples_per_second": 1025.626, "eval_steps_per_second": 4.05, "step": 42432 }, { "epoch": 832.16, "learning_rate": 3.534545165570934e-05, "loss": 0.3913, "step": 42440 }, { "epoch": 832.35, "learning_rate": 3.531105912455172e-05, "loss": 0.3917, "step": 42450 }, { "epoch": 832.55, "learning_rate": 3.5276678181679636e-05, "loss": 0.3904, "step": 42460 }, { "epoch": 832.75, "learning_rate": 3.524230883713164e-05, "loss": 0.3893, "step": 42470 }, { "epoch": 832.94, "learning_rate": 3.5207951100942765e-05, "loss": 0.3939, "step": 42480 }, { "epoch": 833.0, "eval_loss": 0.4025621712207794, "eval_runtime": 2.2271, "eval_samples_per_second": 1023.286, "eval_steps_per_second": 4.041, "step": 42483 }, { "epoch": 833.14, "learning_rate": 3.5173604983144714e-05, "loss": 0.3946, "step": 42490 }, { "epoch": 833.33, "learning_rate": 3.513927049376582e-05, "loss": 0.3929, "step": 42500 }, { "epoch": 833.53, "learning_rate": 3.5104947642830934e-05, "loss": 0.3909, "step": 42510 }, { "epoch": 833.73, "learning_rate": 3.5070636440361615e-05, "loss": 0.3942, "step": 42520 }, { "epoch": 833.92, "learning_rate": 3.5036336896375924e-05, "loss": 0.393, "step": 42530 }, { "epoch": 834.0, "eval_loss": 0.399141788482666, "eval_runtime": 2.1939, "eval_samples_per_second": 1038.783, "eval_steps_per_second": 4.102, "step": 42534 }, { "epoch": 834.12, "learning_rate": 3.500204902088857e-05, "loss": 0.3934, "step": 42540 }, { "epoch": 834.31, "learning_rate": 3.49677728239109e-05, "loss": 0.3953, "step": 42550 }, { "epoch": 834.51, "learning_rate": 3.493350831545073e-05, "loss": 0.3942, "step": 42560 }, { "epoch": 834.71, "learning_rate": 3.4899255505512593e-05, "loss": 0.394, "step": 42570 }, { "epoch": 834.9, "learning_rate": 3.4865014404097475e-05, "loss": 0.3928, "step": 42580 }, { "epoch": 835.0, "eval_loss": 0.398049920797348, "eval_runtime": 2.3222, "eval_samples_per_second": 981.383, "eval_steps_per_second": 3.876, "step": 42585 }, { "epoch": 835.1, "learning_rate": 3.483078502120307e-05, "loss": 0.3925, "step": 42590 }, { "epoch": 835.29, "learning_rate": 3.4796567366823564e-05, "loss": 0.3931, "step": 42600 }, { "epoch": 835.49, "learning_rate": 3.47623614509498e-05, "loss": 0.3872, "step": 42610 }, { "epoch": 835.69, "learning_rate": 3.47281672835691e-05, "loss": 0.392, "step": 42620 }, { "epoch": 835.88, "learning_rate": 3.4693984874665384e-05, "loss": 0.394, "step": 42630 }, { "epoch": 836.0, "eval_loss": 0.39526310563087463, "eval_runtime": 2.2715, "eval_samples_per_second": 1003.309, "eval_steps_per_second": 3.962, "step": 42636 }, { "epoch": 836.08, "learning_rate": 3.465981423421917e-05, "loss": 0.3924, "step": 42640 }, { "epoch": 836.27, "learning_rate": 3.462565537220753e-05, "loss": 0.3902, "step": 42650 }, { "epoch": 836.47, "learning_rate": 3.459150829860411e-05, "loss": 0.3939, "step": 42660 }, { "epoch": 836.67, "learning_rate": 3.455737302337904e-05, "loss": 0.3937, "step": 42670 }, { "epoch": 836.86, "learning_rate": 3.452324955649911e-05, "loss": 0.3908, "step": 42680 }, { "epoch": 837.0, "eval_loss": 0.4002179503440857, "eval_runtime": 2.3723, "eval_samples_per_second": 960.663, "eval_steps_per_second": 3.794, "step": 42687 }, { "epoch": 837.06, "learning_rate": 3.448913790792757e-05, "loss": 0.3886, "step": 42690 }, { "epoch": 837.25, "learning_rate": 3.445503808762429e-05, "loss": 0.3923, "step": 42700 }, { "epoch": 837.45, "learning_rate": 3.442095010554567e-05, "loss": 0.3926, "step": 42710 }, { "epoch": 837.65, "learning_rate": 3.4386873971644586e-05, "loss": 0.3957, "step": 42720 }, { "epoch": 837.84, "learning_rate": 3.4352809695870565e-05, "loss": 0.3926, "step": 42730 }, { "epoch": 838.0, "eval_loss": 0.4014919400215149, "eval_runtime": 2.3297, "eval_samples_per_second": 978.232, "eval_steps_per_second": 3.863, "step": 42738 }, { "epoch": 838.04, "learning_rate": 3.431875728816958e-05, "loss": 0.3962, "step": 42740 }, { "epoch": 838.24, "learning_rate": 3.4284716758484175e-05, "loss": 0.3914, "step": 42750 }, { "epoch": 838.43, "learning_rate": 3.4250688116753464e-05, "loss": 0.392, "step": 42760 }, { "epoch": 838.63, "learning_rate": 3.4216671372913005e-05, "loss": 0.3918, "step": 42770 }, { "epoch": 838.82, "learning_rate": 3.418266653689497e-05, "loss": 0.3947, "step": 42780 }, { "epoch": 839.0, "eval_loss": 0.3990994989871979, "eval_runtime": 2.3005, "eval_samples_per_second": 990.663, "eval_steps_per_second": 3.912, "step": 42789 }, { "epoch": 839.02, "learning_rate": 3.414867361862797e-05, "loss": 0.3898, "step": 42790 }, { "epoch": 839.22, "learning_rate": 3.41146926280372e-05, "loss": 0.3938, "step": 42800 }, { "epoch": 839.41, "learning_rate": 3.408072357504435e-05, "loss": 0.3916, "step": 42810 }, { "epoch": 839.61, "learning_rate": 3.404676646956765e-05, "loss": 0.3888, "step": 42820 }, { "epoch": 839.8, "learning_rate": 3.4012821321521806e-05, "loss": 0.3912, "step": 42830 }, { "epoch": 840.0, "learning_rate": 3.3978888140817996e-05, "loss": 0.3965, "step": 42840 }, { "epoch": 840.0, "eval_loss": 0.3969307243824005, "eval_runtime": 2.2053, "eval_samples_per_second": 1033.431, "eval_steps_per_second": 4.081, "step": 42840 }, { "epoch": 840.2, "learning_rate": 3.394496693736399e-05, "loss": 0.3947, "step": 42850 }, { "epoch": 840.39, "learning_rate": 3.391105772106403e-05, "loss": 0.3936, "step": 42860 }, { "epoch": 840.59, "learning_rate": 3.387716050181886e-05, "loss": 0.3952, "step": 42870 }, { "epoch": 840.78, "learning_rate": 3.384327528952568e-05, "loss": 0.3956, "step": 42880 }, { "epoch": 840.98, "learning_rate": 3.380940209407825e-05, "loss": 0.3934, "step": 42890 }, { "epoch": 841.0, "eval_loss": 0.4002283215522766, "eval_runtime": 2.2711, "eval_samples_per_second": 1003.473, "eval_steps_per_second": 3.963, "step": 42891 }, { "epoch": 841.18, "learning_rate": 3.377554092536674e-05, "loss": 0.3937, "step": 42900 }, { "epoch": 841.37, "learning_rate": 3.374169179327789e-05, "loss": 0.3956, "step": 42910 }, { "epoch": 841.57, "learning_rate": 3.370785470769491e-05, "loss": 0.3965, "step": 42920 }, { "epoch": 841.76, "learning_rate": 3.367402967849743e-05, "loss": 0.395, "step": 42930 }, { "epoch": 841.96, "learning_rate": 3.364021671556165e-05, "loss": 0.3916, "step": 42940 }, { "epoch": 842.0, "eval_loss": 0.39687296748161316, "eval_runtime": 2.3553, "eval_samples_per_second": 967.62, "eval_steps_per_second": 3.821, "step": 42942 }, { "epoch": 842.16, "learning_rate": 3.360641582876015e-05, "loss": 0.3895, "step": 42950 }, { "epoch": 842.35, "learning_rate": 3.357262702796206e-05, "loss": 0.3921, "step": 42960 }, { "epoch": 842.55, "learning_rate": 3.3538850323032984e-05, "loss": 0.3965, "step": 42970 }, { "epoch": 842.75, "learning_rate": 3.3505085723834917e-05, "loss": 0.3899, "step": 42980 }, { "epoch": 842.94, "learning_rate": 3.3471333240226414e-05, "loss": 0.3887, "step": 42990 }, { "epoch": 843.0, "eval_loss": 0.39406681060791016, "eval_runtime": 2.385, "eval_samples_per_second": 955.561, "eval_steps_per_second": 3.774, "step": 42993 }, { "epoch": 843.14, "learning_rate": 3.3437592882062406e-05, "loss": 0.3873, "step": 43000 }, { "epoch": 843.33, "learning_rate": 3.340386465919434e-05, "loss": 0.3939, "step": 43010 }, { "epoch": 843.53, "learning_rate": 3.3370148581470106e-05, "loss": 0.3885, "step": 43020 }, { "epoch": 843.73, "learning_rate": 3.333644465873408e-05, "loss": 0.3911, "step": 43030 }, { "epoch": 843.92, "learning_rate": 3.3302752900827025e-05, "loss": 0.3938, "step": 43040 }, { "epoch": 844.0, "eval_loss": 0.3971950113773346, "eval_runtime": 2.3473, "eval_samples_per_second": 970.919, "eval_steps_per_second": 3.834, "step": 43044 }, { "epoch": 844.12, "learning_rate": 3.3269073317586156e-05, "loss": 0.39, "step": 43050 }, { "epoch": 844.31, "learning_rate": 3.32354059188452e-05, "loss": 0.3895, "step": 43060 }, { "epoch": 844.51, "learning_rate": 3.3201750714434264e-05, "loss": 0.3975, "step": 43070 }, { "epoch": 844.71, "learning_rate": 3.3168107714179954e-05, "loss": 0.3905, "step": 43080 }, { "epoch": 844.9, "learning_rate": 3.3134476927905234e-05, "loss": 0.3928, "step": 43090 }, { "epoch": 845.0, "eval_loss": 0.40146586298942566, "eval_runtime": 2.1799, "eval_samples_per_second": 1045.449, "eval_steps_per_second": 4.129, "step": 43095 }, { "epoch": 845.1, "learning_rate": 3.3100858365429575e-05, "loss": 0.3932, "step": 43100 }, { "epoch": 845.29, "learning_rate": 3.306725203656881e-05, "loss": 0.3904, "step": 43110 }, { "epoch": 845.49, "learning_rate": 3.303365795113525e-05, "loss": 0.392, "step": 43120 }, { "epoch": 845.69, "learning_rate": 3.300007611893766e-05, "loss": 0.3893, "step": 43130 }, { "epoch": 845.88, "learning_rate": 3.2966506549781134e-05, "loss": 0.3948, "step": 43140 }, { "epoch": 846.0, "eval_loss": 0.39760321378707886, "eval_runtime": 2.3454, "eval_samples_per_second": 971.692, "eval_steps_per_second": 3.837, "step": 43146 }, { "epoch": 846.08, "learning_rate": 3.2932949253467276e-05, "loss": 0.395, "step": 43150 }, { "epoch": 846.27, "learning_rate": 3.2899404239794034e-05, "loss": 0.388, "step": 43160 }, { "epoch": 846.47, "learning_rate": 3.2865871518555814e-05, "loss": 0.394, "step": 43170 }, { "epoch": 846.67, "learning_rate": 3.283235109954345e-05, "loss": 0.3921, "step": 43180 }, { "epoch": 846.86, "learning_rate": 3.279884299254411e-05, "loss": 0.3925, "step": 43190 }, { "epoch": 847.0, "eval_loss": 0.3952951729297638, "eval_runtime": 2.212, "eval_samples_per_second": 1030.312, "eval_steps_per_second": 4.069, "step": 43197 }, { "epoch": 847.06, "learning_rate": 3.276534720734147e-05, "loss": 0.3906, "step": 43200 }, { "epoch": 847.25, "learning_rate": 3.273186375371549e-05, "loss": 0.3919, "step": 43210 }, { "epoch": 847.45, "learning_rate": 3.269839264144263e-05, "loss": 0.3918, "step": 43220 }, { "epoch": 847.65, "learning_rate": 3.266493388029572e-05, "loss": 0.3954, "step": 43230 }, { "epoch": 847.84, "learning_rate": 3.263148748004393e-05, "loss": 0.3876, "step": 43240 }, { "epoch": 848.0, "eval_loss": 0.3958490490913391, "eval_runtime": 2.2194, "eval_samples_per_second": 1026.867, "eval_steps_per_second": 4.055, "step": 43248 }, { "epoch": 848.04, "learning_rate": 3.2598053450452914e-05, "loss": 0.3967, "step": 43250 }, { "epoch": 848.24, "learning_rate": 3.2564631801284604e-05, "loss": 0.3884, "step": 43260 }, { "epoch": 848.43, "learning_rate": 3.253122254229742e-05, "loss": 0.3852, "step": 43270 }, { "epoch": 848.63, "learning_rate": 3.24978256832461e-05, "loss": 0.3918, "step": 43280 }, { "epoch": 848.82, "learning_rate": 3.246444123388181e-05, "loss": 0.3857, "step": 43290 }, { "epoch": 849.0, "eval_loss": 0.39667844772338867, "eval_runtime": 2.3103, "eval_samples_per_second": 986.453, "eval_steps_per_second": 3.896, "step": 43299 }, { "epoch": 849.02, "learning_rate": 3.2431069203952046e-05, "loss": 0.393, "step": 43300 }, { "epoch": 849.22, "learning_rate": 3.239770960320067e-05, "loss": 0.391, "step": 43310 }, { "epoch": 849.41, "learning_rate": 3.236436244136797e-05, "loss": 0.3919, "step": 43320 }, { "epoch": 849.61, "learning_rate": 3.233102772819057e-05, "loss": 0.3887, "step": 43330 }, { "epoch": 849.8, "learning_rate": 3.22977054734015e-05, "loss": 0.3876, "step": 43340 }, { "epoch": 850.0, "learning_rate": 3.226439568673003e-05, "loss": 0.389, "step": 43350 }, { "epoch": 850.0, "eval_loss": 0.3974584639072418, "eval_runtime": 2.2721, "eval_samples_per_second": 1003.056, "eval_steps_per_second": 3.961, "step": 43350 }, { "epoch": 850.2, "learning_rate": 3.2231098377901966e-05, "loss": 0.3892, "step": 43360 }, { "epoch": 850.39, "learning_rate": 3.21978135566393e-05, "loss": 0.3901, "step": 43370 }, { "epoch": 850.59, "learning_rate": 3.21645412326605e-05, "loss": 0.3902, "step": 43380 }, { "epoch": 850.78, "learning_rate": 3.2131281415680365e-05, "loss": 0.3908, "step": 43390 }, { "epoch": 850.98, "learning_rate": 3.2098034115409956e-05, "loss": 0.3905, "step": 43400 }, { "epoch": 851.0, "eval_loss": 0.3915613889694214, "eval_runtime": 2.2983, "eval_samples_per_second": 991.585, "eval_steps_per_second": 3.916, "step": 43401 }, { "epoch": 851.18, "learning_rate": 3.206479934155681e-05, "loss": 0.3891, "step": 43410 }, { "epoch": 851.37, "learning_rate": 3.203157710382469e-05, "loss": 0.3889, "step": 43420 }, { "epoch": 851.57, "learning_rate": 3.199836741191375e-05, "loss": 0.3898, "step": 43430 }, { "epoch": 851.76, "learning_rate": 3.1965170275520534e-05, "loss": 0.3915, "step": 43440 }, { "epoch": 851.96, "learning_rate": 3.1931985704337804e-05, "loss": 0.389, "step": 43450 }, { "epoch": 852.0, "eval_loss": 0.3987075686454773, "eval_runtime": 2.2702, "eval_samples_per_second": 1003.882, "eval_steps_per_second": 3.964, "step": 43452 }, { "epoch": 852.16, "learning_rate": 3.189881370805475e-05, "loss": 0.3933, "step": 43460 }, { "epoch": 852.35, "learning_rate": 3.1865654296356835e-05, "loss": 0.3869, "step": 43470 }, { "epoch": 852.55, "learning_rate": 3.183250747892587e-05, "loss": 0.39, "step": 43480 }, { "epoch": 852.75, "learning_rate": 3.1799373265439985e-05, "loss": 0.391, "step": 43490 }, { "epoch": 852.94, "learning_rate": 3.1766251665573676e-05, "loss": 0.3872, "step": 43500 }, { "epoch": 853.0, "eval_loss": 0.39647892117500305, "eval_runtime": 2.3604, "eval_samples_per_second": 965.511, "eval_steps_per_second": 3.813, "step": 43503 }, { "epoch": 853.14, "learning_rate": 3.173314268899767e-05, "loss": 0.3842, "step": 43510 }, { "epoch": 853.33, "learning_rate": 3.170004634537903e-05, "loss": 0.3962, "step": 43520 }, { "epoch": 853.53, "learning_rate": 3.1666962644381165e-05, "loss": 0.3919, "step": 43530 }, { "epoch": 853.73, "learning_rate": 3.1633891595663795e-05, "loss": 0.3922, "step": 43540 }, { "epoch": 853.92, "learning_rate": 3.1600833208882954e-05, "loss": 0.3902, "step": 43550 }, { "epoch": 854.0, "eval_loss": 0.3962956666946411, "eval_runtime": 2.2913, "eval_samples_per_second": 994.612, "eval_steps_per_second": 3.928, "step": 43554 }, { "epoch": 854.12, "learning_rate": 3.156778749369088e-05, "loss": 0.3889, "step": 43560 }, { "epoch": 854.31, "learning_rate": 3.1534754459736256e-05, "loss": 0.3916, "step": 43570 }, { "epoch": 854.51, "learning_rate": 3.150173411666394e-05, "loss": 0.389, "step": 43580 }, { "epoch": 854.71, "learning_rate": 3.1468726474115156e-05, "loss": 0.3866, "step": 43590 }, { "epoch": 854.9, "learning_rate": 3.143573154172743e-05, "loss": 0.3883, "step": 43600 }, { "epoch": 855.0, "eval_loss": 0.3941084146499634, "eval_runtime": 2.2726, "eval_samples_per_second": 1002.825, "eval_steps_per_second": 3.96, "step": 43605 }, { "epoch": 855.1, "learning_rate": 3.1402749329134476e-05, "loss": 0.3919, "step": 43610 }, { "epoch": 855.29, "learning_rate": 3.1369779845966446e-05, "loss": 0.3902, "step": 43620 }, { "epoch": 855.49, "learning_rate": 3.133682310184961e-05, "loss": 0.3866, "step": 43630 }, { "epoch": 855.69, "learning_rate": 3.1303879106406664e-05, "loss": 0.39, "step": 43640 }, { "epoch": 855.88, "learning_rate": 3.127094786925651e-05, "loss": 0.393, "step": 43650 }, { "epoch": 856.0, "eval_loss": 0.3944731056690216, "eval_runtime": 2.2622, "eval_samples_per_second": 1007.445, "eval_steps_per_second": 3.979, "step": 43656 }, { "epoch": 856.08, "learning_rate": 3.1238029400014305e-05, "loss": 0.388, "step": 43660 }, { "epoch": 856.27, "learning_rate": 3.120512370829156e-05, "loss": 0.3884, "step": 43670 }, { "epoch": 856.47, "learning_rate": 3.1172230803695945e-05, "loss": 0.3895, "step": 43680 }, { "epoch": 856.67, "learning_rate": 3.113935069583147e-05, "loss": 0.3919, "step": 43690 }, { "epoch": 856.86, "learning_rate": 3.1106483394298416e-05, "loss": 0.3908, "step": 43700 }, { "epoch": 857.0, "eval_loss": 0.3987371325492859, "eval_runtime": 2.2357, "eval_samples_per_second": 1019.351, "eval_steps_per_second": 4.026, "step": 43707 }, { "epoch": 857.06, "learning_rate": 3.107362890869332e-05, "loss": 0.3887, "step": 43710 }, { "epoch": 857.25, "learning_rate": 3.104078724860892e-05, "loss": 0.3931, "step": 43720 }, { "epoch": 857.45, "learning_rate": 3.1007958423634235e-05, "loss": 0.3914, "step": 43730 }, { "epoch": 857.65, "learning_rate": 3.097514244335457e-05, "loss": 0.3868, "step": 43740 }, { "epoch": 857.84, "learning_rate": 3.094233931735147e-05, "loss": 0.3891, "step": 43750 }, { "epoch": 858.0, "eval_loss": 0.3969500958919525, "eval_runtime": 2.2738, "eval_samples_per_second": 1002.285, "eval_steps_per_second": 3.958, "step": 43758 }, { "epoch": 858.04, "learning_rate": 3.090954905520272e-05, "loss": 0.3888, "step": 43760 }, { "epoch": 858.24, "learning_rate": 3.087677166648232e-05, "loss": 0.3955, "step": 43770 }, { "epoch": 858.43, "learning_rate": 3.0844007160760576e-05, "loss": 0.3885, "step": 43780 }, { "epoch": 858.63, "learning_rate": 3.0811255547603925e-05, "loss": 0.3864, "step": 43790 }, { "epoch": 858.82, "learning_rate": 3.077851683657517e-05, "loss": 0.39, "step": 43800 }, { "epoch": 859.0, "eval_loss": 0.3933936655521393, "eval_runtime": 2.2192, "eval_samples_per_second": 1026.96, "eval_steps_per_second": 4.056, "step": 43809 }, { "epoch": 859.02, "learning_rate": 3.074579103723327e-05, "loss": 0.3814, "step": 43810 }, { "epoch": 859.22, "learning_rate": 3.071307815913342e-05, "loss": 0.3884, "step": 43820 }, { "epoch": 859.41, "learning_rate": 3.068037821182706e-05, "loss": 0.3902, "step": 43830 }, { "epoch": 859.61, "learning_rate": 3.064769120486182e-05, "loss": 0.3885, "step": 43840 }, { "epoch": 859.8, "learning_rate": 3.061501714778159e-05, "loss": 0.3898, "step": 43850 }, { "epoch": 860.0, "learning_rate": 3.05823560501265e-05, "loss": 0.3894, "step": 43860 }, { "epoch": 860.0, "eval_loss": 0.3981279730796814, "eval_runtime": 2.295, "eval_samples_per_second": 993.016, "eval_steps_per_second": 3.922, "step": 43860 }, { "epoch": 860.2, "learning_rate": 3.054970792143282e-05, "loss": 0.3903, "step": 43870 }, { "epoch": 860.39, "learning_rate": 3.0517072771233103e-05, "loss": 0.388, "step": 43880 }, { "epoch": 860.59, "learning_rate": 3.0484450609056048e-05, "loss": 0.3893, "step": 43890 }, { "epoch": 860.78, "learning_rate": 3.0451841444426625e-05, "loss": 0.3894, "step": 43900 }, { "epoch": 860.98, "learning_rate": 3.0419245286865998e-05, "loss": 0.3859, "step": 43910 }, { "epoch": 861.0, "eval_loss": 0.39396482706069946, "eval_runtime": 2.2654, "eval_samples_per_second": 1005.99, "eval_steps_per_second": 3.973, "step": 43911 }, { "epoch": 861.18, "learning_rate": 3.038666214589148e-05, "loss": 0.3893, "step": 43920 }, { "epoch": 861.37, "learning_rate": 3.035409203101667e-05, "loss": 0.3916, "step": 43930 }, { "epoch": 861.57, "learning_rate": 3.032153495175126e-05, "loss": 0.3859, "step": 43940 }, { "epoch": 861.76, "learning_rate": 3.028899091760121e-05, "loss": 0.389, "step": 43950 }, { "epoch": 861.96, "learning_rate": 3.025645993806866e-05, "loss": 0.3896, "step": 43960 }, { "epoch": 862.0, "eval_loss": 0.39559245109558105, "eval_runtime": 2.3499, "eval_samples_per_second": 969.843, "eval_steps_per_second": 3.83, "step": 43962 }, { "epoch": 862.16, "learning_rate": 3.022394202265196e-05, "loss": 0.3863, "step": 43970 }, { "epoch": 862.35, "learning_rate": 3.019143718084559e-05, "loss": 0.3895, "step": 43980 }, { "epoch": 862.55, "learning_rate": 3.015894542214021e-05, "loss": 0.3908, "step": 43990 }, { "epoch": 862.75, "learning_rate": 3.0126466756022707e-05, "loss": 0.3892, "step": 44000 }, { "epoch": 862.94, "learning_rate": 3.0094001191976134e-05, "loss": 0.3897, "step": 44010 }, { "epoch": 863.0, "eval_loss": 0.3952128291130066, "eval_runtime": 2.2684, "eval_samples_per_second": 1004.679, "eval_steps_per_second": 3.968, "step": 44013 }, { "epoch": 863.14, "learning_rate": 3.0061548739479748e-05, "loss": 0.3845, "step": 44020 }, { "epoch": 863.33, "learning_rate": 3.0029109408008867e-05, "loss": 0.3908, "step": 44030 }, { "epoch": 863.53, "learning_rate": 2.9996683207035127e-05, "loss": 0.3871, "step": 44040 }, { "epoch": 863.73, "learning_rate": 2.9964270146026188e-05, "loss": 0.3866, "step": 44050 }, { "epoch": 863.92, "learning_rate": 2.993187023444597e-05, "loss": 0.385, "step": 44060 }, { "epoch": 864.0, "eval_loss": 0.3940654695034027, "eval_runtime": 2.3365, "eval_samples_per_second": 975.387, "eval_steps_per_second": 3.852, "step": 44064 }, { "epoch": 864.12, "learning_rate": 2.989948348175456e-05, "loss": 0.3915, "step": 44070 }, { "epoch": 864.31, "learning_rate": 2.98671098974081e-05, "loss": 0.389, "step": 44080 }, { "epoch": 864.51, "learning_rate": 2.983474949085902e-05, "loss": 0.3863, "step": 44090 }, { "epoch": 864.71, "learning_rate": 2.9802402271555775e-05, "loss": 0.3926, "step": 44100 }, { "epoch": 864.9, "learning_rate": 2.9770068248943062e-05, "loss": 0.3876, "step": 44110 }, { "epoch": 865.0, "eval_loss": 0.3937053680419922, "eval_runtime": 2.3814, "eval_samples_per_second": 956.993, "eval_steps_per_second": 3.779, "step": 44115 }, { "epoch": 865.1, "learning_rate": 2.973774743246173e-05, "loss": 0.3901, "step": 44120 }, { "epoch": 865.29, "learning_rate": 2.9705439831548672e-05, "loss": 0.3932, "step": 44130 }, { "epoch": 865.49, "learning_rate": 2.967314545563704e-05, "loss": 0.3903, "step": 44140 }, { "epoch": 865.69, "learning_rate": 2.9640864314156017e-05, "loss": 0.3917, "step": 44150 }, { "epoch": 865.88, "learning_rate": 2.9608596416531015e-05, "loss": 0.3889, "step": 44160 }, { "epoch": 866.0, "eval_loss": 0.39745286107063293, "eval_runtime": 2.2179, "eval_samples_per_second": 1027.526, "eval_steps_per_second": 4.058, "step": 44166 }, { "epoch": 866.08, "learning_rate": 2.957634177218353e-05, "loss": 0.3876, "step": 44170 }, { "epoch": 866.27, "learning_rate": 2.954410039053123e-05, "loss": 0.3854, "step": 44180 }, { "epoch": 866.47, "learning_rate": 2.951187228098785e-05, "loss": 0.3888, "step": 44190 }, { "epoch": 866.67, "learning_rate": 2.9479657452963253e-05, "loss": 0.3906, "step": 44200 }, { "epoch": 866.86, "learning_rate": 2.9447455915863477e-05, "loss": 0.3926, "step": 44210 }, { "epoch": 867.0, "eval_loss": 0.39533084630966187, "eval_runtime": 2.2146, "eval_samples_per_second": 1029.072, "eval_steps_per_second": 4.064, "step": 44217 }, { "epoch": 867.06, "learning_rate": 2.9415267679090657e-05, "loss": 0.3895, "step": 44220 }, { "epoch": 867.25, "learning_rate": 2.938309275204306e-05, "loss": 0.3908, "step": 44230 }, { "epoch": 867.45, "learning_rate": 2.9350931144115e-05, "loss": 0.3886, "step": 44240 }, { "epoch": 867.65, "learning_rate": 2.9318782864696995e-05, "loss": 0.385, "step": 44250 }, { "epoch": 867.84, "learning_rate": 2.928664792317558e-05, "loss": 0.3895, "step": 44260 }, { "epoch": 868.0, "eval_loss": 0.391824334859848, "eval_runtime": 2.2277, "eval_samples_per_second": 1023.015, "eval_steps_per_second": 4.04, "step": 44268 }, { "epoch": 868.04, "learning_rate": 2.925452632893346e-05, "loss": 0.3865, "step": 44270 }, { "epoch": 868.24, "learning_rate": 2.9222418091349463e-05, "loss": 0.3874, "step": 44280 }, { "epoch": 868.43, "learning_rate": 2.9190323219798413e-05, "loss": 0.3864, "step": 44290 }, { "epoch": 868.63, "learning_rate": 2.9158241723651357e-05, "loss": 0.3867, "step": 44300 }, { "epoch": 868.82, "learning_rate": 2.9126173612275315e-05, "loss": 0.3926, "step": 44310 }, { "epoch": 869.0, "eval_loss": 0.39258652925491333, "eval_runtime": 2.2004, "eval_samples_per_second": 1035.742, "eval_steps_per_second": 4.09, "step": 44319 }, { "epoch": 869.02, "learning_rate": 2.9094118895033494e-05, "loss": 0.3904, "step": 44320 }, { "epoch": 869.22, "learning_rate": 2.9062077581285187e-05, "loss": 0.3853, "step": 44330 }, { "epoch": 869.41, "learning_rate": 2.9030049680385685e-05, "loss": 0.3888, "step": 44340 }, { "epoch": 869.61, "learning_rate": 2.899803520168647e-05, "loss": 0.3925, "step": 44350 }, { "epoch": 869.8, "learning_rate": 2.8966034154535005e-05, "loss": 0.3892, "step": 44360 }, { "epoch": 870.0, "learning_rate": 2.893404654827491e-05, "loss": 0.3861, "step": 44370 }, { "epoch": 870.0, "eval_loss": 0.39331433176994324, "eval_runtime": 2.2363, "eval_samples_per_second": 1019.085, "eval_steps_per_second": 4.024, "step": 44370 }, { "epoch": 870.2, "learning_rate": 2.8902072392245856e-05, "loss": 0.3868, "step": 44380 }, { "epoch": 870.39, "learning_rate": 2.8870111695783603e-05, "loss": 0.3874, "step": 44390 }, { "epoch": 870.59, "learning_rate": 2.883816446821994e-05, "loss": 0.3876, "step": 44400 }, { "epoch": 870.78, "learning_rate": 2.880623071888271e-05, "loss": 0.386, "step": 44410 }, { "epoch": 870.98, "learning_rate": 2.87743104570959e-05, "loss": 0.3881, "step": 44420 }, { "epoch": 871.0, "eval_loss": 0.39406803250312805, "eval_runtime": 2.3068, "eval_samples_per_second": 987.965, "eval_steps_per_second": 3.902, "step": 44421 }, { "epoch": 871.18, "learning_rate": 2.87424036921795e-05, "loss": 0.3829, "step": 44430 }, { "epoch": 871.37, "learning_rate": 2.8710510433449598e-05, "loss": 0.3881, "step": 44440 }, { "epoch": 871.57, "learning_rate": 2.8678630690218274e-05, "loss": 0.3879, "step": 44450 }, { "epoch": 871.76, "learning_rate": 2.864676447179375e-05, "loss": 0.3864, "step": 44460 }, { "epoch": 871.96, "learning_rate": 2.8614911787480188e-05, "loss": 0.3863, "step": 44470 }, { "epoch": 872.0, "eval_loss": 0.3938988447189331, "eval_runtime": 2.3231, "eval_samples_per_second": 981.03, "eval_steps_per_second": 3.874, "step": 44472 }, { "epoch": 872.16, "learning_rate": 2.8583072646577905e-05, "loss": 0.3849, "step": 44480 }, { "epoch": 872.35, "learning_rate": 2.8551247058383234e-05, "loss": 0.3887, "step": 44490 }, { "epoch": 872.55, "learning_rate": 2.8519435032188488e-05, "loss": 0.3927, "step": 44500 }, { "epoch": 872.75, "learning_rate": 2.8487636577282115e-05, "loss": 0.3897, "step": 44510 }, { "epoch": 872.94, "learning_rate": 2.8455851702948522e-05, "loss": 0.3863, "step": 44520 }, { "epoch": 873.0, "eval_loss": 0.3912711441516876, "eval_runtime": 2.1836, "eval_samples_per_second": 1043.695, "eval_steps_per_second": 4.122, "step": 44523 }, { "epoch": 873.14, "learning_rate": 2.8424080418468184e-05, "loss": 0.392, "step": 44530 }, { "epoch": 873.33, "learning_rate": 2.8392322733117654e-05, "loss": 0.3873, "step": 44540 }, { "epoch": 873.53, "learning_rate": 2.83605786561694e-05, "loss": 0.3863, "step": 44550 }, { "epoch": 873.73, "learning_rate": 2.832884819689205e-05, "loss": 0.3924, "step": 44560 }, { "epoch": 873.92, "learning_rate": 2.8297131364550138e-05, "loss": 0.386, "step": 44570 }, { "epoch": 874.0, "eval_loss": 0.3918991982936859, "eval_runtime": 2.2676, "eval_samples_per_second": 1005.049, "eval_steps_per_second": 3.969, "step": 44574 }, { "epoch": 874.12, "learning_rate": 2.8265428168404287e-05, "loss": 0.3857, "step": 44580 }, { "epoch": 874.31, "learning_rate": 2.8233738617711158e-05, "loss": 0.3906, "step": 44590 }, { "epoch": 874.51, "learning_rate": 2.8202062721723325e-05, "loss": 0.3855, "step": 44600 }, { "epoch": 874.71, "learning_rate": 2.817040048968952e-05, "loss": 0.3865, "step": 44610 }, { "epoch": 874.9, "learning_rate": 2.8138751930854347e-05, "loss": 0.382, "step": 44620 }, { "epoch": 875.0, "eval_loss": 0.38788464665412903, "eval_runtime": 2.2983, "eval_samples_per_second": 991.581, "eval_steps_per_second": 3.916, "step": 44625 }, { "epoch": 875.1, "learning_rate": 2.8107117054458496e-05, "loss": 0.3885, "step": 44630 }, { "epoch": 875.29, "learning_rate": 2.8075495869738657e-05, "loss": 0.3876, "step": 44640 }, { "epoch": 875.49, "learning_rate": 2.8043888385927525e-05, "loss": 0.385, "step": 44650 }, { "epoch": 875.69, "learning_rate": 2.8012294612253767e-05, "loss": 0.3894, "step": 44660 }, { "epoch": 875.88, "learning_rate": 2.798071455794203e-05, "loss": 0.384, "step": 44670 }, { "epoch": 876.0, "eval_loss": 0.393778532743454, "eval_runtime": 2.3202, "eval_samples_per_second": 982.244, "eval_steps_per_second": 3.879, "step": 44676 }, { "epoch": 876.08, "learning_rate": 2.7949148232213006e-05, "loss": 0.387, "step": 44680 }, { "epoch": 876.27, "learning_rate": 2.7917595644283365e-05, "loss": 0.3904, "step": 44690 }, { "epoch": 876.47, "learning_rate": 2.7886056803365777e-05, "loss": 0.3851, "step": 44700 }, { "epoch": 876.67, "learning_rate": 2.7854531718668842e-05, "loss": 0.389, "step": 44710 }, { "epoch": 876.86, "learning_rate": 2.7823020399397213e-05, "loss": 0.3898, "step": 44720 }, { "epoch": 877.0, "eval_loss": 0.3949425220489502, "eval_runtime": 2.2739, "eval_samples_per_second": 1002.245, "eval_steps_per_second": 3.958, "step": 44727 }, { "epoch": 877.06, "learning_rate": 2.779152285475146e-05, "loss": 0.3867, "step": 44730 }, { "epoch": 877.25, "learning_rate": 2.776003909392819e-05, "loss": 0.384, "step": 44740 }, { "epoch": 877.45, "learning_rate": 2.7728569126119966e-05, "loss": 0.3847, "step": 44750 }, { "epoch": 877.65, "learning_rate": 2.7697112960515283e-05, "loss": 0.3864, "step": 44760 }, { "epoch": 877.84, "learning_rate": 2.7665670606298682e-05, "loss": 0.3913, "step": 44770 }, { "epoch": 878.0, "eval_loss": 0.3947102725505829, "eval_runtime": 2.3101, "eval_samples_per_second": 986.554, "eval_steps_per_second": 3.896, "step": 44778 }, { "epoch": 878.04, "learning_rate": 2.7634242072650577e-05, "loss": 0.3859, "step": 44780 }, { "epoch": 878.24, "learning_rate": 2.760282736874743e-05, "loss": 0.3849, "step": 44790 }, { "epoch": 878.43, "learning_rate": 2.7571426503761657e-05, "loss": 0.3884, "step": 44800 }, { "epoch": 878.63, "learning_rate": 2.754003948686156e-05, "loss": 0.3871, "step": 44810 }, { "epoch": 878.82, "learning_rate": 2.75086663272115e-05, "loss": 0.3859, "step": 44820 }, { "epoch": 879.0, "eval_loss": 0.3952098488807678, "eval_runtime": 2.2397, "eval_samples_per_second": 1017.564, "eval_steps_per_second": 4.018, "step": 44829 }, { "epoch": 879.02, "learning_rate": 2.7477307033971687e-05, "loss": 0.3867, "step": 44830 }, { "epoch": 879.22, "learning_rate": 2.744596161629836e-05, "loss": 0.3881, "step": 44840 }, { "epoch": 879.41, "learning_rate": 2.7414630083343687e-05, "loss": 0.3871, "step": 44850 }, { "epoch": 879.61, "learning_rate": 2.7383312444255793e-05, "loss": 0.383, "step": 44860 }, { "epoch": 879.8, "learning_rate": 2.7352008708178714e-05, "loss": 0.3874, "step": 44870 }, { "epoch": 880.0, "learning_rate": 2.7320718884252412e-05, "loss": 0.385, "step": 44880 }, { "epoch": 880.0, "eval_loss": 0.395025372505188, "eval_runtime": 2.3571, "eval_samples_per_second": 966.867, "eval_steps_per_second": 3.818, "step": 44880 }, { "epoch": 880.2, "learning_rate": 2.728944298161284e-05, "loss": 0.388, "step": 44890 }, { "epoch": 880.39, "learning_rate": 2.725818100939187e-05, "loss": 0.3887, "step": 44900 }, { "epoch": 880.59, "learning_rate": 2.7226932976717336e-05, "loss": 0.3882, "step": 44910 }, { "epoch": 880.78, "learning_rate": 2.7195698892712894e-05, "loss": 0.3846, "step": 44920 }, { "epoch": 880.98, "learning_rate": 2.716447876649826e-05, "loss": 0.3872, "step": 44930 }, { "epoch": 881.0, "eval_loss": 0.3877263069152832, "eval_runtime": 2.2028, "eval_samples_per_second": 1034.586, "eval_steps_per_second": 4.086, "step": 44931 }, { "epoch": 881.18, "learning_rate": 2.7133272607188975e-05, "loss": 0.3849, "step": 44940 }, { "epoch": 881.37, "learning_rate": 2.710208042389655e-05, "loss": 0.3876, "step": 44950 }, { "epoch": 881.57, "learning_rate": 2.707090222572844e-05, "loss": 0.3888, "step": 44960 }, { "epoch": 881.76, "learning_rate": 2.7039738021787926e-05, "loss": 0.3888, "step": 44970 }, { "epoch": 881.96, "learning_rate": 2.7008587821174328e-05, "loss": 0.383, "step": 44980 }, { "epoch": 882.0, "eval_loss": 0.3904988169670105, "eval_runtime": 2.2822, "eval_samples_per_second": 998.591, "eval_steps_per_second": 3.944, "step": 44982 }, { "epoch": 882.16, "learning_rate": 2.697745163298274e-05, "loss": 0.3868, "step": 44990 }, { "epoch": 882.35, "learning_rate": 2.6946329466304274e-05, "loss": 0.3821, "step": 45000 }, { "epoch": 882.55, "learning_rate": 2.691522133022593e-05, "loss": 0.3869, "step": 45010 }, { "epoch": 882.75, "learning_rate": 2.6884127233830533e-05, "loss": 0.3848, "step": 45020 }, { "epoch": 882.94, "learning_rate": 2.6853047186196924e-05, "loss": 0.387, "step": 45030 }, { "epoch": 883.0, "eval_loss": 0.39386025071144104, "eval_runtime": 2.2917, "eval_samples_per_second": 994.475, "eval_steps_per_second": 3.927, "step": 45033 }, { "epoch": 883.14, "learning_rate": 2.6821981196399727e-05, "loss": 0.3901, "step": 45040 }, { "epoch": 883.33, "learning_rate": 2.6790929273509545e-05, "loss": 0.3869, "step": 45050 }, { "epoch": 883.53, "learning_rate": 2.675989142659285e-05, "loss": 0.3875, "step": 45060 }, { "epoch": 883.73, "learning_rate": 2.6728867664712033e-05, "loss": 0.3818, "step": 45070 }, { "epoch": 883.92, "learning_rate": 2.66978579969253e-05, "loss": 0.3834, "step": 45080 }, { "epoch": 884.0, "eval_loss": 0.39473608136177063, "eval_runtime": 2.2587, "eval_samples_per_second": 1009.006, "eval_steps_per_second": 3.985, "step": 45084 }, { "epoch": 884.12, "learning_rate": 2.6666862432286758e-05, "loss": 0.3834, "step": 45090 }, { "epoch": 884.31, "learning_rate": 2.6635880979846462e-05, "loss": 0.3862, "step": 45100 }, { "epoch": 884.51, "learning_rate": 2.6604913648650295e-05, "loss": 0.3878, "step": 45110 }, { "epoch": 884.71, "learning_rate": 2.6573960447740055e-05, "loss": 0.3898, "step": 45120 }, { "epoch": 884.9, "learning_rate": 2.6543021386153322e-05, "loss": 0.3866, "step": 45130 }, { "epoch": 885.0, "eval_loss": 0.39346495270729065, "eval_runtime": 2.245, "eval_samples_per_second": 1015.159, "eval_steps_per_second": 4.009, "step": 45135 }, { "epoch": 885.1, "learning_rate": 2.651209647292368e-05, "loss": 0.3878, "step": 45140 }, { "epoch": 885.29, "learning_rate": 2.6481185717080457e-05, "loss": 0.3882, "step": 45150 }, { "epoch": 885.49, "learning_rate": 2.645028912764893e-05, "loss": 0.3869, "step": 45160 }, { "epoch": 885.69, "learning_rate": 2.6419406713650245e-05, "loss": 0.3878, "step": 45170 }, { "epoch": 885.88, "learning_rate": 2.638853848410132e-05, "loss": 0.3834, "step": 45180 }, { "epoch": 886.0, "eval_loss": 0.3925130069255829, "eval_runtime": 2.2463, "eval_samples_per_second": 1014.544, "eval_steps_per_second": 4.007, "step": 45186 }, { "epoch": 886.08, "learning_rate": 2.6357684448015038e-05, "loss": 0.3826, "step": 45190 }, { "epoch": 886.27, "learning_rate": 2.6326844614400038e-05, "loss": 0.387, "step": 45200 }, { "epoch": 886.47, "learning_rate": 2.6296018992260903e-05, "loss": 0.3832, "step": 45210 }, { "epoch": 886.67, "learning_rate": 2.626520759059804e-05, "loss": 0.3887, "step": 45220 }, { "epoch": 886.86, "learning_rate": 2.623441041840765e-05, "loss": 0.3848, "step": 45230 }, { "epoch": 887.0, "eval_loss": 0.3903357684612274, "eval_runtime": 2.3016, "eval_samples_per_second": 990.196, "eval_steps_per_second": 3.91, "step": 45237 }, { "epoch": 887.06, "learning_rate": 2.620362748468186e-05, "loss": 0.3878, "step": 45240 }, { "epoch": 887.25, "learning_rate": 2.6172858798408557e-05, "loss": 0.3854, "step": 45250 }, { "epoch": 887.45, "learning_rate": 2.6142104368571522e-05, "loss": 0.386, "step": 45260 }, { "epoch": 887.65, "learning_rate": 2.6111364204150414e-05, "loss": 0.3814, "step": 45270 }, { "epoch": 887.84, "learning_rate": 2.60806383141206e-05, "loss": 0.3896, "step": 45280 }, { "epoch": 888.0, "eval_loss": 0.39181816577911377, "eval_runtime": 2.3791, "eval_samples_per_second": 957.935, "eval_steps_per_second": 3.783, "step": 45288 }, { "epoch": 888.04, "learning_rate": 2.6049926707453428e-05, "loss": 0.3842, "step": 45290 }, { "epoch": 888.24, "learning_rate": 2.6019229393115935e-05, "loss": 0.389, "step": 45300 }, { "epoch": 888.43, "learning_rate": 2.5988546380071072e-05, "loss": 0.3812, "step": 45310 }, { "epoch": 888.63, "learning_rate": 2.5957877677277615e-05, "loss": 0.3885, "step": 45320 }, { "epoch": 888.82, "learning_rate": 2.592722329369016e-05, "loss": 0.3863, "step": 45330 }, { "epoch": 889.0, "eval_loss": 0.3879792094230652, "eval_runtime": 2.2596, "eval_samples_per_second": 1008.603, "eval_steps_per_second": 3.983, "step": 45339 }, { "epoch": 889.02, "learning_rate": 2.5896583238259064e-05, "loss": 0.3845, "step": 45340 }, { "epoch": 889.22, "learning_rate": 2.5865957519930526e-05, "loss": 0.3888, "step": 45350 }, { "epoch": 889.41, "learning_rate": 2.5835346147646597e-05, "loss": 0.3855, "step": 45360 }, { "epoch": 889.61, "learning_rate": 2.580474913034512e-05, "loss": 0.3849, "step": 45370 }, { "epoch": 889.8, "learning_rate": 2.5774166476959758e-05, "loss": 0.3828, "step": 45380 }, { "epoch": 890.0, "learning_rate": 2.574359819641992e-05, "loss": 0.384, "step": 45390 }, { "epoch": 890.0, "eval_loss": 0.388411283493042, "eval_runtime": 2.2402, "eval_samples_per_second": 1017.319, "eval_steps_per_second": 4.017, "step": 45390 }, { "epoch": 890.2, "learning_rate": 2.5713044297650904e-05, "loss": 0.3816, "step": 45400 }, { "epoch": 890.39, "learning_rate": 2.568250478957372e-05, "loss": 0.3868, "step": 45410 }, { "epoch": 890.59, "learning_rate": 2.5651979681105258e-05, "loss": 0.3859, "step": 45420 }, { "epoch": 890.78, "learning_rate": 2.562146898115819e-05, "loss": 0.3863, "step": 45430 }, { "epoch": 890.98, "learning_rate": 2.5590972698640892e-05, "loss": 0.3844, "step": 45440 }, { "epoch": 891.0, "eval_loss": 0.3906935751438141, "eval_runtime": 2.3035, "eval_samples_per_second": 989.365, "eval_steps_per_second": 3.907, "step": 45441 }, { "epoch": 891.18, "learning_rate": 2.5560490842457675e-05, "loss": 0.3852, "step": 45450 }, { "epoch": 891.37, "learning_rate": 2.553002342150849e-05, "loss": 0.3851, "step": 45460 }, { "epoch": 891.57, "learning_rate": 2.549957044468919e-05, "loss": 0.3863, "step": 45470 }, { "epoch": 891.76, "learning_rate": 2.546913192089137e-05, "loss": 0.3835, "step": 45480 }, { "epoch": 891.96, "learning_rate": 2.543870785900236e-05, "loss": 0.3863, "step": 45490 }, { "epoch": 892.0, "eval_loss": 0.39539283514022827, "eval_runtime": 2.2062, "eval_samples_per_second": 1032.976, "eval_steps_per_second": 4.079, "step": 45492 }, { "epoch": 892.16, "learning_rate": 2.5408298267905357e-05, "loss": 0.3857, "step": 45500 }, { "epoch": 892.35, "learning_rate": 2.5377903156479235e-05, "loss": 0.3898, "step": 45510 }, { "epoch": 892.55, "learning_rate": 2.5347522533598706e-05, "loss": 0.3868, "step": 45520 }, { "epoch": 892.75, "learning_rate": 2.531715640813424e-05, "loss": 0.3801, "step": 45530 }, { "epoch": 892.94, "learning_rate": 2.52868047889521e-05, "loss": 0.3872, "step": 45540 }, { "epoch": 893.0, "eval_loss": 0.3918585479259491, "eval_runtime": 2.1779, "eval_samples_per_second": 1046.398, "eval_steps_per_second": 4.132, "step": 45543 }, { "epoch": 893.14, "learning_rate": 2.525646768491424e-05, "loss": 0.385, "step": 45550 }, { "epoch": 893.33, "learning_rate": 2.5226145104878405e-05, "loss": 0.3884, "step": 45560 }, { "epoch": 893.53, "learning_rate": 2.5195837057698134e-05, "loss": 0.3824, "step": 45570 }, { "epoch": 893.73, "learning_rate": 2.5165543552222706e-05, "loss": 0.3824, "step": 45580 }, { "epoch": 893.92, "learning_rate": 2.5135264597297166e-05, "loss": 0.3869, "step": 45590 }, { "epoch": 894.0, "eval_loss": 0.39283615350723267, "eval_runtime": 2.2359, "eval_samples_per_second": 1019.291, "eval_steps_per_second": 4.025, "step": 45594 }, { "epoch": 894.12, "learning_rate": 2.5105000201762253e-05, "loss": 0.3907, "step": 45600 }, { "epoch": 894.31, "learning_rate": 2.5074750374454532e-05, "loss": 0.3855, "step": 45610 }, { "epoch": 894.51, "learning_rate": 2.504451512420624e-05, "loss": 0.383, "step": 45620 }, { "epoch": 894.71, "learning_rate": 2.5014294459845418e-05, "loss": 0.384, "step": 45630 }, { "epoch": 894.9, "learning_rate": 2.498408839019584e-05, "loss": 0.3801, "step": 45640 }, { "epoch": 895.0, "eval_loss": 0.39413610100746155, "eval_runtime": 2.344, "eval_samples_per_second": 972.252, "eval_steps_per_second": 3.84, "step": 45645 }, { "epoch": 895.1, "learning_rate": 2.4953896924076978e-05, "loss": 0.3843, "step": 45650 }, { "epoch": 895.29, "learning_rate": 2.4923720070304088e-05, "loss": 0.386, "step": 45660 }, { "epoch": 895.49, "learning_rate": 2.4893557837688108e-05, "loss": 0.3901, "step": 45670 }, { "epoch": 895.69, "learning_rate": 2.486341023503576e-05, "loss": 0.3842, "step": 45680 }, { "epoch": 895.88, "learning_rate": 2.4833277271149496e-05, "loss": 0.3832, "step": 45690 }, { "epoch": 896.0, "eval_loss": 0.39303308725357056, "eval_runtime": 2.2279, "eval_samples_per_second": 1022.934, "eval_steps_per_second": 4.04, "step": 45696 }, { "epoch": 896.08, "learning_rate": 2.480315895482742e-05, "loss": 0.3866, "step": 45700 }, { "epoch": 896.27, "learning_rate": 2.4773055294863443e-05, "loss": 0.3891, "step": 45710 }, { "epoch": 896.47, "learning_rate": 2.4742966300047138e-05, "loss": 0.3846, "step": 45720 }, { "epoch": 896.67, "learning_rate": 2.4712891979163826e-05, "loss": 0.3854, "step": 45730 }, { "epoch": 896.86, "learning_rate": 2.4682832340994544e-05, "loss": 0.3886, "step": 45740 }, { "epoch": 897.0, "eval_loss": 0.3933192193508148, "eval_runtime": 2.243, "eval_samples_per_second": 1016.036, "eval_steps_per_second": 4.012, "step": 45747 }, { "epoch": 897.06, "learning_rate": 2.4652787394316066e-05, "loss": 0.3919, "step": 45750 }, { "epoch": 897.25, "learning_rate": 2.4622757147900816e-05, "loss": 0.3843, "step": 45760 }, { "epoch": 897.45, "learning_rate": 2.459274161051693e-05, "loss": 0.3851, "step": 45770 }, { "epoch": 897.65, "learning_rate": 2.4562740790928304e-05, "loss": 0.3832, "step": 45780 }, { "epoch": 897.84, "learning_rate": 2.4532754697894512e-05, "loss": 0.3871, "step": 45790 }, { "epoch": 898.0, "eval_loss": 0.3916724920272827, "eval_runtime": 2.233, "eval_samples_per_second": 1020.585, "eval_steps_per_second": 4.03, "step": 45798 }, { "epoch": 898.04, "learning_rate": 2.4502783340170833e-05, "loss": 0.3837, "step": 45800 }, { "epoch": 898.24, "learning_rate": 2.4472826726508207e-05, "loss": 0.3866, "step": 45810 }, { "epoch": 898.43, "learning_rate": 2.4442884865653332e-05, "loss": 0.3846, "step": 45820 }, { "epoch": 898.63, "learning_rate": 2.4412957766348516e-05, "loss": 0.3798, "step": 45830 }, { "epoch": 898.82, "learning_rate": 2.4383045437331835e-05, "loss": 0.3892, "step": 45840 }, { "epoch": 899.0, "eval_loss": 0.39273956418037415, "eval_runtime": 2.3197, "eval_samples_per_second": 982.453, "eval_steps_per_second": 3.88, "step": 45849 }, { "epoch": 899.02, "learning_rate": 2.4353147887337042e-05, "loss": 0.3851, "step": 45850 }, { "epoch": 899.22, "learning_rate": 2.4323265125093507e-05, "loss": 0.386, "step": 45860 }, { "epoch": 899.41, "learning_rate": 2.4293397159326384e-05, "loss": 0.3838, "step": 45870 }, { "epoch": 899.61, "learning_rate": 2.4263543998756392e-05, "loss": 0.3802, "step": 45880 }, { "epoch": 899.8, "learning_rate": 2.4233705652100026e-05, "loss": 0.3867, "step": 45890 }, { "epoch": 900.0, "learning_rate": 2.420388212806943e-05, "loss": 0.3864, "step": 45900 }, { "epoch": 900.0, "eval_loss": 0.3934266269207001, "eval_runtime": 2.2188, "eval_samples_per_second": 1027.135, "eval_steps_per_second": 4.056, "step": 45900 }, { "epoch": 900.2, "learning_rate": 2.417407343537237e-05, "loss": 0.3864, "step": 45910 }, { "epoch": 900.39, "learning_rate": 2.4144279582712353e-05, "loss": 0.384, "step": 45920 }, { "epoch": 900.59, "learning_rate": 2.4114500578788486e-05, "loss": 0.3841, "step": 45930 }, { "epoch": 900.78, "learning_rate": 2.40847364322956e-05, "loss": 0.3825, "step": 45940 }, { "epoch": 900.98, "learning_rate": 2.405498715192415e-05, "loss": 0.3827, "step": 45950 }, { "epoch": 901.0, "eval_loss": 0.39162707328796387, "eval_runtime": 2.2278, "eval_samples_per_second": 1022.964, "eval_steps_per_second": 4.04, "step": 45951 }, { "epoch": 901.18, "learning_rate": 2.402525274636029e-05, "loss": 0.3816, "step": 45960 }, { "epoch": 901.37, "learning_rate": 2.3995533224285788e-05, "loss": 0.3832, "step": 45970 }, { "epoch": 901.57, "learning_rate": 2.3965828594378042e-05, "loss": 0.3892, "step": 45980 }, { "epoch": 901.76, "learning_rate": 2.3936138865310177e-05, "loss": 0.3815, "step": 45990 }, { "epoch": 901.96, "learning_rate": 2.3906464045750927e-05, "loss": 0.3838, "step": 46000 }, { "epoch": 902.0, "eval_loss": 0.3931758403778076, "eval_runtime": 2.2246, "eval_samples_per_second": 1024.454, "eval_steps_per_second": 4.046, "step": 46002 }, { "epoch": 902.16, "learning_rate": 2.387680414436471e-05, "loss": 0.3852, "step": 46010 }, { "epoch": 902.35, "learning_rate": 2.384715916981152e-05, "loss": 0.3842, "step": 46020 }, { "epoch": 902.55, "learning_rate": 2.3817529130747002e-05, "loss": 0.3843, "step": 46030 }, { "epoch": 902.75, "learning_rate": 2.3787914035822512e-05, "loss": 0.3856, "step": 46040 }, { "epoch": 902.94, "learning_rate": 2.3758313893684976e-05, "loss": 0.3859, "step": 46050 }, { "epoch": 903.0, "eval_loss": 0.3901480436325073, "eval_runtime": 2.2773, "eval_samples_per_second": 1000.738, "eval_steps_per_second": 3.952, "step": 46053 }, { "epoch": 903.14, "learning_rate": 2.3728728712977005e-05, "loss": 0.377, "step": 46060 }, { "epoch": 903.33, "learning_rate": 2.369915850233677e-05, "loss": 0.3878, "step": 46070 }, { "epoch": 903.53, "learning_rate": 2.366960327039815e-05, "loss": 0.3839, "step": 46080 }, { "epoch": 903.73, "learning_rate": 2.3640063025790577e-05, "loss": 0.3786, "step": 46090 }, { "epoch": 903.92, "learning_rate": 2.3610537777139165e-05, "loss": 0.382, "step": 46100 }, { "epoch": 904.0, "eval_loss": 0.3918239176273346, "eval_runtime": 2.2928, "eval_samples_per_second": 993.994, "eval_steps_per_second": 3.925, "step": 46104 }, { "epoch": 904.12, "learning_rate": 2.358102753306465e-05, "loss": 0.3819, "step": 46110 }, { "epoch": 904.31, "learning_rate": 2.3551532302183307e-05, "loss": 0.3855, "step": 46120 }, { "epoch": 904.51, "learning_rate": 2.3522052093107154e-05, "loss": 0.3824, "step": 46130 }, { "epoch": 904.71, "learning_rate": 2.3492586914443693e-05, "loss": 0.3829, "step": 46140 }, { "epoch": 904.9, "learning_rate": 2.346313677479613e-05, "loss": 0.3824, "step": 46150 }, { "epoch": 905.0, "eval_loss": 0.39389172196388245, "eval_runtime": 2.3228, "eval_samples_per_second": 981.125, "eval_steps_per_second": 3.875, "step": 46155 }, { "epoch": 905.1, "learning_rate": 2.3433701682763262e-05, "loss": 0.3865, "step": 46160 }, { "epoch": 905.29, "learning_rate": 2.3404281646939442e-05, "loss": 0.385, "step": 46170 }, { "epoch": 905.49, "learning_rate": 2.3374876675914704e-05, "loss": 0.386, "step": 46180 }, { "epoch": 905.69, "learning_rate": 2.3345486778274604e-05, "loss": 0.386, "step": 46190 }, { "epoch": 905.88, "learning_rate": 2.331611196260036e-05, "loss": 0.3799, "step": 46200 }, { "epoch": 906.0, "eval_loss": 0.390666663646698, "eval_runtime": 2.2415, "eval_samples_per_second": 1016.712, "eval_steps_per_second": 4.015, "step": 46206 }, { "epoch": 906.08, "learning_rate": 2.328675223746876e-05, "loss": 0.3824, "step": 46210 }, { "epoch": 906.27, "learning_rate": 2.3257407611452215e-05, "loss": 0.3818, "step": 46220 }, { "epoch": 906.47, "learning_rate": 2.322807809311867e-05, "loss": 0.3869, "step": 46230 }, { "epoch": 906.67, "learning_rate": 2.3198763691031675e-05, "loss": 0.3806, "step": 46240 }, { "epoch": 906.86, "learning_rate": 2.31694644137504e-05, "loss": 0.3851, "step": 46250 }, { "epoch": 907.0, "eval_loss": 0.38907137513160706, "eval_runtime": 2.3737, "eval_samples_per_second": 960.124, "eval_steps_per_second": 3.792, "step": 46257 }, { "epoch": 907.06, "learning_rate": 2.3140180269829587e-05, "loss": 0.3824, "step": 46260 }, { "epoch": 907.25, "learning_rate": 2.311091126781957e-05, "loss": 0.3862, "step": 46270 }, { "epoch": 907.45, "learning_rate": 2.3081657416266202e-05, "loss": 0.3848, "step": 46280 }, { "epoch": 907.65, "learning_rate": 2.3052418723711e-05, "loss": 0.3833, "step": 46290 }, { "epoch": 907.84, "learning_rate": 2.3023195198690968e-05, "loss": 0.3854, "step": 46300 }, { "epoch": 908.0, "eval_loss": 0.38849303126335144, "eval_runtime": 2.3757, "eval_samples_per_second": 959.284, "eval_steps_per_second": 3.788, "step": 46308 }, { "epoch": 908.04, "learning_rate": 2.2993986849738735e-05, "loss": 0.3819, "step": 46310 }, { "epoch": 908.24, "learning_rate": 2.2964793685382518e-05, "loss": 0.384, "step": 46320 }, { "epoch": 908.43, "learning_rate": 2.293561571414603e-05, "loss": 0.3822, "step": 46330 }, { "epoch": 908.63, "learning_rate": 2.2906452944548622e-05, "loss": 0.381, "step": 46340 }, { "epoch": 908.82, "learning_rate": 2.2877305385105134e-05, "loss": 0.3855, "step": 46350 }, { "epoch": 909.0, "eval_loss": 0.3911910653114319, "eval_runtime": 2.2359, "eval_samples_per_second": 1019.275, "eval_steps_per_second": 4.025, "step": 46359 }, { "epoch": 909.02, "learning_rate": 2.2848173044326036e-05, "loss": 0.3796, "step": 46360 }, { "epoch": 909.22, "learning_rate": 2.2819055930717316e-05, "loss": 0.3814, "step": 46370 }, { "epoch": 909.41, "learning_rate": 2.2789954052780508e-05, "loss": 0.3793, "step": 46380 }, { "epoch": 909.61, "learning_rate": 2.2760867419012732e-05, "loss": 0.3837, "step": 46390 }, { "epoch": 909.8, "learning_rate": 2.273179603790661e-05, "loss": 0.3862, "step": 46400 }, { "epoch": 910.0, "learning_rate": 2.2702739917950342e-05, "loss": 0.3855, "step": 46410 }, { "epoch": 910.0, "eval_loss": 0.3912041187286377, "eval_runtime": 2.2617, "eval_samples_per_second": 1007.639, "eval_steps_per_second": 3.979, "step": 46410 }, { "epoch": 910.2, "learning_rate": 2.267369906762768e-05, "loss": 0.3839, "step": 46420 }, { "epoch": 910.39, "learning_rate": 2.2644673495417922e-05, "loss": 0.3809, "step": 46430 }, { "epoch": 910.59, "learning_rate": 2.261566320979587e-05, "loss": 0.3842, "step": 46440 }, { "epoch": 910.78, "learning_rate": 2.2586668219231847e-05, "loss": 0.3804, "step": 46450 }, { "epoch": 910.98, "learning_rate": 2.255768853219178e-05, "loss": 0.3799, "step": 46460 }, { "epoch": 911.0, "eval_loss": 0.38822638988494873, "eval_runtime": 2.2355, "eval_samples_per_second": 1019.442, "eval_steps_per_second": 4.026, "step": 46461 }, { "epoch": 911.18, "learning_rate": 2.2528724157137082e-05, "loss": 0.3793, "step": 46470 }, { "epoch": 911.37, "learning_rate": 2.2499775102524725e-05, "loss": 0.3807, "step": 46480 }, { "epoch": 911.57, "learning_rate": 2.2470841376807154e-05, "loss": 0.3854, "step": 46490 }, { "epoch": 911.76, "learning_rate": 2.2441922988432405e-05, "loss": 0.3827, "step": 46500 }, { "epoch": 911.96, "learning_rate": 2.2413019945843964e-05, "loss": 0.387, "step": 46510 }, { "epoch": 912.0, "eval_loss": 0.3894227147102356, "eval_runtime": 2.3651, "eval_samples_per_second": 963.582, "eval_steps_per_second": 3.805, "step": 46512 }, { "epoch": 912.16, "learning_rate": 2.2384132257480898e-05, "loss": 0.384, "step": 46520 }, { "epoch": 912.35, "learning_rate": 2.2355259931777784e-05, "loss": 0.387, "step": 46530 }, { "epoch": 912.55, "learning_rate": 2.2326402977164658e-05, "loss": 0.3837, "step": 46540 }, { "epoch": 912.75, "learning_rate": 2.2297561402067148e-05, "loss": 0.3859, "step": 46550 }, { "epoch": 912.94, "learning_rate": 2.226873521490631e-05, "loss": 0.3792, "step": 46560 }, { "epoch": 913.0, "eval_loss": 0.3886968791484833, "eval_runtime": 2.3469, "eval_samples_per_second": 971.078, "eval_steps_per_second": 3.835, "step": 46563 }, { "epoch": 913.14, "learning_rate": 2.223992442409876e-05, "loss": 0.3861, "step": 46570 }, { "epoch": 913.33, "learning_rate": 2.2211129038056646e-05, "loss": 0.3809, "step": 46580 }, { "epoch": 913.53, "learning_rate": 2.218234906518752e-05, "loss": 0.3852, "step": 46590 }, { "epoch": 913.73, "learning_rate": 2.2153584513894547e-05, "loss": 0.3826, "step": 46600 }, { "epoch": 913.92, "learning_rate": 2.2124835392576275e-05, "loss": 0.3831, "step": 46610 }, { "epoch": 914.0, "eval_loss": 0.3874710500240326, "eval_runtime": 2.2984, "eval_samples_per_second": 991.568, "eval_steps_per_second": 3.916, "step": 46614 }, { "epoch": 914.12, "learning_rate": 2.209610170962685e-05, "loss": 0.3827, "step": 46620 }, { "epoch": 914.31, "learning_rate": 2.2067383473435844e-05, "loss": 0.3832, "step": 46630 }, { "epoch": 914.51, "learning_rate": 2.203868069238838e-05, "loss": 0.385, "step": 46640 }, { "epoch": 914.71, "learning_rate": 2.2009993374864997e-05, "loss": 0.3857, "step": 46650 }, { "epoch": 914.9, "learning_rate": 2.198132152924173e-05, "loss": 0.3821, "step": 46660 }, { "epoch": 915.0, "eval_loss": 0.38625603914260864, "eval_runtime": 2.2102, "eval_samples_per_second": 1031.139, "eval_steps_per_second": 4.072, "step": 46665 }, { "epoch": 915.1, "learning_rate": 2.195266516389015e-05, "loss": 0.381, "step": 46670 }, { "epoch": 915.29, "learning_rate": 2.192402428717728e-05, "loss": 0.3842, "step": 46680 }, { "epoch": 915.49, "learning_rate": 2.189539890746562e-05, "loss": 0.381, "step": 46690 }, { "epoch": 915.69, "learning_rate": 2.1866789033113142e-05, "loss": 0.3763, "step": 46700 }, { "epoch": 915.88, "learning_rate": 2.1838194672473254e-05, "loss": 0.3853, "step": 46710 }, { "epoch": 916.0, "eval_loss": 0.3884444534778595, "eval_runtime": 2.358, "eval_samples_per_second": 966.491, "eval_steps_per_second": 3.817, "step": 46716 }, { "epoch": 916.08, "learning_rate": 2.1809615833894893e-05, "loss": 0.38, "step": 46720 }, { "epoch": 916.27, "learning_rate": 2.178105252572245e-05, "loss": 0.3845, "step": 46730 }, { "epoch": 916.47, "learning_rate": 2.17525047562958e-05, "loss": 0.3847, "step": 46740 }, { "epoch": 916.67, "learning_rate": 2.1723972533950197e-05, "loss": 0.3907, "step": 46750 }, { "epoch": 916.86, "learning_rate": 2.1695455867016466e-05, "loss": 0.381, "step": 46760 }, { "epoch": 917.0, "eval_loss": 0.38729578256607056, "eval_runtime": 2.3567, "eval_samples_per_second": 967.025, "eval_steps_per_second": 3.819, "step": 46767 }, { "epoch": 917.06, "learning_rate": 2.1666954763820795e-05, "loss": 0.3843, "step": 46770 }, { "epoch": 917.25, "learning_rate": 2.1638469232684892e-05, "loss": 0.3812, "step": 46780 }, { "epoch": 917.45, "learning_rate": 2.1609999281925916e-05, "loss": 0.3827, "step": 46790 }, { "epoch": 917.65, "learning_rate": 2.1581544919856415e-05, "loss": 0.3857, "step": 46800 }, { "epoch": 917.84, "learning_rate": 2.1553106154784482e-05, "loss": 0.3847, "step": 46810 }, { "epoch": 918.0, "eval_loss": 0.3849899470806122, "eval_runtime": 2.2047, "eval_samples_per_second": 1033.685, "eval_steps_per_second": 4.082, "step": 46818 }, { "epoch": 918.04, "learning_rate": 2.152468299501353e-05, "loss": 0.3817, "step": 46820 }, { "epoch": 918.24, "learning_rate": 2.1496275448842536e-05, "loss": 0.3891, "step": 46830 }, { "epoch": 918.43, "learning_rate": 2.1467883524565886e-05, "loss": 0.3834, "step": 46840 }, { "epoch": 918.63, "learning_rate": 2.1439507230473345e-05, "loss": 0.3803, "step": 46850 }, { "epoch": 918.82, "learning_rate": 2.14111465748502e-05, "loss": 0.3813, "step": 46860 }, { "epoch": 919.0, "eval_loss": 0.387481153011322, "eval_runtime": 2.1882, "eval_samples_per_second": 1041.495, "eval_steps_per_second": 4.113, "step": 46869 }, { "epoch": 919.02, "learning_rate": 2.1382801565977082e-05, "loss": 0.3762, "step": 46870 }, { "epoch": 919.22, "learning_rate": 2.135447221213013e-05, "loss": 0.3835, "step": 46880 }, { "epoch": 919.41, "learning_rate": 2.1326158521580874e-05, "loss": 0.3842, "step": 46890 }, { "epoch": 919.61, "learning_rate": 2.129786050259632e-05, "loss": 0.3819, "step": 46900 }, { "epoch": 919.8, "learning_rate": 2.1269578163438812e-05, "loss": 0.3833, "step": 46910 }, { "epoch": 920.0, "learning_rate": 2.1241311512366167e-05, "loss": 0.3853, "step": 46920 }, { "epoch": 920.0, "eval_loss": 0.3859827518463135, "eval_runtime": 2.2216, "eval_samples_per_second": 1025.821, "eval_steps_per_second": 4.051, "step": 46920 }, { "epoch": 920.2, "learning_rate": 2.1213060557631614e-05, "loss": 0.3855, "step": 46930 }, { "epoch": 920.39, "learning_rate": 2.1184825307483818e-05, "loss": 0.3855, "step": 46940 }, { "epoch": 920.59, "learning_rate": 2.115660577016686e-05, "loss": 0.3826, "step": 46950 }, { "epoch": 920.78, "learning_rate": 2.1128401953920172e-05, "loss": 0.3822, "step": 46960 }, { "epoch": 920.98, "learning_rate": 2.1100213866978683e-05, "loss": 0.3849, "step": 46970 }, { "epoch": 921.0, "eval_loss": 0.38799986243247986, "eval_runtime": 2.175, "eval_samples_per_second": 1047.835, "eval_steps_per_second": 4.138, "step": 46971 }, { "epoch": 921.18, "learning_rate": 2.1072041517572635e-05, "loss": 0.3817, "step": 46980 }, { "epoch": 921.37, "learning_rate": 2.1043884913927757e-05, "loss": 0.3842, "step": 46990 }, { "epoch": 921.57, "learning_rate": 2.1015744064265165e-05, "loss": 0.3846, "step": 47000 }, { "epoch": 921.76, "learning_rate": 2.098761897680132e-05, "loss": 0.3848, "step": 47010 }, { "epoch": 921.96, "learning_rate": 2.095950965974817e-05, "loss": 0.3771, "step": 47020 }, { "epoch": 922.0, "eval_loss": 0.3890962600708008, "eval_runtime": 2.3192, "eval_samples_per_second": 982.67, "eval_steps_per_second": 3.881, "step": 47022 }, { "epoch": 922.16, "learning_rate": 2.0931416121312948e-05, "loss": 0.3789, "step": 47030 }, { "epoch": 922.35, "learning_rate": 2.0903338369698376e-05, "loss": 0.3837, "step": 47040 }, { "epoch": 922.55, "learning_rate": 2.0875276413102553e-05, "loss": 0.3806, "step": 47050 }, { "epoch": 922.75, "learning_rate": 2.084723025971889e-05, "loss": 0.3833, "step": 47060 }, { "epoch": 922.94, "learning_rate": 2.0819199917736294e-05, "loss": 0.3815, "step": 47070 }, { "epoch": 923.0, "eval_loss": 0.3886687457561493, "eval_runtime": 2.278, "eval_samples_per_second": 1000.437, "eval_steps_per_second": 3.951, "step": 47073 }, { "epoch": 923.14, "learning_rate": 2.0791185395338944e-05, "loss": 0.3829, "step": 47080 }, { "epoch": 923.33, "learning_rate": 2.076318670070649e-05, "loss": 0.3815, "step": 47090 }, { "epoch": 923.53, "learning_rate": 2.0735203842013924e-05, "loss": 0.384, "step": 47100 }, { "epoch": 923.73, "learning_rate": 2.0707236827431635e-05, "loss": 0.3862, "step": 47110 }, { "epoch": 923.92, "learning_rate": 2.0679285665125343e-05, "loss": 0.3827, "step": 47120 }, { "epoch": 924.0, "eval_loss": 0.39016667008399963, "eval_runtime": 2.192, "eval_samples_per_second": 1039.686, "eval_steps_per_second": 4.106, "step": 47124 }, { "epoch": 924.12, "learning_rate": 2.0651350363256144e-05, "loss": 0.3817, "step": 47130 }, { "epoch": 924.31, "learning_rate": 2.062343092998055e-05, "loss": 0.379, "step": 47140 }, { "epoch": 924.51, "learning_rate": 2.0595527373450406e-05, "loss": 0.3816, "step": 47150 }, { "epoch": 924.71, "learning_rate": 2.0567639701812956e-05, "loss": 0.3834, "step": 47160 }, { "epoch": 924.9, "learning_rate": 2.0539767923210733e-05, "loss": 0.3828, "step": 47170 }, { "epoch": 925.0, "eval_loss": 0.39003047347068787, "eval_runtime": 2.2931, "eval_samples_per_second": 993.858, "eval_steps_per_second": 3.925, "step": 47175 }, { "epoch": 925.1, "learning_rate": 2.0511912045781716e-05, "loss": 0.3807, "step": 47180 }, { "epoch": 925.29, "learning_rate": 2.0484072077659158e-05, "loss": 0.3796, "step": 47190 }, { "epoch": 925.49, "learning_rate": 2.045624802697173e-05, "loss": 0.3844, "step": 47200 }, { "epoch": 925.69, "learning_rate": 2.0428439901843452e-05, "loss": 0.3805, "step": 47210 }, { "epoch": 925.88, "learning_rate": 2.0400647710393635e-05, "loss": 0.3861, "step": 47220 }, { "epoch": 926.0, "eval_loss": 0.39150747656822205, "eval_runtime": 2.2942, "eval_samples_per_second": 993.379, "eval_steps_per_second": 3.923, "step": 47226 }, { "epoch": 926.08, "learning_rate": 2.037287146073703e-05, "loss": 0.3812, "step": 47230 }, { "epoch": 926.27, "learning_rate": 2.0345111160983632e-05, "loss": 0.3801, "step": 47240 }, { "epoch": 926.47, "learning_rate": 2.0317366819238855e-05, "loss": 0.3837, "step": 47250 }, { "epoch": 926.67, "learning_rate": 2.0289638443603447e-05, "loss": 0.3836, "step": 47260 }, { "epoch": 926.86, "learning_rate": 2.0261926042173433e-05, "loss": 0.383, "step": 47270 }, { "epoch": 927.0, "eval_loss": 0.39107587933540344, "eval_runtime": 2.2238, "eval_samples_per_second": 1024.819, "eval_steps_per_second": 4.047, "step": 47277 }, { "epoch": 927.06, "learning_rate": 2.023422962304026e-05, "loss": 0.3848, "step": 47280 }, { "epoch": 927.25, "learning_rate": 2.0206549194290613e-05, "loss": 0.3828, "step": 47290 }, { "epoch": 927.45, "learning_rate": 2.0178884764006595e-05, "loss": 0.3804, "step": 47300 }, { "epoch": 927.65, "learning_rate": 2.0151236340265593e-05, "loss": 0.3857, "step": 47310 }, { "epoch": 927.84, "learning_rate": 2.0123603931140354e-05, "loss": 0.3785, "step": 47320 }, { "epoch": 928.0, "eval_loss": 0.3836647570133209, "eval_runtime": 2.3837, "eval_samples_per_second": 956.084, "eval_steps_per_second": 3.776, "step": 47328 }, { "epoch": 928.04, "learning_rate": 2.0095987544698916e-05, "loss": 0.3847, "step": 47330 }, { "epoch": 928.24, "learning_rate": 2.0068387189004604e-05, "loss": 0.3833, "step": 47340 }, { "epoch": 928.43, "learning_rate": 2.004080287211614e-05, "loss": 0.3813, "step": 47350 }, { "epoch": 928.63, "learning_rate": 2.0013234602087526e-05, "loss": 0.3792, "step": 47360 }, { "epoch": 928.82, "learning_rate": 1.998568238696811e-05, "loss": 0.3825, "step": 47370 }, { "epoch": 929.0, "eval_loss": 0.3878856301307678, "eval_runtime": 2.2175, "eval_samples_per_second": 1027.725, "eval_steps_per_second": 4.059, "step": 47379 }, { "epoch": 929.02, "learning_rate": 1.9958146234802504e-05, "loss": 0.3804, "step": 47380 }, { "epoch": 929.22, "learning_rate": 1.9930626153630614e-05, "loss": 0.3808, "step": 47390 }, { "epoch": 929.41, "learning_rate": 1.9903122151487725e-05, "loss": 0.3789, "step": 47400 }, { "epoch": 929.61, "learning_rate": 1.9875634236404388e-05, "loss": 0.382, "step": 47410 }, { "epoch": 929.8, "learning_rate": 1.984816241640648e-05, "loss": 0.3786, "step": 47420 }, { "epoch": 930.0, "learning_rate": 1.982070669951513e-05, "loss": 0.3793, "step": 47430 }, { "epoch": 930.0, "eval_loss": 0.39214441180229187, "eval_runtime": 2.3109, "eval_samples_per_second": 986.177, "eval_steps_per_second": 3.895, "step": 47430 }, { "epoch": 930.2, "learning_rate": 1.9793267093746814e-05, "loss": 0.3809, "step": 47440 }, { "epoch": 930.39, "learning_rate": 1.976584360711326e-05, "loss": 0.3851, "step": 47450 }, { "epoch": 930.59, "learning_rate": 1.9738436247621536e-05, "loss": 0.3829, "step": 47460 }, { "epoch": 930.78, "learning_rate": 1.971104502327399e-05, "loss": 0.3799, "step": 47470 }, { "epoch": 930.98, "learning_rate": 1.968366994206822e-05, "loss": 0.3836, "step": 47480 }, { "epoch": 931.0, "eval_loss": 0.38933873176574707, "eval_runtime": 2.3912, "eval_samples_per_second": 953.091, "eval_steps_per_second": 3.764, "step": 47481 }, { "epoch": 931.18, "learning_rate": 1.9656311011997168e-05, "loss": 0.3799, "step": 47490 }, { "epoch": 931.37, "learning_rate": 1.9628968241049e-05, "loss": 0.3838, "step": 47500 }, { "epoch": 931.57, "learning_rate": 1.9601641637207204e-05, "loss": 0.3878, "step": 47510 }, { "epoch": 931.76, "learning_rate": 1.9574331208450575e-05, "loss": 0.3797, "step": 47520 }, { "epoch": 931.96, "learning_rate": 1.9547036962753097e-05, "loss": 0.3858, "step": 47530 }, { "epoch": 932.0, "eval_loss": 0.38738271594047546, "eval_runtime": 2.2401, "eval_samples_per_second": 1017.363, "eval_steps_per_second": 4.018, "step": 47532 }, { "epoch": 932.16, "learning_rate": 1.9519758908084132e-05, "loss": 0.3813, "step": 47540 }, { "epoch": 932.35, "learning_rate": 1.9492497052408204e-05, "loss": 0.3814, "step": 47550 }, { "epoch": 932.55, "learning_rate": 1.9465251403685207e-05, "loss": 0.3827, "step": 47560 }, { "epoch": 932.75, "learning_rate": 1.9438021969870248e-05, "loss": 0.3792, "step": 47570 }, { "epoch": 932.94, "learning_rate": 1.9410808758913747e-05, "loss": 0.387, "step": 47580 }, { "epoch": 933.0, "eval_loss": 0.3881475627422333, "eval_runtime": 2.3304, "eval_samples_per_second": 977.924, "eval_steps_per_second": 3.862, "step": 47583 }, { "epoch": 933.14, "learning_rate": 1.938361177876133e-05, "loss": 0.3814, "step": 47590 }, { "epoch": 933.33, "learning_rate": 1.935643103735389e-05, "loss": 0.3805, "step": 47600 }, { "epoch": 933.53, "learning_rate": 1.9329266542627614e-05, "loss": 0.3807, "step": 47610 }, { "epoch": 933.73, "learning_rate": 1.9302118302513926e-05, "loss": 0.3796, "step": 47620 }, { "epoch": 933.92, "learning_rate": 1.927498632493953e-05, "loss": 0.3855, "step": 47630 }, { "epoch": 934.0, "eval_loss": 0.3862844705581665, "eval_runtime": 2.2897, "eval_samples_per_second": 995.32, "eval_steps_per_second": 3.931, "step": 47634 }, { "epoch": 934.12, "learning_rate": 1.9247870617826323e-05, "loss": 0.3807, "step": 47640 }, { "epoch": 934.31, "learning_rate": 1.9220771189091515e-05, "loss": 0.3788, "step": 47650 }, { "epoch": 934.51, "learning_rate": 1.919368804664751e-05, "loss": 0.3823, "step": 47660 }, { "epoch": 934.71, "learning_rate": 1.9166621198401992e-05, "loss": 0.3811, "step": 47670 }, { "epoch": 934.9, "learning_rate": 1.9139570652257897e-05, "loss": 0.3813, "step": 47680 }, { "epoch": 935.0, "eval_loss": 0.38326093554496765, "eval_runtime": 2.2607, "eval_samples_per_second": 1008.081, "eval_steps_per_second": 3.981, "step": 47685 }, { "epoch": 935.1, "learning_rate": 1.911253641611334e-05, "loss": 0.379, "step": 47690 }, { "epoch": 935.29, "learning_rate": 1.9085518497861766e-05, "loss": 0.3842, "step": 47700 }, { "epoch": 935.49, "learning_rate": 1.9058516905391757e-05, "loss": 0.378, "step": 47710 }, { "epoch": 935.69, "learning_rate": 1.9031531646587185e-05, "loss": 0.3811, "step": 47720 }, { "epoch": 935.88, "learning_rate": 1.9004562729327182e-05, "loss": 0.3787, "step": 47730 }, { "epoch": 936.0, "eval_loss": 0.387604683637619, "eval_runtime": 2.2353, "eval_samples_per_second": 1019.552, "eval_steps_per_second": 4.026, "step": 47736 }, { "epoch": 936.08, "learning_rate": 1.897761016148602e-05, "loss": 0.3787, "step": 47740 }, { "epoch": 936.27, "learning_rate": 1.8950673950933296e-05, "loss": 0.3826, "step": 47750 }, { "epoch": 936.47, "learning_rate": 1.8923754105533733e-05, "loss": 0.3797, "step": 47760 }, { "epoch": 936.67, "learning_rate": 1.889685063314734e-05, "loss": 0.3848, "step": 47770 }, { "epoch": 936.86, "learning_rate": 1.8869963541629353e-05, "loss": 0.3834, "step": 47780 }, { "epoch": 937.0, "eval_loss": 0.38703593611717224, "eval_runtime": 2.28, "eval_samples_per_second": 999.577, "eval_steps_per_second": 3.947, "step": 47787 }, { "epoch": 937.06, "learning_rate": 1.8843092838830206e-05, "loss": 0.381, "step": 47790 }, { "epoch": 937.25, "learning_rate": 1.8816238532595532e-05, "loss": 0.3802, "step": 47800 }, { "epoch": 937.45, "learning_rate": 1.8789400630766168e-05, "loss": 0.3742, "step": 47810 }, { "epoch": 937.65, "learning_rate": 1.8762579141178198e-05, "loss": 0.3801, "step": 47820 }, { "epoch": 937.84, "learning_rate": 1.87357740716629e-05, "loss": 0.3807, "step": 47830 }, { "epoch": 938.0, "eval_loss": 0.3838607668876648, "eval_runtime": 2.2246, "eval_samples_per_second": 1024.454, "eval_steps_per_second": 4.046, "step": 47838 }, { "epoch": 938.04, "learning_rate": 1.8708985430046785e-05, "loss": 0.3805, "step": 47840 }, { "epoch": 938.24, "learning_rate": 1.868221322415149e-05, "loss": 0.3789, "step": 47850 }, { "epoch": 938.43, "learning_rate": 1.8655457461793947e-05, "loss": 0.3759, "step": 47860 }, { "epoch": 938.63, "learning_rate": 1.8628718150786196e-05, "loss": 0.3823, "step": 47870 }, { "epoch": 938.82, "learning_rate": 1.8601995298935548e-05, "loss": 0.3788, "step": 47880 }, { "epoch": 939.0, "eval_loss": 0.3863469660282135, "eval_runtime": 2.3287, "eval_samples_per_second": 978.651, "eval_steps_per_second": 3.865, "step": 47889 }, { "epoch": 939.02, "learning_rate": 1.8575288914044497e-05, "loss": 0.3787, "step": 47890 }, { "epoch": 939.22, "learning_rate": 1.8548599003910664e-05, "loss": 0.3816, "step": 47900 }, { "epoch": 939.41, "learning_rate": 1.8521925576326955e-05, "loss": 0.3785, "step": 47910 }, { "epoch": 939.61, "learning_rate": 1.8495268639081373e-05, "loss": 0.384, "step": 47920 }, { "epoch": 939.8, "learning_rate": 1.846862819995718e-05, "loss": 0.3861, "step": 47930 }, { "epoch": 940.0, "learning_rate": 1.8442004266732787e-05, "loss": 0.3788, "step": 47940 }, { "epoch": 940.0, "eval_loss": 0.38470685482025146, "eval_runtime": 2.281, "eval_samples_per_second": 999.111, "eval_steps_per_second": 3.946, "step": 47940 }, { "epoch": 940.2, "learning_rate": 1.8415396847181766e-05, "loss": 0.3786, "step": 47950 }, { "epoch": 940.39, "learning_rate": 1.838880594907294e-05, "loss": 0.3824, "step": 47960 }, { "epoch": 940.59, "learning_rate": 1.8362231580170186e-05, "loss": 0.3843, "step": 47970 }, { "epoch": 940.78, "learning_rate": 1.8335673748232674e-05, "loss": 0.3859, "step": 47980 }, { "epoch": 940.98, "learning_rate": 1.8309132461014688e-05, "loss": 0.3819, "step": 47990 }, { "epoch": 941.0, "eval_loss": 0.3876339793205261, "eval_runtime": 2.2305, "eval_samples_per_second": 1021.726, "eval_steps_per_second": 4.035, "step": 47991 }, { "epoch": 941.18, "learning_rate": 1.8282607726265716e-05, "loss": 0.3788, "step": 48000 }, { "epoch": 941.37, "learning_rate": 1.825609955173037e-05, "loss": 0.3834, "step": 48010 }, { "epoch": 941.57, "learning_rate": 1.822960794514842e-05, "loss": 0.3777, "step": 48020 }, { "epoch": 941.76, "learning_rate": 1.8203132914254847e-05, "loss": 0.3827, "step": 48030 }, { "epoch": 941.96, "learning_rate": 1.817667446677977e-05, "loss": 0.3814, "step": 48040 }, { "epoch": 942.0, "eval_loss": 0.38454288244247437, "eval_runtime": 2.3427, "eval_samples_per_second": 972.806, "eval_steps_per_second": 3.842, "step": 48042 }, { "epoch": 942.16, "learning_rate": 1.8150232610448492e-05, "loss": 0.3836, "step": 48050 }, { "epoch": 942.35, "learning_rate": 1.812380735298139e-05, "loss": 0.3805, "step": 48060 }, { "epoch": 942.55, "learning_rate": 1.8097398702094106e-05, "loss": 0.3773, "step": 48070 }, { "epoch": 942.75, "learning_rate": 1.8071006665497327e-05, "loss": 0.3781, "step": 48080 }, { "epoch": 942.94, "learning_rate": 1.8044631250896958e-05, "loss": 0.3817, "step": 48090 }, { "epoch": 943.0, "eval_loss": 0.3829639256000519, "eval_runtime": 2.3281, "eval_samples_per_second": 978.891, "eval_steps_per_second": 3.866, "step": 48093 }, { "epoch": 943.14, "learning_rate": 1.8018272465994058e-05, "loss": 0.3792, "step": 48100 }, { "epoch": 943.33, "learning_rate": 1.7991930318484763e-05, "loss": 0.3781, "step": 48110 }, { "epoch": 943.53, "learning_rate": 1.7965604816060436e-05, "loss": 0.3822, "step": 48120 }, { "epoch": 943.73, "learning_rate": 1.7939295966407478e-05, "loss": 0.3778, "step": 48130 }, { "epoch": 943.92, "learning_rate": 1.7913003777207533e-05, "loss": 0.3838, "step": 48140 }, { "epoch": 944.0, "eval_loss": 0.388039231300354, "eval_runtime": 2.3308, "eval_samples_per_second": 977.761, "eval_steps_per_second": 3.861, "step": 48144 }, { "epoch": 944.12, "learning_rate": 1.7886728256137345e-05, "loss": 0.3834, "step": 48150 }, { "epoch": 944.31, "learning_rate": 1.786046941086873e-05, "loss": 0.3813, "step": 48160 }, { "epoch": 944.51, "learning_rate": 1.783422724906873e-05, "loss": 0.3789, "step": 48170 }, { "epoch": 944.71, "learning_rate": 1.7808001778399432e-05, "loss": 0.3765, "step": 48180 }, { "epoch": 944.9, "learning_rate": 1.7781793006518112e-05, "loss": 0.3787, "step": 48190 }, { "epoch": 945.0, "eval_loss": 0.3880222737789154, "eval_runtime": 2.2771, "eval_samples_per_second": 1000.828, "eval_steps_per_second": 3.952, "step": 48195 }, { "epoch": 945.1, "learning_rate": 1.7755600941077165e-05, "loss": 0.3771, "step": 48200 }, { "epoch": 945.29, "learning_rate": 1.772942558972405e-05, "loss": 0.3782, "step": 48210 }, { "epoch": 945.49, "learning_rate": 1.7703266960101425e-05, "loss": 0.3793, "step": 48220 }, { "epoch": 945.69, "learning_rate": 1.7677125059846983e-05, "loss": 0.3813, "step": 48230 }, { "epoch": 945.88, "learning_rate": 1.7650999896593602e-05, "loss": 0.3812, "step": 48240 }, { "epoch": 946.0, "eval_loss": 0.38842880725860596, "eval_runtime": 2.3222, "eval_samples_per_second": 981.39, "eval_steps_per_second": 3.876, "step": 48246 }, { "epoch": 946.08, "learning_rate": 1.7624891477969244e-05, "loss": 0.3814, "step": 48250 }, { "epoch": 946.27, "learning_rate": 1.7598799811597004e-05, "loss": 0.3784, "step": 48260 }, { "epoch": 946.47, "learning_rate": 1.7572724905095058e-05, "loss": 0.3796, "step": 48270 }, { "epoch": 946.67, "learning_rate": 1.7546666766076655e-05, "loss": 0.3824, "step": 48280 }, { "epoch": 946.86, "learning_rate": 1.7520625402150225e-05, "loss": 0.3806, "step": 48290 }, { "epoch": 947.0, "eval_loss": 0.38914692401885986, "eval_runtime": 2.3124, "eval_samples_per_second": 985.566, "eval_steps_per_second": 3.892, "step": 48297 }, { "epoch": 947.06, "learning_rate": 1.7494600820919264e-05, "loss": 0.3746, "step": 48300 }, { "epoch": 947.25, "learning_rate": 1.746859302998239e-05, "loss": 0.3777, "step": 48310 }, { "epoch": 947.45, "learning_rate": 1.7442602036933252e-05, "loss": 0.3768, "step": 48320 }, { "epoch": 947.65, "learning_rate": 1.7416627849360695e-05, "loss": 0.3803, "step": 48330 }, { "epoch": 947.84, "learning_rate": 1.7390670474848538e-05, "loss": 0.3816, "step": 48340 }, { "epoch": 948.0, "eval_loss": 0.3855222165584564, "eval_runtime": 2.2246, "eval_samples_per_second": 1024.441, "eval_steps_per_second": 4.046, "step": 48348 }, { "epoch": 948.04, "learning_rate": 1.73647299209758e-05, "loss": 0.3813, "step": 48350 }, { "epoch": 948.24, "learning_rate": 1.7338806195316555e-05, "loss": 0.3799, "step": 48360 }, { "epoch": 948.43, "learning_rate": 1.7312899305439903e-05, "loss": 0.3819, "step": 48370 }, { "epoch": 948.63, "learning_rate": 1.728700925891013e-05, "loss": 0.3781, "step": 48380 }, { "epoch": 948.82, "learning_rate": 1.7261136063286505e-05, "loss": 0.3813, "step": 48390 }, { "epoch": 949.0, "eval_loss": 0.38467514514923096, "eval_runtime": 2.2999, "eval_samples_per_second": 990.903, "eval_steps_per_second": 3.913, "step": 48399 }, { "epoch": 949.02, "learning_rate": 1.7235279726123456e-05, "loss": 0.3769, "step": 48400 }, { "epoch": 949.22, "learning_rate": 1.7209440254970467e-05, "loss": 0.3814, "step": 48410 }, { "epoch": 949.41, "learning_rate": 1.7183617657372047e-05, "loss": 0.3837, "step": 48420 }, { "epoch": 949.61, "learning_rate": 1.715781194086786e-05, "loss": 0.3795, "step": 48430 }, { "epoch": 949.8, "learning_rate": 1.713202311299256e-05, "loss": 0.378, "step": 48440 }, { "epoch": 950.0, "learning_rate": 1.7106251181275932e-05, "loss": 0.3811, "step": 48450 }, { "epoch": 950.0, "eval_loss": 0.38474026322364807, "eval_runtime": 2.3034, "eval_samples_per_second": 989.396, "eval_steps_per_second": 3.907, "step": 48450 }, { "epoch": 950.2, "learning_rate": 1.7080496153242798e-05, "loss": 0.3811, "step": 48460 }, { "epoch": 950.39, "learning_rate": 1.7054758036413086e-05, "loss": 0.3793, "step": 48470 }, { "epoch": 950.59, "learning_rate": 1.7029036838301716e-05, "loss": 0.378, "step": 48480 }, { "epoch": 950.78, "learning_rate": 1.700333256641869e-05, "loss": 0.3775, "step": 48490 }, { "epoch": 950.98, "learning_rate": 1.6977645228269106e-05, "loss": 0.3776, "step": 48500 }, { "epoch": 951.0, "eval_loss": 0.38311800360679626, "eval_runtime": 2.3155, "eval_samples_per_second": 984.221, "eval_steps_per_second": 3.887, "step": 48501 }, { "epoch": 951.18, "learning_rate": 1.6951974831353092e-05, "loss": 0.3787, "step": 48510 }, { "epoch": 951.37, "learning_rate": 1.6926321383165852e-05, "loss": 0.3782, "step": 48520 }, { "epoch": 951.57, "learning_rate": 1.6900684891197576e-05, "loss": 0.378, "step": 48530 }, { "epoch": 951.76, "learning_rate": 1.6875065362933595e-05, "loss": 0.378, "step": 48540 }, { "epoch": 951.96, "learning_rate": 1.684946280585419e-05, "loss": 0.3794, "step": 48550 }, { "epoch": 952.0, "eval_loss": 0.38669443130493164, "eval_runtime": 2.36, "eval_samples_per_second": 965.687, "eval_steps_per_second": 3.814, "step": 48552 }, { "epoch": 952.16, "learning_rate": 1.6823877227434774e-05, "loss": 0.3798, "step": 48560 }, { "epoch": 952.35, "learning_rate": 1.6798308635145765e-05, "loss": 0.3816, "step": 48570 }, { "epoch": 952.55, "learning_rate": 1.677275703645259e-05, "loss": 0.3793, "step": 48580 }, { "epoch": 952.75, "learning_rate": 1.674722243881579e-05, "loss": 0.3783, "step": 48590 }, { "epoch": 952.94, "learning_rate": 1.672170484969086e-05, "loss": 0.3782, "step": 48600 }, { "epoch": 953.0, "eval_loss": 0.38120561838150024, "eval_runtime": 2.3996, "eval_samples_per_second": 949.743, "eval_steps_per_second": 3.751, "step": 48603 }, { "epoch": 953.14, "learning_rate": 1.6696204276528375e-05, "loss": 0.3754, "step": 48610 }, { "epoch": 953.33, "learning_rate": 1.6670720726773965e-05, "loss": 0.3837, "step": 48620 }, { "epoch": 953.53, "learning_rate": 1.6645254207868203e-05, "loss": 0.38, "step": 48630 }, { "epoch": 953.73, "learning_rate": 1.661980472724681e-05, "loss": 0.3779, "step": 48640 }, { "epoch": 953.92, "learning_rate": 1.6594372292340403e-05, "loss": 0.3834, "step": 48650 }, { "epoch": 954.0, "eval_loss": 0.38515594601631165, "eval_runtime": 2.3227, "eval_samples_per_second": 981.17, "eval_steps_per_second": 3.875, "step": 48654 }, { "epoch": 954.12, "learning_rate": 1.6568956910574713e-05, "loss": 0.3766, "step": 48660 }, { "epoch": 954.31, "learning_rate": 1.6543558589370472e-05, "loss": 0.3815, "step": 48670 }, { "epoch": 954.51, "learning_rate": 1.6518177336143434e-05, "loss": 0.383, "step": 48680 }, { "epoch": 954.71, "learning_rate": 1.6492813158304344e-05, "loss": 0.3795, "step": 48690 }, { "epoch": 954.9, "learning_rate": 1.6467466063258956e-05, "loss": 0.3785, "step": 48700 }, { "epoch": 955.0, "eval_loss": 0.3830149173736572, "eval_runtime": 2.3291, "eval_samples_per_second": 978.479, "eval_steps_per_second": 3.864, "step": 48705 }, { "epoch": 955.1, "learning_rate": 1.6442136058408073e-05, "loss": 0.3811, "step": 48710 }, { "epoch": 955.29, "learning_rate": 1.6416823151147498e-05, "loss": 0.3839, "step": 48720 }, { "epoch": 955.49, "learning_rate": 1.6391527348868047e-05, "loss": 0.3788, "step": 48730 }, { "epoch": 955.69, "learning_rate": 1.6366248658955496e-05, "loss": 0.3777, "step": 48740 }, { "epoch": 955.88, "learning_rate": 1.6340987088790696e-05, "loss": 0.3789, "step": 48750 }, { "epoch": 956.0, "eval_loss": 0.3851874768733978, "eval_runtime": 2.2442, "eval_samples_per_second": 1015.509, "eval_steps_per_second": 4.01, "step": 48756 }, { "epoch": 956.08, "learning_rate": 1.6315742645749423e-05, "loss": 0.3794, "step": 48760 }, { "epoch": 956.27, "learning_rate": 1.6290515337202516e-05, "loss": 0.3801, "step": 48770 }, { "epoch": 956.47, "learning_rate": 1.6265305170515798e-05, "loss": 0.3787, "step": 48780 }, { "epoch": 956.67, "learning_rate": 1.6240112153050038e-05, "loss": 0.3808, "step": 48790 }, { "epoch": 956.86, "learning_rate": 1.6214936292161072e-05, "loss": 0.3801, "step": 48800 }, { "epoch": 957.0, "eval_loss": 0.38819777965545654, "eval_runtime": 2.2645, "eval_samples_per_second": 1006.408, "eval_steps_per_second": 3.974, "step": 48807 }, { "epoch": 957.06, "learning_rate": 1.6189777595199663e-05, "loss": 0.3766, "step": 48810 }, { "epoch": 957.25, "learning_rate": 1.6164636069511606e-05, "loss": 0.3797, "step": 48820 }, { "epoch": 957.45, "learning_rate": 1.613951172243767e-05, "loss": 0.3823, "step": 48830 }, { "epoch": 957.65, "learning_rate": 1.6114404561313583e-05, "loss": 0.3768, "step": 48840 }, { "epoch": 957.84, "learning_rate": 1.60893145934701e-05, "loss": 0.3771, "step": 48850 }, { "epoch": 958.0, "eval_loss": 0.38420116901397705, "eval_runtime": 2.2468, "eval_samples_per_second": 1014.35, "eval_steps_per_second": 4.006, "step": 48858 }, { "epoch": 958.04, "learning_rate": 1.60642418262329e-05, "loss": 0.3806, "step": 48860 }, { "epoch": 958.24, "learning_rate": 1.6039186266922693e-05, "loss": 0.3787, "step": 48870 }, { "epoch": 958.43, "learning_rate": 1.6014147922855168e-05, "loss": 0.3784, "step": 48880 }, { "epoch": 958.63, "learning_rate": 1.5989126801340914e-05, "loss": 0.3739, "step": 48890 }, { "epoch": 958.82, "learning_rate": 1.5964122909685588e-05, "loss": 0.3808, "step": 48900 }, { "epoch": 959.0, "eval_loss": 0.3839624524116516, "eval_runtime": 2.2667, "eval_samples_per_second": 1005.427, "eval_steps_per_second": 3.971, "step": 48909 }, { "epoch": 959.02, "learning_rate": 1.5939136255189715e-05, "loss": 0.3793, "step": 48910 }, { "epoch": 959.22, "learning_rate": 1.5914166845148876e-05, "loss": 0.3776, "step": 48920 }, { "epoch": 959.41, "learning_rate": 1.5889214686853578e-05, "loss": 0.3781, "step": 48930 }, { "epoch": 959.61, "learning_rate": 1.5864279787589306e-05, "loss": 0.3814, "step": 48940 }, { "epoch": 959.8, "learning_rate": 1.5839362154636485e-05, "loss": 0.3793, "step": 48950 }, { "epoch": 960.0, "learning_rate": 1.581446179527049e-05, "loss": 0.3762, "step": 48960 }, { "epoch": 960.0, "eval_loss": 0.3849249482154846, "eval_runtime": 2.1949, "eval_samples_per_second": 1038.309, "eval_steps_per_second": 4.1, "step": 48960 }, { "epoch": 960.2, "learning_rate": 1.578957871676168e-05, "loss": 0.3752, "step": 48970 }, { "epoch": 960.39, "learning_rate": 1.5764712926375365e-05, "loss": 0.3755, "step": 48980 }, { "epoch": 960.59, "learning_rate": 1.5739864431371816e-05, "loss": 0.3833, "step": 48990 }, { "epoch": 960.78, "learning_rate": 1.5715033239006214e-05, "loss": 0.3765, "step": 49000 }, { "epoch": 960.98, "learning_rate": 1.5690219356528737e-05, "loss": 0.3777, "step": 49010 }, { "epoch": 961.0, "eval_loss": 0.38420653343200684, "eval_runtime": 2.4626, "eval_samples_per_second": 925.439, "eval_steps_per_second": 3.655, "step": 49011 }, { "epoch": 961.18, "learning_rate": 1.566542279118445e-05, "loss": 0.3807, "step": 49020 }, { "epoch": 961.37, "learning_rate": 1.564064355021342e-05, "loss": 0.3801, "step": 49030 }, { "epoch": 961.57, "learning_rate": 1.5615881640850652e-05, "loss": 0.3761, "step": 49040 }, { "epoch": 961.76, "learning_rate": 1.5591137070326027e-05, "loss": 0.379, "step": 49050 }, { "epoch": 961.96, "learning_rate": 1.5566409845864454e-05, "loss": 0.3781, "step": 49060 }, { "epoch": 962.0, "eval_loss": 0.3874445855617523, "eval_runtime": 2.1852, "eval_samples_per_second": 1042.935, "eval_steps_per_second": 4.119, "step": 49062 }, { "epoch": 962.16, "learning_rate": 1.554169997468569e-05, "loss": 0.3793, "step": 49070 }, { "epoch": 962.35, "learning_rate": 1.5517007464004482e-05, "loss": 0.3771, "step": 49080 }, { "epoch": 962.55, "learning_rate": 1.5492332321030517e-05, "loss": 0.3779, "step": 49090 }, { "epoch": 962.75, "learning_rate": 1.546767455296834e-05, "loss": 0.3808, "step": 49100 }, { "epoch": 962.94, "learning_rate": 1.5443034167017524e-05, "loss": 0.3781, "step": 49110 }, { "epoch": 963.0, "eval_loss": 0.3837532699108124, "eval_runtime": 2.345, "eval_samples_per_second": 971.842, "eval_steps_per_second": 3.838, "step": 49113 }, { "epoch": 963.14, "learning_rate": 1.5418411170372452e-05, "loss": 0.3771, "step": 49120 }, { "epoch": 963.33, "learning_rate": 1.5393805570222524e-05, "loss": 0.3781, "step": 49130 }, { "epoch": 963.53, "learning_rate": 1.5369217373752023e-05, "loss": 0.3766, "step": 49140 }, { "epoch": 963.73, "learning_rate": 1.534464658814017e-05, "loss": 0.3766, "step": 49150 }, { "epoch": 963.92, "learning_rate": 1.532009322056107e-05, "loss": 0.376, "step": 49160 }, { "epoch": 964.0, "eval_loss": 0.3862951695919037, "eval_runtime": 2.2938, "eval_samples_per_second": 993.556, "eval_steps_per_second": 3.924, "step": 49164 }, { "epoch": 964.12, "learning_rate": 1.529555727818374e-05, "loss": 0.3788, "step": 49170 }, { "epoch": 964.31, "learning_rate": 1.5271038768172144e-05, "loss": 0.3788, "step": 49180 }, { "epoch": 964.51, "learning_rate": 1.5246537697685143e-05, "loss": 0.3767, "step": 49190 }, { "epoch": 964.71, "learning_rate": 1.5222054073876527e-05, "loss": 0.3779, "step": 49200 }, { "epoch": 964.9, "learning_rate": 1.5197587903894929e-05, "loss": 0.3777, "step": 49210 }, { "epoch": 965.0, "eval_loss": 0.3826569616794586, "eval_runtime": 2.3906, "eval_samples_per_second": 953.331, "eval_steps_per_second": 3.765, "step": 49215 }, { "epoch": 965.1, "learning_rate": 1.5173139194883948e-05, "loss": 0.3756, "step": 49220 }, { "epoch": 965.29, "learning_rate": 1.5148707953982036e-05, "loss": 0.3765, "step": 49230 }, { "epoch": 965.49, "learning_rate": 1.5124294188322594e-05, "loss": 0.3776, "step": 49240 }, { "epoch": 965.69, "learning_rate": 1.5099897905033904e-05, "loss": 0.3812, "step": 49250 }, { "epoch": 965.88, "learning_rate": 1.5075519111239106e-05, "loss": 0.3808, "step": 49260 }, { "epoch": 966.0, "eval_loss": 0.38532519340515137, "eval_runtime": 2.2587, "eval_samples_per_second": 1008.974, "eval_steps_per_second": 3.985, "step": 49266 }, { "epoch": 966.08, "learning_rate": 1.5051157814056303e-05, "loss": 0.3771, "step": 49270 }, { "epoch": 966.27, "learning_rate": 1.502681402059841e-05, "loss": 0.3805, "step": 49280 }, { "epoch": 966.47, "learning_rate": 1.5002487737973293e-05, "loss": 0.3763, "step": 49290 }, { "epoch": 966.67, "learning_rate": 1.4978178973283703e-05, "loss": 0.3812, "step": 49300 }, { "epoch": 966.86, "learning_rate": 1.4953887733627213e-05, "loss": 0.3835, "step": 49310 }, { "epoch": 967.0, "eval_loss": 0.3868524432182312, "eval_runtime": 2.3298, "eval_samples_per_second": 978.194, "eval_steps_per_second": 3.863, "step": 49317 }, { "epoch": 967.06, "learning_rate": 1.4929614026096365e-05, "loss": 0.3793, "step": 49320 }, { "epoch": 967.25, "learning_rate": 1.4905357857778499e-05, "loss": 0.3769, "step": 49330 }, { "epoch": 967.45, "learning_rate": 1.488111923575591e-05, "loss": 0.3771, "step": 49340 }, { "epoch": 967.65, "learning_rate": 1.4856898167105707e-05, "loss": 0.381, "step": 49350 }, { "epoch": 967.84, "learning_rate": 1.4832694658899946e-05, "loss": 0.3801, "step": 49360 }, { "epoch": 968.0, "eval_loss": 0.3859129548072815, "eval_runtime": 2.3191, "eval_samples_per_second": 982.701, "eval_steps_per_second": 3.881, "step": 49368 }, { "epoch": 968.04, "learning_rate": 1.480850871820549e-05, "loss": 0.3765, "step": 49370 }, { "epoch": 968.24, "learning_rate": 1.4784340352084062e-05, "loss": 0.3785, "step": 49380 }, { "epoch": 968.43, "learning_rate": 1.4760189567592304e-05, "loss": 0.3763, "step": 49390 }, { "epoch": 968.63, "learning_rate": 1.4736056371781723e-05, "loss": 0.3771, "step": 49400 }, { "epoch": 968.82, "learning_rate": 1.4711940771698686e-05, "loss": 0.3839, "step": 49410 }, { "epoch": 969.0, "eval_loss": 0.38414880633354187, "eval_runtime": 2.2645, "eval_samples_per_second": 1006.397, "eval_steps_per_second": 3.974, "step": 49419 }, { "epoch": 969.02, "learning_rate": 1.4687842774384365e-05, "loss": 0.3796, "step": 49420 }, { "epoch": 969.22, "learning_rate": 1.4663762386874883e-05, "loss": 0.3757, "step": 49430 }, { "epoch": 969.41, "learning_rate": 1.4639699616201133e-05, "loss": 0.3808, "step": 49440 }, { "epoch": 969.61, "learning_rate": 1.4615654469388938e-05, "loss": 0.3708, "step": 49450 }, { "epoch": 969.8, "learning_rate": 1.4591626953458955e-05, "loss": 0.38, "step": 49460 }, { "epoch": 970.0, "learning_rate": 1.4567617075426638e-05, "loss": 0.3768, "step": 49470 }, { "epoch": 970.0, "eval_loss": 0.38485315442085266, "eval_runtime": 2.244, "eval_samples_per_second": 1015.601, "eval_steps_per_second": 4.011, "step": 49470 }, { "epoch": 970.2, "learning_rate": 1.4543624842302382e-05, "loss": 0.3777, "step": 49480 }, { "epoch": 970.39, "learning_rate": 1.4519650261091347e-05, "loss": 0.376, "step": 49490 }, { "epoch": 970.59, "learning_rate": 1.4495693338793595e-05, "loss": 0.3779, "step": 49500 }, { "epoch": 970.78, "learning_rate": 1.447175408240403e-05, "loss": 0.3753, "step": 49510 }, { "epoch": 970.98, "learning_rate": 1.4447832498912335e-05, "loss": 0.3797, "step": 49520 }, { "epoch": 971.0, "eval_loss": 0.38439249992370605, "eval_runtime": 2.2074, "eval_samples_per_second": 1032.431, "eval_steps_per_second": 4.077, "step": 49521 }, { "epoch": 971.18, "learning_rate": 1.442392859530313e-05, "loss": 0.3765, "step": 49530 }, { "epoch": 971.37, "learning_rate": 1.4400042378555784e-05, "loss": 0.3805, "step": 49540 }, { "epoch": 971.57, "learning_rate": 1.4376173855644544e-05, "loss": 0.3766, "step": 49550 }, { "epoch": 971.76, "learning_rate": 1.4352323033538523e-05, "loss": 0.3776, "step": 49560 }, { "epoch": 971.96, "learning_rate": 1.432848991920158e-05, "loss": 0.3763, "step": 49570 }, { "epoch": 972.0, "eval_loss": 0.38550448417663574, "eval_runtime": 2.3189, "eval_samples_per_second": 982.804, "eval_steps_per_second": 3.881, "step": 49572 }, { "epoch": 972.16, "learning_rate": 1.4304674519592496e-05, "loss": 0.3719, "step": 49580 }, { "epoch": 972.35, "learning_rate": 1.4280876841664793e-05, "loss": 0.3774, "step": 49590 }, { "epoch": 972.55, "learning_rate": 1.425709689236688e-05, "loss": 0.3792, "step": 49600 }, { "epoch": 972.75, "learning_rate": 1.4233334678641984e-05, "loss": 0.3802, "step": 49610 }, { "epoch": 972.94, "learning_rate": 1.4209590207428148e-05, "loss": 0.3788, "step": 49620 }, { "epoch": 973.0, "eval_loss": 0.3832288980484009, "eval_runtime": 2.3426, "eval_samples_per_second": 972.871, "eval_steps_per_second": 3.842, "step": 49623 }, { "epoch": 973.14, "learning_rate": 1.418586348565821e-05, "loss": 0.3797, "step": 49630 }, { "epoch": 973.33, "learning_rate": 1.4162154520259839e-05, "loss": 0.376, "step": 49640 }, { "epoch": 973.53, "learning_rate": 1.4138463318155527e-05, "loss": 0.3818, "step": 49650 }, { "epoch": 973.73, "learning_rate": 1.4114789886262576e-05, "loss": 0.3774, "step": 49660 }, { "epoch": 973.92, "learning_rate": 1.4091134231493131e-05, "loss": 0.374, "step": 49670 }, { "epoch": 974.0, "eval_loss": 0.3858170211315155, "eval_runtime": 2.3194, "eval_samples_per_second": 982.599, "eval_steps_per_second": 3.88, "step": 49674 }, { "epoch": 974.12, "learning_rate": 1.4067496360754078e-05, "loss": 0.3829, "step": 49680 }, { "epoch": 974.31, "learning_rate": 1.4043876280947175e-05, "loss": 0.3772, "step": 49690 }, { "epoch": 974.51, "learning_rate": 1.4020273998968918e-05, "loss": 0.376, "step": 49700 }, { "epoch": 974.71, "learning_rate": 1.3996689521710683e-05, "loss": 0.373, "step": 49710 }, { "epoch": 974.9, "learning_rate": 1.3973122856058614e-05, "loss": 0.3785, "step": 49720 }, { "epoch": 975.0, "eval_loss": 0.38047897815704346, "eval_runtime": 2.3373, "eval_samples_per_second": 975.043, "eval_steps_per_second": 3.851, "step": 49725 }, { "epoch": 975.1, "learning_rate": 1.3949574008893629e-05, "loss": 0.3775, "step": 49730 }, { "epoch": 975.29, "learning_rate": 1.392604298709149e-05, "loss": 0.3805, "step": 49740 }, { "epoch": 975.49, "learning_rate": 1.39025297975227e-05, "loss": 0.3765, "step": 49750 }, { "epoch": 975.69, "learning_rate": 1.3879034447052597e-05, "loss": 0.3808, "step": 49760 }, { "epoch": 975.88, "learning_rate": 1.3855556942541333e-05, "loss": 0.3752, "step": 49770 }, { "epoch": 976.0, "eval_loss": 0.38550615310668945, "eval_runtime": 2.3057, "eval_samples_per_second": 988.423, "eval_steps_per_second": 3.903, "step": 49776 }, { "epoch": 976.08, "learning_rate": 1.383209729084377e-05, "loss": 0.3783, "step": 49780 }, { "epoch": 976.27, "learning_rate": 1.3808655498809638e-05, "loss": 0.379, "step": 49790 }, { "epoch": 976.47, "learning_rate": 1.3785231573283379e-05, "loss": 0.3753, "step": 49800 }, { "epoch": 976.67, "learning_rate": 1.376182552110428e-05, "loss": 0.3774, "step": 49810 }, { "epoch": 976.86, "learning_rate": 1.3738437349106384e-05, "loss": 0.3752, "step": 49820 }, { "epoch": 977.0, "eval_loss": 0.38268429040908813, "eval_runtime": 2.3909, "eval_samples_per_second": 953.211, "eval_steps_per_second": 3.764, "step": 49827 }, { "epoch": 977.06, "learning_rate": 1.3715067064118537e-05, "loss": 0.381, "step": 49830 }, { "epoch": 977.25, "learning_rate": 1.3691714672964322e-05, "loss": 0.3796, "step": 49840 }, { "epoch": 977.45, "learning_rate": 1.36683801824621e-05, "loss": 0.3758, "step": 49850 }, { "epoch": 977.65, "learning_rate": 1.364506359942503e-05, "loss": 0.3765, "step": 49860 }, { "epoch": 977.84, "learning_rate": 1.362176493066104e-05, "loss": 0.3779, "step": 49870 }, { "epoch": 978.0, "eval_loss": 0.3826252222061157, "eval_runtime": 2.2244, "eval_samples_per_second": 1024.528, "eval_steps_per_second": 4.046, "step": 49878 }, { "epoch": 978.04, "learning_rate": 1.3598484182972844e-05, "loss": 0.3764, "step": 49880 }, { "epoch": 978.24, "learning_rate": 1.3575221363157866e-05, "loss": 0.377, "step": 49890 }, { "epoch": 978.43, "learning_rate": 1.3551976478008356e-05, "loss": 0.3756, "step": 49900 }, { "epoch": 978.63, "learning_rate": 1.3528749534311279e-05, "loss": 0.3736, "step": 49910 }, { "epoch": 978.82, "learning_rate": 1.3505540538848384e-05, "loss": 0.3769, "step": 49920 }, { "epoch": 979.0, "eval_loss": 0.3824384808540344, "eval_runtime": 2.2551, "eval_samples_per_second": 1010.599, "eval_steps_per_second": 3.991, "step": 49929 }, { "epoch": 979.02, "learning_rate": 1.3482349498396224e-05, "loss": 0.3763, "step": 49930 }, { "epoch": 979.22, "learning_rate": 1.3459176419726004e-05, "loss": 0.3792, "step": 49940 }, { "epoch": 979.41, "learning_rate": 1.3436021309603806e-05, "loss": 0.3759, "step": 49950 }, { "epoch": 979.61, "learning_rate": 1.341288417479035e-05, "loss": 0.3765, "step": 49960 }, { "epoch": 979.8, "learning_rate": 1.3389765022041191e-05, "loss": 0.3754, "step": 49970 }, { "epoch": 980.0, "learning_rate": 1.3366663858106618e-05, "loss": 0.3778, "step": 49980 }, { "epoch": 980.0, "eval_loss": 0.38484612107276917, "eval_runtime": 2.2844, "eval_samples_per_second": 997.63, "eval_steps_per_second": 3.94, "step": 49980 }, { "epoch": 980.2, "learning_rate": 1.3343580689731632e-05, "loss": 0.3776, "step": 49990 }, { "epoch": 980.39, "learning_rate": 1.3320515523656019e-05, "loss": 0.382, "step": 50000 }, { "epoch": 980.59, "learning_rate": 1.3297468366614281e-05, "loss": 0.3809, "step": 50010 }, { "epoch": 980.78, "learning_rate": 1.3274439225335673e-05, "loss": 0.3788, "step": 50020 }, { "epoch": 980.98, "learning_rate": 1.3251428106544202e-05, "loss": 0.3749, "step": 50030 }, { "epoch": 981.0, "eval_loss": 0.38307544589042664, "eval_runtime": 2.3077, "eval_samples_per_second": 987.572, "eval_steps_per_second": 3.9, "step": 50031 }, { "epoch": 981.18, "learning_rate": 1.3228435016958609e-05, "loss": 0.3727, "step": 50040 }, { "epoch": 981.37, "learning_rate": 1.3205459963292357e-05, "loss": 0.377, "step": 50050 }, { "epoch": 981.57, "learning_rate": 1.3182502952253621e-05, "loss": 0.3802, "step": 50060 }, { "epoch": 981.76, "learning_rate": 1.3159563990545366e-05, "loss": 0.3794, "step": 50070 }, { "epoch": 981.96, "learning_rate": 1.3136643084865242e-05, "loss": 0.3756, "step": 50080 }, { "epoch": 982.0, "eval_loss": 0.38791292905807495, "eval_runtime": 2.2435, "eval_samples_per_second": 1015.827, "eval_steps_per_second": 4.012, "step": 50082 }, { "epoch": 982.16, "learning_rate": 1.3113740241905671e-05, "loss": 0.38, "step": 50090 }, { "epoch": 982.35, "learning_rate": 1.3090855468353736e-05, "loss": 0.377, "step": 50100 }, { "epoch": 982.55, "learning_rate": 1.3067988770891319e-05, "loss": 0.3827, "step": 50110 }, { "epoch": 982.75, "learning_rate": 1.3045140156194936e-05, "loss": 0.3768, "step": 50120 }, { "epoch": 982.94, "learning_rate": 1.3022309630935901e-05, "loss": 0.3739, "step": 50130 }, { "epoch": 983.0, "eval_loss": 0.38304463028907776, "eval_runtime": 2.2767, "eval_samples_per_second": 1001.022, "eval_steps_per_second": 3.953, "step": 50133 }, { "epoch": 983.14, "learning_rate": 1.299949720178024e-05, "loss": 0.3792, "step": 50140 }, { "epoch": 983.33, "learning_rate": 1.2976702875388633e-05, "loss": 0.3716, "step": 50150 }, { "epoch": 983.53, "learning_rate": 1.295392665841655e-05, "loss": 0.3773, "step": 50160 }, { "epoch": 983.73, "learning_rate": 1.2931168557514094e-05, "loss": 0.3751, "step": 50170 }, { "epoch": 983.92, "learning_rate": 1.2908428579326158e-05, "loss": 0.3769, "step": 50180 }, { "epoch": 984.0, "eval_loss": 0.3844551146030426, "eval_runtime": 2.1862, "eval_samples_per_second": 1042.456, "eval_steps_per_second": 4.117, "step": 50184 }, { "epoch": 984.12, "learning_rate": 1.2885706730492316e-05, "loss": 0.3767, "step": 50190 }, { "epoch": 984.31, "learning_rate": 1.2863003017646809e-05, "loss": 0.3797, "step": 50200 }, { "epoch": 984.51, "learning_rate": 1.2840317447418652e-05, "loss": 0.379, "step": 50210 }, { "epoch": 984.71, "learning_rate": 1.2817650026431481e-05, "loss": 0.3756, "step": 50220 }, { "epoch": 984.9, "learning_rate": 1.2795000761303708e-05, "loss": 0.3737, "step": 50230 }, { "epoch": 985.0, "eval_loss": 0.3893979787826538, "eval_runtime": 2.2314, "eval_samples_per_second": 1021.322, "eval_steps_per_second": 4.033, "step": 50235 }, { "epoch": 985.1, "learning_rate": 1.277236965864842e-05, "loss": 0.38, "step": 50240 }, { "epoch": 985.29, "learning_rate": 1.2749756725073365e-05, "loss": 0.3791, "step": 50250 }, { "epoch": 985.49, "learning_rate": 1.2727161967181043e-05, "loss": 0.3772, "step": 50260 }, { "epoch": 985.69, "learning_rate": 1.2704585391568594e-05, "loss": 0.3743, "step": 50270 }, { "epoch": 985.88, "learning_rate": 1.2682027004827888e-05, "loss": 0.3769, "step": 50280 }, { "epoch": 986.0, "eval_loss": 0.38151878118515015, "eval_runtime": 2.232, "eval_samples_per_second": 1021.062, "eval_steps_per_second": 4.032, "step": 50286 }, { "epoch": 986.08, "learning_rate": 1.2659486813545472e-05, "loss": 0.3764, "step": 50290 }, { "epoch": 986.27, "learning_rate": 1.2636964824302597e-05, "loss": 0.3798, "step": 50300 }, { "epoch": 986.47, "learning_rate": 1.2614461043675164e-05, "loss": 0.3708, "step": 50310 }, { "epoch": 986.67, "learning_rate": 1.2591975478233749e-05, "loss": 0.3742, "step": 50320 }, { "epoch": 986.86, "learning_rate": 1.2569508134543666e-05, "loss": 0.373, "step": 50330 }, { "epoch": 987.0, "eval_loss": 0.37965089082717896, "eval_runtime": 2.2126, "eval_samples_per_second": 1030.024, "eval_steps_per_second": 4.068, "step": 50337 }, { "epoch": 987.06, "learning_rate": 1.2547059019164868e-05, "loss": 0.3739, "step": 50340 }, { "epoch": 987.25, "learning_rate": 1.2524628138652021e-05, "loss": 0.3752, "step": 50350 }, { "epoch": 987.45, "learning_rate": 1.2502215499554411e-05, "loss": 0.3725, "step": 50360 }, { "epoch": 987.65, "learning_rate": 1.2479821108416044e-05, "loss": 0.3808, "step": 50370 }, { "epoch": 987.84, "learning_rate": 1.2457444971775565e-05, "loss": 0.374, "step": 50380 }, { "epoch": 988.0, "eval_loss": 0.38273051381111145, "eval_runtime": 2.3252, "eval_samples_per_second": 980.135, "eval_steps_per_second": 3.871, "step": 50388 }, { "epoch": 988.04, "learning_rate": 1.2435087096166324e-05, "loss": 0.3735, "step": 50390 }, { "epoch": 988.24, "learning_rate": 1.2412747488116332e-05, "loss": 0.3778, "step": 50400 }, { "epoch": 988.43, "learning_rate": 1.2390426154148228e-05, "loss": 0.372, "step": 50410 }, { "epoch": 988.63, "learning_rate": 1.2368123100779376e-05, "loss": 0.379, "step": 50420 }, { "epoch": 988.82, "learning_rate": 1.2345838334521724e-05, "loss": 0.3778, "step": 50430 }, { "epoch": 989.0, "eval_loss": 0.38441118597984314, "eval_runtime": 2.235, "eval_samples_per_second": 1019.7, "eval_steps_per_second": 4.027, "step": 50439 }, { "epoch": 989.02, "learning_rate": 1.2323571861881967e-05, "loss": 0.3758, "step": 50440 }, { "epoch": 989.22, "learning_rate": 1.2301323689361423e-05, "loss": 0.3726, "step": 50450 }, { "epoch": 989.41, "learning_rate": 1.2279093823456019e-05, "loss": 0.3765, "step": 50460 }, { "epoch": 989.61, "learning_rate": 1.2256882270656429e-05, "loss": 0.3738, "step": 50470 }, { "epoch": 989.8, "learning_rate": 1.2234689037447892e-05, "loss": 0.3775, "step": 50480 }, { "epoch": 990.0, "learning_rate": 1.2212514130310358e-05, "loss": 0.3773, "step": 50490 }, { "epoch": 990.0, "eval_loss": 0.38456442952156067, "eval_runtime": 2.2877, "eval_samples_per_second": 996.205, "eval_steps_per_second": 3.934, "step": 50490 }, { "epoch": 990.2, "learning_rate": 1.2190357555718388e-05, "loss": 0.3756, "step": 50500 }, { "epoch": 990.39, "learning_rate": 1.216821932014125e-05, "loss": 0.3724, "step": 50510 }, { "epoch": 990.59, "learning_rate": 1.2146099430042782e-05, "loss": 0.3797, "step": 50520 }, { "epoch": 990.78, "learning_rate": 1.2123997891881485e-05, "loss": 0.3732, "step": 50530 }, { "epoch": 990.98, "learning_rate": 1.2101914712110536e-05, "loss": 0.3759, "step": 50540 }, { "epoch": 991.0, "eval_loss": 0.3825616240501404, "eval_runtime": 2.2692, "eval_samples_per_second": 1004.299, "eval_steps_per_second": 3.966, "step": 50541 }, { "epoch": 991.18, "learning_rate": 1.2079849897177721e-05, "loss": 0.3776, "step": 50550 }, { "epoch": 991.37, "learning_rate": 1.2057803453525502e-05, "loss": 0.3784, "step": 50560 }, { "epoch": 991.57, "learning_rate": 1.2035775387590915e-05, "loss": 0.3768, "step": 50570 }, { "epoch": 991.76, "learning_rate": 1.201376570580569e-05, "loss": 0.3741, "step": 50580 }, { "epoch": 991.96, "learning_rate": 1.1991774414596126e-05, "loss": 0.3752, "step": 50590 }, { "epoch": 992.0, "eval_loss": 0.38430219888687134, "eval_runtime": 2.3526, "eval_samples_per_second": 968.721, "eval_steps_per_second": 3.826, "step": 50592 }, { "epoch": 992.16, "learning_rate": 1.196980152038322e-05, "loss": 0.3784, "step": 50600 }, { "epoch": 992.35, "learning_rate": 1.1947847029582578e-05, "loss": 0.3768, "step": 50610 }, { "epoch": 992.55, "learning_rate": 1.1925910948604376e-05, "loss": 0.3751, "step": 50620 }, { "epoch": 992.75, "learning_rate": 1.1903993283853516e-05, "loss": 0.3772, "step": 50630 }, { "epoch": 992.94, "learning_rate": 1.1882094041729423e-05, "loss": 0.3747, "step": 50640 }, { "epoch": 993.0, "eval_loss": 0.381651371717453, "eval_runtime": 2.2123, "eval_samples_per_second": 1030.155, "eval_steps_per_second": 4.068, "step": 50643 }, { "epoch": 993.14, "learning_rate": 1.1860213228626198e-05, "loss": 0.3762, "step": 50650 }, { "epoch": 993.33, "learning_rate": 1.1838350850932578e-05, "loss": 0.3774, "step": 50660 }, { "epoch": 993.53, "learning_rate": 1.1816506915031845e-05, "loss": 0.3776, "step": 50670 }, { "epoch": 993.73, "learning_rate": 1.1794681427301986e-05, "loss": 0.3762, "step": 50680 }, { "epoch": 993.92, "learning_rate": 1.1772874394115519e-05, "loss": 0.3781, "step": 50690 }, { "epoch": 994.0, "eval_loss": 0.3783932626247406, "eval_runtime": 2.3211, "eval_samples_per_second": 981.883, "eval_steps_per_second": 3.878, "step": 50694 }, { "epoch": 994.12, "learning_rate": 1.175108582183962e-05, "loss": 0.3763, "step": 50700 }, { "epoch": 994.31, "learning_rate": 1.1729315716836083e-05, "loss": 0.3761, "step": 50710 }, { "epoch": 994.51, "learning_rate": 1.1707564085461295e-05, "loss": 0.3792, "step": 50720 }, { "epoch": 994.71, "learning_rate": 1.168583093406624e-05, "loss": 0.3752, "step": 50730 }, { "epoch": 994.9, "learning_rate": 1.1664116268996488e-05, "loss": 0.3751, "step": 50740 }, { "epoch": 995.0, "eval_loss": 0.3832464814186096, "eval_runtime": 2.2172, "eval_samples_per_second": 1027.873, "eval_steps_per_second": 4.059, "step": 50745 }, { "epoch": 995.1, "learning_rate": 1.1642420096592258e-05, "loss": 0.3765, "step": 50750 }, { "epoch": 995.29, "learning_rate": 1.1620742423188354e-05, "loss": 0.3787, "step": 50760 }, { "epoch": 995.49, "learning_rate": 1.1599083255114175e-05, "loss": 0.3764, "step": 50770 }, { "epoch": 995.69, "learning_rate": 1.1577442598693699e-05, "loss": 0.3781, "step": 50780 }, { "epoch": 995.88, "learning_rate": 1.1555820460245535e-05, "loss": 0.3758, "step": 50790 }, { "epoch": 996.0, "eval_loss": 0.37998104095458984, "eval_runtime": 2.2526, "eval_samples_per_second": 1011.735, "eval_steps_per_second": 3.995, "step": 50796 }, { "epoch": 996.08, "learning_rate": 1.1534216846082845e-05, "loss": 0.3749, "step": 50800 }, { "epoch": 996.27, "learning_rate": 1.1512631762513405e-05, "loss": 0.3793, "step": 50810 }, { "epoch": 996.47, "learning_rate": 1.14910652158396e-05, "loss": 0.3785, "step": 50820 }, { "epoch": 996.67, "learning_rate": 1.1469517212358354e-05, "loss": 0.3736, "step": 50830 }, { "epoch": 996.86, "learning_rate": 1.144798775836123e-05, "loss": 0.3718, "step": 50840 }, { "epoch": 997.0, "eval_loss": 0.38368964195251465, "eval_runtime": 2.2857, "eval_samples_per_second": 997.047, "eval_steps_per_second": 3.937, "step": 50847 }, { "epoch": 997.06, "learning_rate": 1.1426476860134318e-05, "loss": 0.3746, "step": 50850 }, { "epoch": 997.25, "learning_rate": 1.1404984523958335e-05, "loss": 0.3727, "step": 50860 }, { "epoch": 997.45, "learning_rate": 1.138351075610858e-05, "loss": 0.3771, "step": 50870 }, { "epoch": 997.65, "learning_rate": 1.1362055562854877e-05, "loss": 0.3787, "step": 50880 }, { "epoch": 997.84, "learning_rate": 1.1340618950461708e-05, "loss": 0.3745, "step": 50890 }, { "epoch": 998.0, "eval_loss": 0.382259726524353, "eval_runtime": 2.3112, "eval_samples_per_second": 986.086, "eval_steps_per_second": 3.894, "step": 50898 }, { "epoch": 998.04, "learning_rate": 1.1319200925188049e-05, "loss": 0.3769, "step": 50900 }, { "epoch": 998.24, "learning_rate": 1.1297801493287497e-05, "loss": 0.3799, "step": 50910 }, { "epoch": 998.43, "learning_rate": 1.1276420661008231e-05, "loss": 0.3787, "step": 50920 }, { "epoch": 998.63, "learning_rate": 1.1255058434592939e-05, "loss": 0.3742, "step": 50930 }, { "epoch": 998.82, "learning_rate": 1.123371482027895e-05, "loss": 0.3757, "step": 50940 }, { "epoch": 999.0, "eval_loss": 0.3797883093357086, "eval_runtime": 2.2661, "eval_samples_per_second": 1005.687, "eval_steps_per_second": 3.972, "step": 50949 }, { "epoch": 999.02, "learning_rate": 1.1212389824298093e-05, "loss": 0.3729, "step": 50950 }, { "epoch": 999.22, "learning_rate": 1.1191083452876806e-05, "loss": 0.3754, "step": 50960 }, { "epoch": 999.41, "learning_rate": 1.116979571223607e-05, "loss": 0.3697, "step": 50970 }, { "epoch": 999.61, "learning_rate": 1.114852660859145e-05, "loss": 0.3744, "step": 50980 }, { "epoch": 999.8, "learning_rate": 1.1127276148153039e-05, "loss": 0.373, "step": 50990 }, { "epoch": 1000.0, "learning_rate": 1.1106044337125478e-05, "loss": 0.3786, "step": 51000 }, { "epoch": 1000.0, "eval_loss": 0.37940987944602966, "eval_runtime": 2.3813, "eval_samples_per_second": 957.056, "eval_steps_per_second": 3.78, "step": 51000 }, { "epoch": 1000.2, "learning_rate": 1.108483118170799e-05, "loss": 0.3717, "step": 51010 }, { "epoch": 1000.39, "learning_rate": 1.1063636688094354e-05, "loss": 0.3741, "step": 51020 }, { "epoch": 1000.59, "learning_rate": 1.1042460862472905e-05, "loss": 0.3779, "step": 51030 }, { "epoch": 1000.78, "learning_rate": 1.1021303711026468e-05, "loss": 0.3747, "step": 51040 }, { "epoch": 1000.98, "learning_rate": 1.1000165239932507e-05, "loss": 0.3738, "step": 51050 }, { "epoch": 1001.0, "eval_loss": 0.37811383605003357, "eval_runtime": 2.1934, "eval_samples_per_second": 1039.022, "eval_steps_per_second": 4.103, "step": 51051 }, { "epoch": 1001.18, "learning_rate": 1.0979045455362948e-05, "loss": 0.3778, "step": 51060 }, { "epoch": 1001.37, "learning_rate": 1.09579443634843e-05, "loss": 0.3713, "step": 51070 }, { "epoch": 1001.57, "learning_rate": 1.0936861970457644e-05, "loss": 0.3763, "step": 51080 }, { "epoch": 1001.76, "learning_rate": 1.0915798282438531e-05, "loss": 0.3775, "step": 51090 }, { "epoch": 1001.96, "learning_rate": 1.0894753305577116e-05, "loss": 0.3779, "step": 51100 }, { "epoch": 1002.0, "eval_loss": 0.38506320118904114, "eval_runtime": 2.3112, "eval_samples_per_second": 986.049, "eval_steps_per_second": 3.894, "step": 51102 }, { "epoch": 1002.16, "learning_rate": 1.0873727046018036e-05, "loss": 0.3746, "step": 51110 }, { "epoch": 1002.35, "learning_rate": 1.085271950990051e-05, "loss": 0.3796, "step": 51120 }, { "epoch": 1002.55, "learning_rate": 1.0831730703358265e-05, "loss": 0.3764, "step": 51130 }, { "epoch": 1002.75, "learning_rate": 1.081076063251956e-05, "loss": 0.3795, "step": 51140 }, { "epoch": 1002.94, "learning_rate": 1.0789809303507205e-05, "loss": 0.3735, "step": 51150 }, { "epoch": 1003.0, "eval_loss": 0.3844279646873474, "eval_runtime": 2.3307, "eval_samples_per_second": 977.811, "eval_steps_per_second": 3.861, "step": 51153 }, { "epoch": 1003.14, "learning_rate": 1.0768876722438487e-05, "loss": 0.376, "step": 51160 }, { "epoch": 1003.33, "learning_rate": 1.0747962895425272e-05, "loss": 0.3744, "step": 51170 }, { "epoch": 1003.53, "learning_rate": 1.0727067828573937e-05, "loss": 0.376, "step": 51180 }, { "epoch": 1003.73, "learning_rate": 1.0706191527985389e-05, "loss": 0.3746, "step": 51190 }, { "epoch": 1003.92, "learning_rate": 1.0685333999755017e-05, "loss": 0.3753, "step": 51200 }, { "epoch": 1004.0, "eval_loss": 0.3840962052345276, "eval_runtime": 2.2584, "eval_samples_per_second": 1009.122, "eval_steps_per_second": 3.985, "step": 51204 }, { "epoch": 1004.12, "learning_rate": 1.0664495249972749e-05, "loss": 0.3796, "step": 51210 }, { "epoch": 1004.31, "learning_rate": 1.0643675284723043e-05, "loss": 0.3747, "step": 51220 }, { "epoch": 1004.51, "learning_rate": 1.0622874110084873e-05, "loss": 0.3754, "step": 51230 }, { "epoch": 1004.71, "learning_rate": 1.0602091732131727e-05, "loss": 0.3759, "step": 51240 }, { "epoch": 1004.9, "learning_rate": 1.0581328156931559e-05, "loss": 0.3701, "step": 51250 }, { "epoch": 1005.0, "eval_loss": 0.3804880380630493, "eval_runtime": 2.256, "eval_samples_per_second": 1010.217, "eval_steps_per_second": 3.989, "step": 51255 }, { "epoch": 1005.1, "learning_rate": 1.0560583390546923e-05, "loss": 0.3763, "step": 51260 }, { "epoch": 1005.29, "learning_rate": 1.053985743903477e-05, "loss": 0.3766, "step": 51270 }, { "epoch": 1005.49, "learning_rate": 1.0519150308446655e-05, "loss": 0.3795, "step": 51280 }, { "epoch": 1005.69, "learning_rate": 1.0498462004828598e-05, "loss": 0.3769, "step": 51290 }, { "epoch": 1005.88, "learning_rate": 1.04777925342211e-05, "loss": 0.3738, "step": 51300 }, { "epoch": 1006.0, "eval_loss": 0.3825666904449463, "eval_runtime": 2.2233, "eval_samples_per_second": 1025.034, "eval_steps_per_second": 4.048, "step": 51306 }, { "epoch": 1006.08, "learning_rate": 1.0457141902659208e-05, "loss": 0.373, "step": 51310 }, { "epoch": 1006.27, "learning_rate": 1.0436510116172425e-05, "loss": 0.374, "step": 51320 }, { "epoch": 1006.47, "learning_rate": 1.0415897180784774e-05, "loss": 0.375, "step": 51330 }, { "epoch": 1006.67, "learning_rate": 1.0395303102514807e-05, "loss": 0.3782, "step": 51340 }, { "epoch": 1006.86, "learning_rate": 1.0374727887375481e-05, "loss": 0.3729, "step": 51350 }, { "epoch": 1007.0, "eval_loss": 0.3792899549007416, "eval_runtime": 2.2748, "eval_samples_per_second": 1001.838, "eval_steps_per_second": 3.956, "step": 51357 }, { "epoch": 1007.06, "learning_rate": 1.0354171541374356e-05, "loss": 0.3748, "step": 51360 }, { "epoch": 1007.25, "learning_rate": 1.0333634070513375e-05, "loss": 0.3737, "step": 51370 }, { "epoch": 1007.45, "learning_rate": 1.0313115480789047e-05, "loss": 0.3785, "step": 51380 }, { "epoch": 1007.65, "learning_rate": 1.0292615778192348e-05, "loss": 0.3722, "step": 51390 }, { "epoch": 1007.84, "learning_rate": 1.027213496870874e-05, "loss": 0.3765, "step": 51400 }, { "epoch": 1008.0, "eval_loss": 0.38250917196273804, "eval_runtime": 2.4031, "eval_samples_per_second": 948.347, "eval_steps_per_second": 3.745, "step": 51408 }, { "epoch": 1008.04, "learning_rate": 1.0251673058318147e-05, "loss": 0.3738, "step": 51410 }, { "epoch": 1008.24, "learning_rate": 1.0231230052994974e-05, "loss": 0.3751, "step": 51420 }, { "epoch": 1008.43, "learning_rate": 1.0210805958708145e-05, "loss": 0.3736, "step": 51430 }, { "epoch": 1008.63, "learning_rate": 1.0190400781421035e-05, "loss": 0.3752, "step": 51440 }, { "epoch": 1008.82, "learning_rate": 1.0170014527091524e-05, "loss": 0.3725, "step": 51450 }, { "epoch": 1009.0, "eval_loss": 0.38174739480018616, "eval_runtime": 2.3742, "eval_samples_per_second": 959.913, "eval_steps_per_second": 3.791, "step": 51459 }, { "epoch": 1009.02, "learning_rate": 1.0149647201671904e-05, "loss": 0.3754, "step": 51460 }, { "epoch": 1009.22, "learning_rate": 1.0129298811109015e-05, "loss": 0.3773, "step": 51470 }, { "epoch": 1009.41, "learning_rate": 1.0108969361344099e-05, "loss": 0.3749, "step": 51480 }, { "epoch": 1009.61, "learning_rate": 1.0088658858312914e-05, "loss": 0.3774, "step": 51490 }, { "epoch": 1009.8, "learning_rate": 1.0068367307945702e-05, "loss": 0.3746, "step": 51500 }, { "epoch": 1010.0, "learning_rate": 1.0048094716167095e-05, "loss": 0.3766, "step": 51510 }, { "epoch": 1010.0, "eval_loss": 0.38128504157066345, "eval_runtime": 2.2597, "eval_samples_per_second": 1008.534, "eval_steps_per_second": 3.983, "step": 51510 }, { "epoch": 1010.2, "learning_rate": 1.0027841088896289e-05, "loss": 0.3742, "step": 51520 }, { "epoch": 1010.39, "learning_rate": 1.0007606432046846e-05, "loss": 0.3758, "step": 51530 }, { "epoch": 1010.59, "learning_rate": 9.987390751526855e-06, "loss": 0.3777, "step": 51540 }, { "epoch": 1010.78, "learning_rate": 9.96719405323885e-06, "loss": 0.3797, "step": 51550 }, { "epoch": 1010.98, "learning_rate": 9.947016343079806e-06, "loss": 0.3736, "step": 51560 }, { "epoch": 1011.0, "eval_loss": 0.38342854380607605, "eval_runtime": 2.256, "eval_samples_per_second": 1010.174, "eval_steps_per_second": 3.989, "step": 51561 }, { "epoch": 1011.18, "learning_rate": 9.926857626941176e-06, "loss": 0.3724, "step": 51570 }, { "epoch": 1011.37, "learning_rate": 9.906717910708828e-06, "loss": 0.3753, "step": 51580 }, { "epoch": 1011.57, "learning_rate": 9.886597200263132e-06, "loss": 0.3776, "step": 51590 }, { "epoch": 1011.76, "learning_rate": 9.866495501478891e-06, "loss": 0.3789, "step": 51600 }, { "epoch": 1011.96, "learning_rate": 9.846412820225358e-06, "loss": 0.3747, "step": 51610 }, { "epoch": 1012.0, "eval_loss": 0.38004985451698303, "eval_runtime": 2.2958, "eval_samples_per_second": 992.701, "eval_steps_per_second": 3.92, "step": 51612 }, { "epoch": 1012.16, "learning_rate": 9.82634916236621e-06, "loss": 0.375, "step": 51620 }, { "epoch": 1012.35, "learning_rate": 9.806304533759576e-06, "loss": 0.3746, "step": 51630 }, { "epoch": 1012.55, "learning_rate": 9.78627894025806e-06, "loss": 0.3714, "step": 51640 }, { "epoch": 1012.75, "learning_rate": 9.766272387708693e-06, "loss": 0.3699, "step": 51650 }, { "epoch": 1012.94, "learning_rate": 9.746284881952942e-06, "loss": 0.3726, "step": 51660 }, { "epoch": 1013.0, "eval_loss": 0.381724089384079, "eval_runtime": 2.2926, "eval_samples_per_second": 994.086, "eval_steps_per_second": 3.926, "step": 51663 }, { "epoch": 1013.14, "learning_rate": 9.726316428826717e-06, "loss": 0.3761, "step": 51670 }, { "epoch": 1013.33, "learning_rate": 9.706367034160326e-06, "loss": 0.3737, "step": 51680 }, { "epoch": 1013.53, "learning_rate": 9.686436703778577e-06, "loss": 0.3765, "step": 51690 }, { "epoch": 1013.73, "learning_rate": 9.666525443500667e-06, "loss": 0.3723, "step": 51700 }, { "epoch": 1013.92, "learning_rate": 9.646633259140276e-06, "loss": 0.3819, "step": 51710 }, { "epoch": 1014.0, "eval_loss": 0.3839859068393707, "eval_runtime": 2.2503, "eval_samples_per_second": 1012.774, "eval_steps_per_second": 4.0, "step": 51714 }, { "epoch": 1014.12, "learning_rate": 9.626760156505429e-06, "loss": 0.3707, "step": 51720 }, { "epoch": 1014.31, "learning_rate": 9.60690614139867e-06, "loss": 0.3738, "step": 51730 }, { "epoch": 1014.51, "learning_rate": 9.587071219616918e-06, "loss": 0.3731, "step": 51740 }, { "epoch": 1014.71, "learning_rate": 9.567255396951478e-06, "loss": 0.3739, "step": 51750 }, { "epoch": 1014.9, "learning_rate": 9.5474586791882e-06, "loss": 0.3799, "step": 51760 }, { "epoch": 1015.0, "eval_loss": 0.38338810205459595, "eval_runtime": 2.2465, "eval_samples_per_second": 1014.485, "eval_steps_per_second": 4.006, "step": 51765 }, { "epoch": 1015.1, "learning_rate": 9.527681072107249e-06, "loss": 0.379, "step": 51770 }, { "epoch": 1015.29, "learning_rate": 9.507922581483257e-06, "loss": 0.3778, "step": 51780 }, { "epoch": 1015.49, "learning_rate": 9.488183213085243e-06, "loss": 0.3747, "step": 51790 }, { "epoch": 1015.69, "learning_rate": 9.46846297267668e-06, "loss": 0.3726, "step": 51800 }, { "epoch": 1015.88, "learning_rate": 9.448761866015445e-06, "loss": 0.3754, "step": 51810 }, { "epoch": 1016.0, "eval_loss": 0.3817760944366455, "eval_runtime": 2.3863, "eval_samples_per_second": 955.027, "eval_steps_per_second": 3.771, "step": 51816 }, { "epoch": 1016.08, "learning_rate": 9.429079898853795e-06, "loss": 0.3722, "step": 51820 }, { "epoch": 1016.27, "learning_rate": 9.409417076938457e-06, "loss": 0.3708, "step": 51830 }, { "epoch": 1016.47, "learning_rate": 9.389773406010509e-06, "loss": 0.37, "step": 51840 }, { "epoch": 1016.67, "learning_rate": 9.370148891805467e-06, "loss": 0.373, "step": 51850 }, { "epoch": 1016.86, "learning_rate": 9.350543540053268e-06, "loss": 0.3762, "step": 51860 }, { "epoch": 1017.0, "eval_loss": 0.37691184878349304, "eval_runtime": 2.2427, "eval_samples_per_second": 1016.193, "eval_steps_per_second": 4.013, "step": 51867 }, { "epoch": 1017.06, "learning_rate": 9.330957356478248e-06, "loss": 0.3805, "step": 51870 }, { "epoch": 1017.25, "learning_rate": 9.311390346799114e-06, "loss": 0.3737, "step": 51880 }, { "epoch": 1017.45, "learning_rate": 9.29184251672899e-06, "loss": 0.3775, "step": 51890 }, { "epoch": 1017.65, "learning_rate": 9.27231387197541e-06, "loss": 0.3714, "step": 51900 }, { "epoch": 1017.84, "learning_rate": 9.252804418240312e-06, "loss": 0.3718, "step": 51910 }, { "epoch": 1018.0, "eval_loss": 0.3794402480125427, "eval_runtime": 2.2234, "eval_samples_per_second": 1025.004, "eval_steps_per_second": 4.048, "step": 51918 }, { "epoch": 1018.04, "learning_rate": 9.23331416122004e-06, "loss": 0.3731, "step": 51920 }, { "epoch": 1018.24, "learning_rate": 9.213843106605267e-06, "loss": 0.3782, "step": 51930 }, { "epoch": 1018.43, "learning_rate": 9.194391260081163e-06, "loss": 0.3725, "step": 51940 }, { "epoch": 1018.63, "learning_rate": 9.174958627327191e-06, "loss": 0.3746, "step": 51950 }, { "epoch": 1018.82, "learning_rate": 9.155545214017232e-06, "loss": 0.3785, "step": 51960 }, { "epoch": 1019.0, "eval_loss": 0.3825004994869232, "eval_runtime": 2.2245, "eval_samples_per_second": 1024.505, "eval_steps_per_second": 4.046, "step": 51969 }, { "epoch": 1019.02, "learning_rate": 9.136151025819633e-06, "loss": 0.3729, "step": 51970 }, { "epoch": 1019.22, "learning_rate": 9.116776068397006e-06, "loss": 0.3754, "step": 51980 }, { "epoch": 1019.41, "learning_rate": 9.097420347406442e-06, "loss": 0.3786, "step": 51990 }, { "epoch": 1019.61, "learning_rate": 9.078083868499356e-06, "loss": 0.3759, "step": 52000 }, { "epoch": 1019.8, "learning_rate": 9.05876663732158e-06, "loss": 0.3697, "step": 52010 }, { "epoch": 1020.0, "learning_rate": 9.039468659513327e-06, "loss": 0.3754, "step": 52020 }, { "epoch": 1020.0, "eval_loss": 0.38265079259872437, "eval_runtime": 2.199, "eval_samples_per_second": 1036.381, "eval_steps_per_second": 4.093, "step": 52020 }, { "epoch": 1020.2, "learning_rate": 9.02018994070914e-06, "loss": 0.376, "step": 52030 }, { "epoch": 1020.39, "learning_rate": 9.000930486538026e-06, "loss": 0.3739, "step": 52040 }, { "epoch": 1020.59, "learning_rate": 8.981690302623263e-06, "loss": 0.3717, "step": 52050 }, { "epoch": 1020.78, "learning_rate": 8.962469394582587e-06, "loss": 0.3768, "step": 52060 }, { "epoch": 1020.98, "learning_rate": 8.943267768028068e-06, "loss": 0.374, "step": 52070 }, { "epoch": 1021.0, "eval_loss": 0.3817632496356964, "eval_runtime": 2.293, "eval_samples_per_second": 993.877, "eval_steps_per_second": 3.925, "step": 52071 }, { "epoch": 1021.18, "learning_rate": 8.924085428566163e-06, "loss": 0.3702, "step": 52080 }, { "epoch": 1021.37, "learning_rate": 8.904922381797677e-06, "loss": 0.3725, "step": 52090 }, { "epoch": 1021.57, "learning_rate": 8.885778633317783e-06, "loss": 0.3782, "step": 52100 }, { "epoch": 1021.76, "learning_rate": 8.866654188716035e-06, "loss": 0.3683, "step": 52110 }, { "epoch": 1021.96, "learning_rate": 8.847549053576342e-06, "loss": 0.3785, "step": 52120 }, { "epoch": 1022.0, "eval_loss": 0.3780389428138733, "eval_runtime": 2.2927, "eval_samples_per_second": 994.038, "eval_steps_per_second": 3.926, "step": 52122 }, { "epoch": 1022.16, "learning_rate": 8.828463233477e-06, "loss": 0.3754, "step": 52130 }, { "epoch": 1022.35, "learning_rate": 8.809396733990615e-06, "loss": 0.3757, "step": 52140 }, { "epoch": 1022.55, "learning_rate": 8.790349560684203e-06, "loss": 0.3749, "step": 52150 }, { "epoch": 1022.75, "learning_rate": 8.771321719119101e-06, "loss": 0.3733, "step": 52160 }, { "epoch": 1022.94, "learning_rate": 8.75231321485098e-06, "loss": 0.3735, "step": 52170 }, { "epoch": 1023.0, "eval_loss": 0.3814985752105713, "eval_runtime": 2.374, "eval_samples_per_second": 959.969, "eval_steps_per_second": 3.791, "step": 52173 }, { "epoch": 1023.14, "learning_rate": 8.733324053429963e-06, "loss": 0.3719, "step": 52180 }, { "epoch": 1023.33, "learning_rate": 8.71435424040042e-06, "loss": 0.3746, "step": 52190 }, { "epoch": 1023.53, "learning_rate": 8.695403781301144e-06, "loss": 0.3718, "step": 52200 }, { "epoch": 1023.73, "learning_rate": 8.676472681665208e-06, "loss": 0.3755, "step": 52210 }, { "epoch": 1023.92, "learning_rate": 8.657560947020093e-06, "loss": 0.3726, "step": 52220 }, { "epoch": 1024.0, "eval_loss": 0.3794108033180237, "eval_runtime": 2.2464, "eval_samples_per_second": 1014.494, "eval_steps_per_second": 4.006, "step": 52224 }, { "epoch": 1024.12, "learning_rate": 8.63866858288762e-06, "loss": 0.3789, "step": 52230 }, { "epoch": 1024.31, "learning_rate": 8.619795594783896e-06, "loss": 0.3744, "step": 52240 }, { "epoch": 1024.51, "learning_rate": 8.600941988219453e-06, "loss": 0.375, "step": 52250 }, { "epoch": 1024.71, "learning_rate": 8.582107768699098e-06, "loss": 0.3722, "step": 52260 }, { "epoch": 1024.9, "learning_rate": 8.563292941722004e-06, "loss": 0.3798, "step": 52270 }, { "epoch": 1025.0, "eval_loss": 0.378730446100235, "eval_runtime": 2.3701, "eval_samples_per_second": 961.574, "eval_steps_per_second": 3.797, "step": 52275 }, { "epoch": 1025.1, "learning_rate": 8.544497512781697e-06, "loss": 0.3745, "step": 52280 }, { "epoch": 1025.29, "learning_rate": 8.525721487366027e-06, "loss": 0.3725, "step": 52290 }, { "epoch": 1025.49, "learning_rate": 8.506964870957159e-06, "loss": 0.3762, "step": 52300 }, { "epoch": 1025.69, "learning_rate": 8.488227669031594e-06, "loss": 0.3732, "step": 52310 }, { "epoch": 1025.88, "learning_rate": 8.4695098870602e-06, "loss": 0.3714, "step": 52320 }, { "epoch": 1026.0, "eval_loss": 0.3809713125228882, "eval_runtime": 2.3518, "eval_samples_per_second": 969.036, "eval_steps_per_second": 3.827, "step": 52326 }, { "epoch": 1026.08, "learning_rate": 8.450811530508136e-06, "loss": 0.3731, "step": 52330 }, { "epoch": 1026.27, "learning_rate": 8.432132604834938e-06, "loss": 0.3736, "step": 52340 }, { "epoch": 1026.47, "learning_rate": 8.413473115494407e-06, "loss": 0.3721, "step": 52350 }, { "epoch": 1026.67, "learning_rate": 8.394833067934687e-06, "loss": 0.378, "step": 52360 }, { "epoch": 1026.86, "learning_rate": 8.37621246759829e-06, "loss": 0.3776, "step": 52370 }, { "epoch": 1027.0, "eval_loss": 0.3787022829055786, "eval_runtime": 2.2896, "eval_samples_per_second": 995.38, "eval_steps_per_second": 3.931, "step": 52377 }, { "epoch": 1027.06, "learning_rate": 8.357611319921967e-06, "loss": 0.3712, "step": 52380 }, { "epoch": 1027.25, "learning_rate": 8.3390296303369e-06, "loss": 0.3721, "step": 52390 }, { "epoch": 1027.45, "learning_rate": 8.320467404268479e-06, "loss": 0.3758, "step": 52400 }, { "epoch": 1027.65, "learning_rate": 8.301924647136499e-06, "loss": 0.3751, "step": 52410 }, { "epoch": 1027.84, "learning_rate": 8.283401364354999e-06, "loss": 0.3688, "step": 52420 }, { "epoch": 1028.0, "eval_loss": 0.37706291675567627, "eval_runtime": 2.2642, "eval_samples_per_second": 1006.53, "eval_steps_per_second": 3.975, "step": 52428 }, { "epoch": 1028.04, "learning_rate": 8.264897561332357e-06, "loss": 0.3715, "step": 52430 }, { "epoch": 1028.24, "learning_rate": 8.246413243471315e-06, "loss": 0.3757, "step": 52440 }, { "epoch": 1028.43, "learning_rate": 8.22794841616884e-06, "loss": 0.3712, "step": 52450 }, { "epoch": 1028.63, "learning_rate": 8.209503084816285e-06, "loss": 0.3777, "step": 52460 }, { "epoch": 1028.82, "learning_rate": 8.191077254799244e-06, "loss": 0.375, "step": 52470 }, { "epoch": 1029.0, "eval_loss": 0.3775680661201477, "eval_runtime": 2.3853, "eval_samples_per_second": 955.43, "eval_steps_per_second": 3.773, "step": 52479 }, { "epoch": 1029.02, "learning_rate": 8.172670931497655e-06, "loss": 0.3781, "step": 52480 }, { "epoch": 1029.22, "learning_rate": 8.154284120285775e-06, "loss": 0.3723, "step": 52490 }, { "epoch": 1029.41, "learning_rate": 8.135916826532112e-06, "loss": 0.3737, "step": 52500 }, { "epoch": 1029.61, "learning_rate": 8.117569055599543e-06, "loss": 0.3723, "step": 52510 }, { "epoch": 1029.8, "learning_rate": 8.099240812845173e-06, "loss": 0.3709, "step": 52520 }, { "epoch": 1030.0, "learning_rate": 8.080932103620446e-06, "loss": 0.372, "step": 52530 }, { "epoch": 1030.0, "eval_loss": 0.3795132339000702, "eval_runtime": 2.3155, "eval_samples_per_second": 984.231, "eval_steps_per_second": 3.887, "step": 52530 }, { "epoch": 1030.2, "learning_rate": 8.062642933271104e-06, "loss": 0.3769, "step": 52540 }, { "epoch": 1030.39, "learning_rate": 8.044373307137201e-06, "loss": 0.3697, "step": 52550 }, { "epoch": 1030.59, "learning_rate": 8.026123230553033e-06, "loss": 0.3753, "step": 52560 }, { "epoch": 1030.78, "learning_rate": 8.0078927088472e-06, "loss": 0.3695, "step": 52570 }, { "epoch": 1030.98, "learning_rate": 7.98968174734265e-06, "loss": 0.3736, "step": 52580 }, { "epoch": 1031.0, "eval_loss": 0.3780902624130249, "eval_runtime": 2.2445, "eval_samples_per_second": 1015.369, "eval_steps_per_second": 4.01, "step": 52581 }, { "epoch": 1031.18, "learning_rate": 7.971490351356521e-06, "loss": 0.3718, "step": 52590 }, { "epoch": 1031.37, "learning_rate": 7.953318526200358e-06, "loss": 0.3723, "step": 52600 }, { "epoch": 1031.57, "learning_rate": 7.935166277179884e-06, "loss": 0.3737, "step": 52610 }, { "epoch": 1031.76, "learning_rate": 7.91703360959518e-06, "loss": 0.3744, "step": 52620 }, { "epoch": 1031.96, "learning_rate": 7.898920528740566e-06, "loss": 0.3713, "step": 52630 }, { "epoch": 1032.0, "eval_loss": 0.3815433084964752, "eval_runtime": 2.225, "eval_samples_per_second": 1024.291, "eval_steps_per_second": 4.045, "step": 52632 }, { "epoch": 1032.16, "learning_rate": 7.880827039904633e-06, "loss": 0.3725, "step": 52640 }, { "epoch": 1032.35, "learning_rate": 7.862753148370331e-06, "loss": 0.3678, "step": 52650 }, { "epoch": 1032.55, "learning_rate": 7.844698859414783e-06, "loss": 0.3763, "step": 52660 }, { "epoch": 1032.75, "learning_rate": 7.826664178309477e-06, "loss": 0.3692, "step": 52670 }, { "epoch": 1032.94, "learning_rate": 7.808649110320111e-06, "loss": 0.3772, "step": 52680 }, { "epoch": 1033.0, "eval_loss": 0.38015732169151306, "eval_runtime": 2.2304, "eval_samples_per_second": 1021.789, "eval_steps_per_second": 4.035, "step": 52683 }, { "epoch": 1033.14, "learning_rate": 7.790653660706686e-06, "loss": 0.3714, "step": 52690 }, { "epoch": 1033.33, "learning_rate": 7.772677834723498e-06, "loss": 0.3774, "step": 52700 }, { "epoch": 1033.53, "learning_rate": 7.75472163761905e-06, "loss": 0.3751, "step": 52710 }, { "epoch": 1033.73, "learning_rate": 7.736785074636179e-06, "loss": 0.3657, "step": 52720 }, { "epoch": 1033.92, "learning_rate": 7.71886815101194e-06, "loss": 0.375, "step": 52730 }, { "epoch": 1034.0, "eval_loss": 0.37879452109336853, "eval_runtime": 2.2594, "eval_samples_per_second": 1008.66, "eval_steps_per_second": 3.983, "step": 52734 }, { "epoch": 1034.12, "learning_rate": 7.700970871977687e-06, "loss": 0.3721, "step": 52740 }, { "epoch": 1034.31, "learning_rate": 7.68309324275902e-06, "loss": 0.375, "step": 52750 }, { "epoch": 1034.51, "learning_rate": 7.665235268575835e-06, "loss": 0.3738, "step": 52760 }, { "epoch": 1034.71, "learning_rate": 7.647396954642235e-06, "loss": 0.3723, "step": 52770 }, { "epoch": 1034.9, "learning_rate": 7.629578306166607e-06, "loss": 0.3725, "step": 52780 }, { "epoch": 1035.0, "eval_loss": 0.3818568289279938, "eval_runtime": 2.272, "eval_samples_per_second": 1003.103, "eval_steps_per_second": 3.961, "step": 52785 }, { "epoch": 1035.1, "learning_rate": 7.6117793283516196e-06, "loss": 0.3697, "step": 52790 }, { "epoch": 1035.29, "learning_rate": 7.594000026394134e-06, "loss": 0.3768, "step": 52800 }, { "epoch": 1035.49, "learning_rate": 7.576240405485373e-06, "loss": 0.3731, "step": 52810 }, { "epoch": 1035.69, "learning_rate": 7.558500470810697e-06, "loss": 0.3749, "step": 52820 }, { "epoch": 1035.88, "learning_rate": 7.540780227549811e-06, "loss": 0.3696, "step": 52830 }, { "epoch": 1036.0, "eval_loss": 0.38364723324775696, "eval_runtime": 2.3835, "eval_samples_per_second": 956.169, "eval_steps_per_second": 3.776, "step": 52836 }, { "epoch": 1036.08, "learning_rate": 7.523079680876612e-06, "loss": 0.372, "step": 52840 }, { "epoch": 1036.27, "learning_rate": 7.50539883595924e-06, "loss": 0.3726, "step": 52850 }, { "epoch": 1036.47, "learning_rate": 7.487737697960155e-06, "loss": 0.3733, "step": 52860 }, { "epoch": 1036.67, "learning_rate": 7.470096272035978e-06, "loss": 0.3738, "step": 52870 }, { "epoch": 1036.86, "learning_rate": 7.452474563337643e-06, "loss": 0.3741, "step": 52880 }, { "epoch": 1037.0, "eval_loss": 0.3813818693161011, "eval_runtime": 2.2919, "eval_samples_per_second": 994.355, "eval_steps_per_second": 3.927, "step": 52887 }, { "epoch": 1037.06, "learning_rate": 7.43487257701027e-06, "loss": 0.3728, "step": 52890 }, { "epoch": 1037.25, "learning_rate": 7.417290318193247e-06, "loss": 0.3696, "step": 52900 }, { "epoch": 1037.45, "learning_rate": 7.399727792020235e-06, "loss": 0.3743, "step": 52910 }, { "epoch": 1037.65, "learning_rate": 7.382185003619048e-06, "loss": 0.3764, "step": 52920 }, { "epoch": 1037.84, "learning_rate": 7.364661958111839e-06, "loss": 0.3734, "step": 52930 }, { "epoch": 1038.0, "eval_loss": 0.37986841797828674, "eval_runtime": 2.3307, "eval_samples_per_second": 977.807, "eval_steps_per_second": 3.861, "step": 52938 }, { "epoch": 1038.04, "learning_rate": 7.347158660614907e-06, "loss": 0.3711, "step": 52940 }, { "epoch": 1038.24, "learning_rate": 7.3296751162388475e-06, "loss": 0.3734, "step": 52950 }, { "epoch": 1038.43, "learning_rate": 7.3122113300884525e-06, "loss": 0.3741, "step": 52960 }, { "epoch": 1038.63, "learning_rate": 7.294767307262784e-06, "loss": 0.373, "step": 52970 }, { "epoch": 1038.82, "learning_rate": 7.277343052855084e-06, "loss": 0.3759, "step": 52980 }, { "epoch": 1039.0, "eval_loss": 0.3788532614707947, "eval_runtime": 2.3042, "eval_samples_per_second": 989.052, "eval_steps_per_second": 3.906, "step": 52989 }, { "epoch": 1039.02, "learning_rate": 7.259938571952833e-06, "loss": 0.3736, "step": 52990 }, { "epoch": 1039.22, "learning_rate": 7.242553869637793e-06, "loss": 0.3782, "step": 53000 }, { "epoch": 1039.41, "learning_rate": 7.225188950985852e-06, "loss": 0.3711, "step": 53010 }, { "epoch": 1039.61, "learning_rate": 7.207843821067239e-06, "loss": 0.3719, "step": 53020 }, { "epoch": 1039.8, "learning_rate": 7.190518484946309e-06, "loss": 0.376, "step": 53030 }, { "epoch": 1040.0, "learning_rate": 7.173212947681692e-06, "loss": 0.3726, "step": 53040 }, { "epoch": 1040.0, "eval_loss": 0.38017430901527405, "eval_runtime": 2.2059, "eval_samples_per_second": 1033.148, "eval_steps_per_second": 4.08, "step": 53040 }, { "epoch": 1040.2, "learning_rate": 7.155927214326213e-06, "loss": 0.3687, "step": 53050 }, { "epoch": 1040.39, "learning_rate": 7.138661289926892e-06, "loss": 0.3779, "step": 53060 }, { "epoch": 1040.59, "learning_rate": 7.121415179525039e-06, "loss": 0.3764, "step": 53070 }, { "epoch": 1040.78, "learning_rate": 7.104188888156109e-06, "loss": 0.3772, "step": 53080 }, { "epoch": 1040.98, "learning_rate": 7.086982420849812e-06, "loss": 0.3693, "step": 53090 }, { "epoch": 1041.0, "eval_loss": 0.37691381573677063, "eval_runtime": 2.2297, "eval_samples_per_second": 1022.133, "eval_steps_per_second": 4.037, "step": 53091 }, { "epoch": 1041.18, "learning_rate": 7.069795782630039e-06, "loss": 0.3744, "step": 53100 }, { "epoch": 1041.37, "learning_rate": 7.0526289785148824e-06, "loss": 0.3716, "step": 53110 }, { "epoch": 1041.57, "learning_rate": 7.035482013516716e-06, "loss": 0.3705, "step": 53120 }, { "epoch": 1041.76, "learning_rate": 7.018354892642028e-06, "loss": 0.3755, "step": 53130 }, { "epoch": 1041.96, "learning_rate": 7.001247620891592e-06, "loss": 0.3705, "step": 53140 }, { "epoch": 1042.0, "eval_loss": 0.3811741769313812, "eval_runtime": 2.3416, "eval_samples_per_second": 973.249, "eval_steps_per_second": 3.843, "step": 53142 }, { "epoch": 1042.16, "learning_rate": 6.984160203260323e-06, "loss": 0.3728, "step": 53150 }, { "epoch": 1042.35, "learning_rate": 6.967092644737368e-06, "loss": 0.3718, "step": 53160 }, { "epoch": 1042.55, "learning_rate": 6.950044950306094e-06, "loss": 0.3709, "step": 53170 }, { "epoch": 1042.75, "learning_rate": 6.9330171249440184e-06, "loss": 0.374, "step": 53180 }, { "epoch": 1042.94, "learning_rate": 6.916009173622914e-06, "loss": 0.3691, "step": 53190 }, { "epoch": 1043.0, "eval_loss": 0.3806150257587433, "eval_runtime": 2.3941, "eval_samples_per_second": 951.93, "eval_steps_per_second": 3.759, "step": 53193 }, { "epoch": 1043.14, "learning_rate": 6.899021101308699e-06, "loss": 0.3748, "step": 53200 }, { "epoch": 1043.33, "learning_rate": 6.882052912961533e-06, "loss": 0.3745, "step": 53210 }, { "epoch": 1043.53, "learning_rate": 6.865104613535718e-06, "loss": 0.3736, "step": 53220 }, { "epoch": 1043.73, "learning_rate": 6.848176207979822e-06, "loss": 0.3703, "step": 53230 }, { "epoch": 1043.92, "learning_rate": 6.83126770123654e-06, "loss": 0.3736, "step": 53240 }, { "epoch": 1044.0, "eval_loss": 0.3796224892139435, "eval_runtime": 2.3263, "eval_samples_per_second": 979.664, "eval_steps_per_second": 3.869, "step": 53244 }, { "epoch": 1044.12, "learning_rate": 6.814379098242773e-06, "loss": 0.3684, "step": 53250 }, { "epoch": 1044.31, "learning_rate": 6.7975104039296266e-06, "loss": 0.372, "step": 53260 }, { "epoch": 1044.51, "learning_rate": 6.780661623222361e-06, "loss": 0.3715, "step": 53270 }, { "epoch": 1044.71, "learning_rate": 6.763832761040483e-06, "loss": 0.3704, "step": 53280 }, { "epoch": 1044.9, "learning_rate": 6.747023822297612e-06, "loss": 0.3707, "step": 53290 }, { "epoch": 1045.0, "eval_loss": 0.3784136474132538, "eval_runtime": 2.3444, "eval_samples_per_second": 972.094, "eval_steps_per_second": 3.839, "step": 53295 }, { "epoch": 1045.1, "learning_rate": 6.730234811901614e-06, "loss": 0.3733, "step": 53300 }, { "epoch": 1045.29, "learning_rate": 6.713465734754475e-06, "loss": 0.3748, "step": 53310 }, { "epoch": 1045.49, "learning_rate": 6.696716595752388e-06, "loss": 0.3723, "step": 53320 }, { "epoch": 1045.69, "learning_rate": 6.679987399785766e-06, "loss": 0.3714, "step": 53330 }, { "epoch": 1045.88, "learning_rate": 6.663278151739135e-06, "loss": 0.3735, "step": 53340 }, { "epoch": 1046.0, "eval_loss": 0.3752482831478119, "eval_runtime": 2.3265, "eval_samples_per_second": 979.576, "eval_steps_per_second": 3.868, "step": 53346 }, { "epoch": 1046.08, "learning_rate": 6.646588856491234e-06, "loss": 0.3691, "step": 53350 }, { "epoch": 1046.27, "learning_rate": 6.629919518914939e-06, "loss": 0.3676, "step": 53360 }, { "epoch": 1046.47, "learning_rate": 6.61327014387735e-06, "loss": 0.3704, "step": 53370 }, { "epoch": 1046.67, "learning_rate": 6.59664073623972e-06, "loss": 0.3789, "step": 53380 }, { "epoch": 1046.86, "learning_rate": 6.580031300857438e-06, "loss": 0.3773, "step": 53390 }, { "epoch": 1047.0, "eval_loss": 0.38012462854385376, "eval_runtime": 2.3216, "eval_samples_per_second": 981.637, "eval_steps_per_second": 3.877, "step": 53397 }, { "epoch": 1047.06, "learning_rate": 6.563441842580111e-06, "loss": 0.3798, "step": 53400 }, { "epoch": 1047.25, "learning_rate": 6.54687236625148e-06, "loss": 0.3741, "step": 53410 }, { "epoch": 1047.45, "learning_rate": 6.530322876709465e-06, "loss": 0.3705, "step": 53420 }, { "epoch": 1047.65, "learning_rate": 6.513793378786136e-06, "loss": 0.3742, "step": 53430 }, { "epoch": 1047.84, "learning_rate": 6.4972838773077655e-06, "loss": 0.3714, "step": 53440 }, { "epoch": 1048.0, "eval_loss": 0.38000649213790894, "eval_runtime": 2.2751, "eval_samples_per_second": 1001.725, "eval_steps_per_second": 3.956, "step": 53448 }, { "epoch": 1048.04, "learning_rate": 6.4807943770947475e-06, "loss": 0.3758, "step": 53450 }, { "epoch": 1048.24, "learning_rate": 6.46432488296163e-06, "loss": 0.3754, "step": 53460 }, { "epoch": 1048.43, "learning_rate": 6.4478753997171675e-06, "loss": 0.3754, "step": 53470 }, { "epoch": 1048.63, "learning_rate": 6.4314459321642e-06, "loss": 0.3703, "step": 53480 }, { "epoch": 1048.82, "learning_rate": 6.415036485099825e-06, "loss": 0.3747, "step": 53490 }, { "epoch": 1049.0, "eval_loss": 0.3787485957145691, "eval_runtime": 2.204, "eval_samples_per_second": 1034.047, "eval_steps_per_second": 4.084, "step": 53499 }, { "epoch": 1049.02, "learning_rate": 6.3986470633151845e-06, "loss": 0.3682, "step": 53500 }, { "epoch": 1049.22, "learning_rate": 6.382277671595659e-06, "loss": 0.368, "step": 53510 }, { "epoch": 1049.41, "learning_rate": 6.365928314720725e-06, "loss": 0.3749, "step": 53520 }, { "epoch": 1049.61, "learning_rate": 6.349598997464015e-06, "loss": 0.3713, "step": 53530 }, { "epoch": 1049.8, "learning_rate": 6.333289724593363e-06, "loss": 0.3732, "step": 53540 }, { "epoch": 1050.0, "learning_rate": 6.317000500870687e-06, "loss": 0.3735, "step": 53550 }, { "epoch": 1050.0, "eval_loss": 0.3775447905063629, "eval_runtime": 2.2154, "eval_samples_per_second": 1028.709, "eval_steps_per_second": 4.062, "step": 53550 }, { "epoch": 1050.2, "learning_rate": 6.3007313310520975e-06, "loss": 0.367, "step": 53560 }, { "epoch": 1050.39, "learning_rate": 6.2844822198878046e-06, "loss": 0.3703, "step": 53570 }, { "epoch": 1050.59, "learning_rate": 6.268253172122204e-06, "loss": 0.3666, "step": 53580 }, { "epoch": 1050.78, "learning_rate": 6.252044192493813e-06, "loss": 0.3735, "step": 53590 }, { "epoch": 1050.98, "learning_rate": 6.235855285735289e-06, "loss": 0.3727, "step": 53600 }, { "epoch": 1051.0, "eval_loss": 0.37708601355552673, "eval_runtime": 2.3593, "eval_samples_per_second": 965.956, "eval_steps_per_second": 3.815, "step": 53601 }, { "epoch": 1051.18, "learning_rate": 6.219686456573434e-06, "loss": 0.374, "step": 53610 }, { "epoch": 1051.37, "learning_rate": 6.203537709729178e-06, "loss": 0.3726, "step": 53620 }, { "epoch": 1051.57, "learning_rate": 6.187409049917611e-06, "loss": 0.3717, "step": 53630 }, { "epoch": 1051.76, "learning_rate": 6.171300481847905e-06, "loss": 0.3703, "step": 53640 }, { "epoch": 1051.96, "learning_rate": 6.155212010223457e-06, "loss": 0.3736, "step": 53650 }, { "epoch": 1052.0, "eval_loss": 0.38328659534454346, "eval_runtime": 2.4265, "eval_samples_per_second": 939.22, "eval_steps_per_second": 3.709, "step": 53652 }, { "epoch": 1052.16, "learning_rate": 6.1391436397417084e-06, "loss": 0.3724, "step": 53660 }, { "epoch": 1052.35, "learning_rate": 6.123095375094267e-06, "loss": 0.3723, "step": 53670 }, { "epoch": 1052.55, "learning_rate": 6.107067220966874e-06, "loss": 0.3691, "step": 53680 }, { "epoch": 1052.75, "learning_rate": 6.0910591820393705e-06, "loss": 0.3719, "step": 53690 }, { "epoch": 1052.94, "learning_rate": 6.0750712629858005e-06, "loss": 0.3676, "step": 53700 }, { "epoch": 1053.0, "eval_loss": 0.37962618470191956, "eval_runtime": 2.3919, "eval_samples_per_second": 952.79, "eval_steps_per_second": 3.763, "step": 53703 }, { "epoch": 1053.14, "learning_rate": 6.059103468474222e-06, "loss": 0.372, "step": 53710 }, { "epoch": 1053.33, "learning_rate": 6.043155803166921e-06, "loss": 0.3712, "step": 53720 }, { "epoch": 1053.53, "learning_rate": 6.027228271720233e-06, "loss": 0.3705, "step": 53730 }, { "epoch": 1053.73, "learning_rate": 6.011320878784629e-06, "loss": 0.3735, "step": 53740 }, { "epoch": 1053.92, "learning_rate": 5.99543362900475e-06, "loss": 0.3688, "step": 53750 }, { "epoch": 1054.0, "eval_loss": 0.3757660686969757, "eval_runtime": 2.255, "eval_samples_per_second": 1010.632, "eval_steps_per_second": 3.991, "step": 53754 }, { "epoch": 1054.12, "learning_rate": 5.979566527019289e-06, "loss": 0.3707, "step": 53760 }, { "epoch": 1054.31, "learning_rate": 5.963719577461112e-06, "loss": 0.3738, "step": 53770 }, { "epoch": 1054.51, "learning_rate": 5.947892784957162e-06, "loss": 0.3766, "step": 53780 }, { "epoch": 1054.71, "learning_rate": 5.932086154128474e-06, "loss": 0.3759, "step": 53790 }, { "epoch": 1054.9, "learning_rate": 5.916299689590298e-06, "loss": 0.369, "step": 53800 }, { "epoch": 1055.0, "eval_loss": 0.3774784207344055, "eval_runtime": 2.2073, "eval_samples_per_second": 1032.485, "eval_steps_per_second": 4.077, "step": 53805 }, { "epoch": 1055.1, "learning_rate": 5.900533395951881e-06, "loss": 0.3718, "step": 53810 }, { "epoch": 1055.29, "learning_rate": 5.884787277816649e-06, "loss": 0.3693, "step": 53820 }, { "epoch": 1055.49, "learning_rate": 5.869061339782116e-06, "loss": 0.372, "step": 53830 }, { "epoch": 1055.69, "learning_rate": 5.853355586439901e-06, "loss": 0.374, "step": 53840 }, { "epoch": 1055.88, "learning_rate": 5.837670022375734e-06, "loss": 0.3696, "step": 53850 }, { "epoch": 1056.0, "eval_loss": 0.38110822439193726, "eval_runtime": 2.2544, "eval_samples_per_second": 1010.892, "eval_steps_per_second": 3.992, "step": 53856 }, { "epoch": 1056.08, "learning_rate": 5.822004652169445e-06, "loss": 0.3728, "step": 53860 }, { "epoch": 1056.27, "learning_rate": 5.806359480394992e-06, "loss": 0.3676, "step": 53870 }, { "epoch": 1056.47, "learning_rate": 5.790734511620387e-06, "loss": 0.3764, "step": 53880 }, { "epoch": 1056.67, "learning_rate": 5.775129750407806e-06, "loss": 0.3732, "step": 53890 }, { "epoch": 1056.86, "learning_rate": 5.759545201313445e-06, "loss": 0.3707, "step": 53900 }, { "epoch": 1057.0, "eval_loss": 0.3776305615901947, "eval_runtime": 2.2002, "eval_samples_per_second": 1035.8, "eval_steps_per_second": 4.09, "step": 53907 }, { "epoch": 1057.06, "learning_rate": 5.743980868887699e-06, "loss": 0.3742, "step": 53910 }, { "epoch": 1057.25, "learning_rate": 5.728436757674981e-06, "loss": 0.3709, "step": 53920 }, { "epoch": 1057.45, "learning_rate": 5.712912872213812e-06, "loss": 0.3692, "step": 53930 }, { "epoch": 1057.65, "learning_rate": 5.6974092170368414e-06, "loss": 0.3766, "step": 53940 }, { "epoch": 1057.84, "learning_rate": 5.681925796670756e-06, "loss": 0.3765, "step": 53950 }, { "epoch": 1058.0, "eval_loss": 0.3803638219833374, "eval_runtime": 2.3012, "eval_samples_per_second": 990.343, "eval_steps_per_second": 3.911, "step": 53958 }, { "epoch": 1058.04, "learning_rate": 5.666462615636422e-06, "loss": 0.3715, "step": 53960 }, { "epoch": 1058.24, "learning_rate": 5.6510196784487125e-06, "loss": 0.3725, "step": 53970 }, { "epoch": 1058.43, "learning_rate": 5.635596989616628e-06, "loss": 0.3727, "step": 53980 }, { "epoch": 1058.63, "learning_rate": 5.620194553643243e-06, "loss": 0.372, "step": 53990 }, { "epoch": 1058.82, "learning_rate": 5.604812375025708e-06, "loss": 0.3697, "step": 54000 }, { "epoch": 1059.0, "eval_loss": 0.3813176155090332, "eval_runtime": 2.2044, "eval_samples_per_second": 1033.843, "eval_steps_per_second": 4.083, "step": 54009 }, { "epoch": 1059.02, "learning_rate": 5.589450458255324e-06, "loss": 0.3749, "step": 54010 }, { "epoch": 1059.22, "learning_rate": 5.574108807817384e-06, "loss": 0.373, "step": 54020 }, { "epoch": 1059.41, "learning_rate": 5.558787428191341e-06, "loss": 0.3729, "step": 54030 }, { "epoch": 1059.61, "learning_rate": 5.543486323850666e-06, "loss": 0.3722, "step": 54040 }, { "epoch": 1059.8, "learning_rate": 5.528205499262958e-06, "loss": 0.3719, "step": 54050 }, { "epoch": 1060.0, "learning_rate": 5.512944958889867e-06, "loss": 0.3718, "step": 54060 }, { "epoch": 1060.0, "eval_loss": 0.3722068667411804, "eval_runtime": 2.3645, "eval_samples_per_second": 963.858, "eval_steps_per_second": 3.806, "step": 54060 }, { "epoch": 1060.2, "learning_rate": 5.497704707187137e-06, "loss": 0.3722, "step": 54070 }, { "epoch": 1060.39, "learning_rate": 5.482484748604598e-06, "loss": 0.3718, "step": 54080 }, { "epoch": 1060.59, "learning_rate": 5.467285087586107e-06, "loss": 0.3773, "step": 54090 }, { "epoch": 1060.78, "learning_rate": 5.452105728569644e-06, "loss": 0.3672, "step": 54100 }, { "epoch": 1060.98, "learning_rate": 5.436946675987225e-06, "loss": 0.3699, "step": 54110 }, { "epoch": 1061.0, "eval_loss": 0.37705689668655396, "eval_runtime": 2.2209, "eval_samples_per_second": 1026.164, "eval_steps_per_second": 4.052, "step": 54111 }, { "epoch": 1061.18, "learning_rate": 5.4218079342649906e-06, "loss": 0.3643, "step": 54120 }, { "epoch": 1061.37, "learning_rate": 5.4066895078230894e-06, "loss": 0.3655, "step": 54130 }, { "epoch": 1061.57, "learning_rate": 5.391591401075765e-06, "loss": 0.3724, "step": 54140 }, { "epoch": 1061.76, "learning_rate": 5.376513618431349e-06, "loss": 0.3729, "step": 54150 }, { "epoch": 1061.96, "learning_rate": 5.361456164292171e-06, "loss": 0.3725, "step": 54160 }, { "epoch": 1062.0, "eval_loss": 0.3779694437980652, "eval_runtime": 2.3417, "eval_samples_per_second": 973.217, "eval_steps_per_second": 3.843, "step": 54162 }, { "epoch": 1062.16, "learning_rate": 5.346419043054731e-06, "loss": 0.3711, "step": 54170 }, { "epoch": 1062.35, "learning_rate": 5.331402259109491e-06, "loss": 0.3718, "step": 54180 }, { "epoch": 1062.55, "learning_rate": 5.316405816841035e-06, "loss": 0.375, "step": 54190 }, { "epoch": 1062.75, "learning_rate": 5.3014297206279945e-06, "loss": 0.375, "step": 54200 }, { "epoch": 1062.94, "learning_rate": 5.286473974843022e-06, "loss": 0.3705, "step": 54210 }, { "epoch": 1063.0, "eval_loss": 0.37669458985328674, "eval_runtime": 2.2851, "eval_samples_per_second": 997.336, "eval_steps_per_second": 3.939, "step": 54213 }, { "epoch": 1063.14, "learning_rate": 5.271538583852908e-06, "loss": 0.3732, "step": 54220 }, { "epoch": 1063.33, "learning_rate": 5.256623552018421e-06, "loss": 0.3708, "step": 54230 }, { "epoch": 1063.53, "learning_rate": 5.241728883694446e-06, "loss": 0.3735, "step": 54240 }, { "epoch": 1063.73, "learning_rate": 5.226854583229853e-06, "loss": 0.3736, "step": 54250 }, { "epoch": 1063.92, "learning_rate": 5.2120006549676516e-06, "loss": 0.3698, "step": 54260 }, { "epoch": 1064.0, "eval_loss": 0.3782898783683777, "eval_runtime": 2.2588, "eval_samples_per_second": 1008.939, "eval_steps_per_second": 3.984, "step": 54264 }, { "epoch": 1064.12, "learning_rate": 5.197167103244823e-06, "loss": 0.375, "step": 54270 }, { "epoch": 1064.31, "learning_rate": 5.182353932392435e-06, "loss": 0.3668, "step": 54280 }, { "epoch": 1064.51, "learning_rate": 5.1675611467356385e-06, "loss": 0.3702, "step": 54290 }, { "epoch": 1064.71, "learning_rate": 5.152788750593559e-06, "loss": 0.3717, "step": 54300 }, { "epoch": 1064.9, "learning_rate": 5.138036748279431e-06, "loss": 0.374, "step": 54310 }, { "epoch": 1065.0, "eval_loss": 0.3775031268596649, "eval_runtime": 2.3721, "eval_samples_per_second": 960.751, "eval_steps_per_second": 3.794, "step": 54315 }, { "epoch": 1065.1, "learning_rate": 5.123305144100467e-06, "loss": 0.3726, "step": 54320 }, { "epoch": 1065.29, "learning_rate": 5.108593942358036e-06, "loss": 0.3703, "step": 54330 }, { "epoch": 1065.49, "learning_rate": 5.0939031473474336e-06, "loss": 0.3685, "step": 54340 }, { "epoch": 1065.69, "learning_rate": 5.079232763358046e-06, "loss": 0.3701, "step": 54350 }, { "epoch": 1065.88, "learning_rate": 5.0645827946733215e-06, "loss": 0.3665, "step": 54360 }, { "epoch": 1066.0, "eval_loss": 0.3812878131866455, "eval_runtime": 2.3196, "eval_samples_per_second": 982.514, "eval_steps_per_second": 3.88, "step": 54366 }, { "epoch": 1066.08, "learning_rate": 5.04995324557069e-06, "loss": 0.3756, "step": 54370 }, { "epoch": 1066.27, "learning_rate": 5.035344120321691e-06, "loss": 0.3716, "step": 54380 }, { "epoch": 1066.47, "learning_rate": 5.020755423191839e-06, "loss": 0.3706, "step": 54390 }, { "epoch": 1066.67, "learning_rate": 5.006187158440716e-06, "loss": 0.371, "step": 54400 }, { "epoch": 1066.86, "learning_rate": 4.991639330321939e-06, "loss": 0.3695, "step": 54410 }, { "epoch": 1067.0, "eval_loss": 0.38005512952804565, "eval_runtime": 2.33, "eval_samples_per_second": 978.108, "eval_steps_per_second": 3.863, "step": 54417 }, { "epoch": 1067.06, "learning_rate": 4.977111943083118e-06, "loss": 0.3717, "step": 54420 }, { "epoch": 1067.25, "learning_rate": 4.962605000965958e-06, "loss": 0.3732, "step": 54430 }, { "epoch": 1067.45, "learning_rate": 4.948118508206156e-06, "loss": 0.3696, "step": 54440 }, { "epoch": 1067.65, "learning_rate": 4.933652469033444e-06, "loss": 0.3717, "step": 54450 }, { "epoch": 1067.84, "learning_rate": 4.9192068876715704e-06, "loss": 0.3705, "step": 54460 }, { "epoch": 1068.0, "eval_loss": 0.38045910000801086, "eval_runtime": 2.2201, "eval_samples_per_second": 1026.519, "eval_steps_per_second": 4.054, "step": 54468 }, { "epoch": 1068.04, "learning_rate": 4.904781768338342e-06, "loss": 0.3715, "step": 54470 }, { "epoch": 1068.24, "learning_rate": 4.8903771152455505e-06, "loss": 0.3755, "step": 54480 }, { "epoch": 1068.43, "learning_rate": 4.875992932599046e-06, "loss": 0.3715, "step": 54490 }, { "epoch": 1068.63, "learning_rate": 4.861629224598695e-06, "loss": 0.3718, "step": 54500 }, { "epoch": 1068.82, "learning_rate": 4.847285995438369e-06, "loss": 0.3709, "step": 54510 }, { "epoch": 1069.0, "eval_loss": 0.3779762387275696, "eval_runtime": 2.3441, "eval_samples_per_second": 972.235, "eval_steps_per_second": 3.839, "step": 54519 }, { "epoch": 1069.02, "learning_rate": 4.832963249305982e-06, "loss": 0.3705, "step": 54520 }, { "epoch": 1069.22, "learning_rate": 4.818660990383441e-06, "loss": 0.3656, "step": 54530 }, { "epoch": 1069.41, "learning_rate": 4.804379222846696e-06, "loss": 0.3681, "step": 54540 }, { "epoch": 1069.61, "learning_rate": 4.790117950865713e-06, "loss": 0.3762, "step": 54550 }, { "epoch": 1069.8, "learning_rate": 4.775877178604442e-06, "loss": 0.3734, "step": 54560 }, { "epoch": 1070.0, "learning_rate": 4.761656910220901e-06, "loss": 0.3762, "step": 54570 }, { "epoch": 1070.0, "eval_loss": 0.37581372261047363, "eval_runtime": 2.3052, "eval_samples_per_second": 988.632, "eval_steps_per_second": 3.904, "step": 54570 }, { "epoch": 1070.2, "learning_rate": 4.747457149867051e-06, "loss": 0.3741, "step": 54580 }, { "epoch": 1070.39, "learning_rate": 4.733277901688951e-06, "loss": 0.3705, "step": 54590 }, { "epoch": 1070.59, "learning_rate": 4.719119169826605e-06, "loss": 0.3679, "step": 54600 }, { "epoch": 1070.78, "learning_rate": 4.704980958414031e-06, "loss": 0.3715, "step": 54610 }, { "epoch": 1070.98, "learning_rate": 4.690863271579304e-06, "loss": 0.3718, "step": 54620 }, { "epoch": 1071.0, "eval_loss": 0.38009241223335266, "eval_runtime": 2.2507, "eval_samples_per_second": 1012.56, "eval_steps_per_second": 3.999, "step": 54621 }, { "epoch": 1071.18, "learning_rate": 4.676766113444425e-06, "loss": 0.3686, "step": 54630 }, { "epoch": 1071.37, "learning_rate": 4.662689488125509e-06, "loss": 0.3716, "step": 54640 }, { "epoch": 1071.57, "learning_rate": 4.648633399732571e-06, "loss": 0.37, "step": 54650 }, { "epoch": 1071.76, "learning_rate": 4.6345978523697094e-06, "loss": 0.3754, "step": 54660 }, { "epoch": 1071.96, "learning_rate": 4.620582850134971e-06, "loss": 0.3736, "step": 54670 }, { "epoch": 1072.0, "eval_loss": 0.3768666684627533, "eval_runtime": 2.3236, "eval_samples_per_second": 980.799, "eval_steps_per_second": 3.873, "step": 54672 }, { "epoch": 1072.16, "learning_rate": 4.606588397120417e-06, "loss": 0.3676, "step": 54680 }, { "epoch": 1072.35, "learning_rate": 4.592614497412128e-06, "loss": 0.3706, "step": 54690 }, { "epoch": 1072.55, "learning_rate": 4.5786611550901655e-06, "loss": 0.373, "step": 54700 }, { "epoch": 1072.75, "learning_rate": 4.564728374228613e-06, "loss": 0.3732, "step": 54710 }, { "epoch": 1072.94, "learning_rate": 4.5508161588954986e-06, "loss": 0.3702, "step": 54720 }, { "epoch": 1073.0, "eval_loss": 0.37629246711730957, "eval_runtime": 2.2337, "eval_samples_per_second": 1020.259, "eval_steps_per_second": 4.029, "step": 54723 }, { "epoch": 1073.14, "learning_rate": 4.536924513152915e-06, "loss": 0.371, "step": 54730 }, { "epoch": 1073.33, "learning_rate": 4.523053441056876e-06, "loss": 0.3752, "step": 54740 }, { "epoch": 1073.53, "learning_rate": 4.509202946657442e-06, "loss": 0.3692, "step": 54750 }, { "epoch": 1073.73, "learning_rate": 4.49537303399867e-06, "loss": 0.3694, "step": 54760 }, { "epoch": 1073.92, "learning_rate": 4.481563707118554e-06, "loss": 0.3716, "step": 54770 }, { "epoch": 1074.0, "eval_loss": 0.3790897727012634, "eval_runtime": 2.3133, "eval_samples_per_second": 985.167, "eval_steps_per_second": 3.891, "step": 54774 }, { "epoch": 1074.12, "learning_rate": 4.467774970049129e-06, "loss": 0.3717, "step": 54780 }, { "epoch": 1074.31, "learning_rate": 4.454006826816373e-06, "loss": 0.3748, "step": 54790 }, { "epoch": 1074.51, "learning_rate": 4.440259281440311e-06, "loss": 0.3725, "step": 54800 }, { "epoch": 1074.71, "learning_rate": 4.42653233793491e-06, "loss": 0.3704, "step": 54810 }, { "epoch": 1074.9, "learning_rate": 4.412826000308111e-06, "loss": 0.3684, "step": 54820 }, { "epoch": 1075.0, "eval_loss": 0.37449750304222107, "eval_runtime": 2.2223, "eval_samples_per_second": 1025.497, "eval_steps_per_second": 4.05, "step": 54825 }, { "epoch": 1075.1, "learning_rate": 4.399140272561882e-06, "loss": 0.3727, "step": 54830 }, { "epoch": 1075.29, "learning_rate": 4.3854751586921255e-06, "loss": 0.37, "step": 54840 }, { "epoch": 1075.49, "learning_rate": 4.3718306626887825e-06, "loss": 0.367, "step": 54850 }, { "epoch": 1075.69, "learning_rate": 4.3582067885357175e-06, "loss": 0.3697, "step": 54860 }, { "epoch": 1075.88, "learning_rate": 4.344603540210814e-06, "loss": 0.3682, "step": 54870 }, { "epoch": 1076.0, "eval_loss": 0.3796183168888092, "eval_runtime": 2.2676, "eval_samples_per_second": 1005.038, "eval_steps_per_second": 3.969, "step": 54876 }, { "epoch": 1076.08, "learning_rate": 4.3310209216859126e-06, "loss": 0.3734, "step": 54880 }, { "epoch": 1076.27, "learning_rate": 4.317458936926816e-06, "loss": 0.3704, "step": 54890 }, { "epoch": 1076.47, "learning_rate": 4.303917589893338e-06, "loss": 0.3739, "step": 54900 }, { "epoch": 1076.67, "learning_rate": 4.290396884539243e-06, "loss": 0.3692, "step": 54910 }, { "epoch": 1076.86, "learning_rate": 4.276896824812298e-06, "loss": 0.3699, "step": 54920 }, { "epoch": 1077.0, "eval_loss": 0.37840622663497925, "eval_runtime": 2.2821, "eval_samples_per_second": 998.656, "eval_steps_per_second": 3.944, "step": 54927 }, { "epoch": 1077.06, "learning_rate": 4.263417414654191e-06, "loss": 0.369, "step": 54930 }, { "epoch": 1077.25, "learning_rate": 4.2499586580006324e-06, "loss": 0.3675, "step": 54940 }, { "epoch": 1077.45, "learning_rate": 4.236520558781245e-06, "loss": 0.3737, "step": 54950 }, { "epoch": 1077.65, "learning_rate": 4.223103120919683e-06, "loss": 0.3716, "step": 54960 }, { "epoch": 1077.84, "learning_rate": 4.209706348333544e-06, "loss": 0.3745, "step": 54970 }, { "epoch": 1078.0, "eval_loss": 0.3793691396713257, "eval_runtime": 2.2701, "eval_samples_per_second": 1003.93, "eval_steps_per_second": 3.965, "step": 54978 }, { "epoch": 1078.04, "learning_rate": 4.1963302449343595e-06, "loss": 0.3693, "step": 54980 }, { "epoch": 1078.24, "learning_rate": 4.182974814627688e-06, "loss": 0.3722, "step": 54990 }, { "epoch": 1078.43, "learning_rate": 4.169640061312968e-06, "loss": 0.3692, "step": 55000 }, { "epoch": 1078.63, "learning_rate": 4.156325988883702e-06, "loss": 0.371, "step": 55010 }, { "epoch": 1078.82, "learning_rate": 4.143032601227281e-06, "loss": 0.3721, "step": 55020 }, { "epoch": 1079.0, "eval_loss": 0.37800872325897217, "eval_runtime": 2.2342, "eval_samples_per_second": 1020.035, "eval_steps_per_second": 4.028, "step": 55029 }, { "epoch": 1079.02, "learning_rate": 4.129759902225066e-06, "loss": 0.3687, "step": 55030 }, { "epoch": 1079.22, "learning_rate": 4.116507895752408e-06, "loss": 0.3647, "step": 55040 }, { "epoch": 1079.41, "learning_rate": 4.103276585678578e-06, "loss": 0.3695, "step": 55050 }, { "epoch": 1079.61, "learning_rate": 4.090065975866843e-06, "loss": 0.371, "step": 55060 }, { "epoch": 1079.8, "learning_rate": 4.076876070174395e-06, "loss": 0.3698, "step": 55070 }, { "epoch": 1080.0, "learning_rate": 4.063706872452402e-06, "loss": 0.3758, "step": 55080 }, { "epoch": 1080.0, "eval_loss": 0.3792489469051361, "eval_runtime": 2.2691, "eval_samples_per_second": 1004.359, "eval_steps_per_second": 3.966, "step": 55080 }, { "epoch": 1080.2, "learning_rate": 4.0505583865459714e-06, "loss": 0.3715, "step": 55090 }, { "epoch": 1080.39, "learning_rate": 4.037430616294157e-06, "loss": 0.3664, "step": 55100 }, { "epoch": 1080.59, "learning_rate": 4.024323565529977e-06, "loss": 0.3658, "step": 55110 }, { "epoch": 1080.78, "learning_rate": 4.011237238080412e-06, "loss": 0.371, "step": 55120 }, { "epoch": 1080.98, "learning_rate": 3.998171637766379e-06, "loss": 0.3742, "step": 55130 }, { "epoch": 1081.0, "eval_loss": 0.37813621759414673, "eval_runtime": 2.3613, "eval_samples_per_second": 965.161, "eval_steps_per_second": 3.812, "step": 55131 }, { "epoch": 1081.18, "learning_rate": 3.985126768402719e-06, "loss": 0.3805, "step": 55140 }, { "epoch": 1081.37, "learning_rate": 3.972102633798277e-06, "loss": 0.3703, "step": 55150 }, { "epoch": 1081.57, "learning_rate": 3.95909923775577e-06, "loss": 0.3729, "step": 55160 }, { "epoch": 1081.76, "learning_rate": 3.946116584071926e-06, "loss": 0.3742, "step": 55170 }, { "epoch": 1081.96, "learning_rate": 3.933154676537389e-06, "loss": 0.3693, "step": 55180 }, { "epoch": 1082.0, "eval_loss": 0.38186749815940857, "eval_runtime": 2.2165, "eval_samples_per_second": 1028.2, "eval_steps_per_second": 4.06, "step": 55182 }, { "epoch": 1082.16, "learning_rate": 3.920213518936732e-06, "loss": 0.3734, "step": 55190 }, { "epoch": 1082.35, "learning_rate": 3.907293115048507e-06, "loss": 0.3682, "step": 55200 }, { "epoch": 1082.55, "learning_rate": 3.894393468645163e-06, "loss": 0.3734, "step": 55210 }, { "epoch": 1082.75, "learning_rate": 3.881514583493111e-06, "loss": 0.3684, "step": 55220 }, { "epoch": 1082.94, "learning_rate": 3.868656463352721e-06, "loss": 0.3676, "step": 55230 }, { "epoch": 1083.0, "eval_loss": 0.37459880113601685, "eval_runtime": 2.3079, "eval_samples_per_second": 987.465, "eval_steps_per_second": 3.9, "step": 55233 }, { "epoch": 1083.14, "learning_rate": 3.8558191119782536e-06, "loss": 0.3701, "step": 55240 }, { "epoch": 1083.33, "learning_rate": 3.843002533117937e-06, "loss": 0.3731, "step": 55250 }, { "epoch": 1083.53, "learning_rate": 3.83020673051391e-06, "loss": 0.3737, "step": 55260 }, { "epoch": 1083.73, "learning_rate": 3.817431707902293e-06, "loss": 0.3684, "step": 55270 }, { "epoch": 1083.92, "learning_rate": 3.8046774690131037e-06, "loss": 0.3684, "step": 55280 }, { "epoch": 1084.0, "eval_loss": 0.3811744451522827, "eval_runtime": 2.351, "eval_samples_per_second": 969.387, "eval_steps_per_second": 3.828, "step": 55284 }, { "epoch": 1084.12, "learning_rate": 3.7919440175702615e-06, "loss": 0.3702, "step": 55290 }, { "epoch": 1084.31, "learning_rate": 3.779231357291684e-06, "loss": 0.371, "step": 55300 }, { "epoch": 1084.51, "learning_rate": 3.76653949188917e-06, "loss": 0.3724, "step": 55310 }, { "epoch": 1084.71, "learning_rate": 3.7538684250684626e-06, "loss": 0.3679, "step": 55320 }, { "epoch": 1084.9, "learning_rate": 3.7412181605292275e-06, "loss": 0.3727, "step": 55330 }, { "epoch": 1085.0, "eval_loss": 0.3744555711746216, "eval_runtime": 2.2572, "eval_samples_per_second": 1009.673, "eval_steps_per_second": 3.987, "step": 55335 }, { "epoch": 1085.1, "learning_rate": 3.728588701965077e-06, "loss": 0.3642, "step": 55340 }, { "epoch": 1085.29, "learning_rate": 3.715980053063519e-06, "loss": 0.3697, "step": 55350 }, { "epoch": 1085.49, "learning_rate": 3.703392217505985e-06, "loss": 0.3695, "step": 55360 }, { "epoch": 1085.69, "learning_rate": 3.6908251989678504e-06, "loss": 0.3724, "step": 55370 }, { "epoch": 1085.88, "learning_rate": 3.6782790011184228e-06, "loss": 0.3689, "step": 55380 }, { "epoch": 1086.0, "eval_loss": 0.3743017315864563, "eval_runtime": 2.3114, "eval_samples_per_second": 985.971, "eval_steps_per_second": 3.894, "step": 55386 }, { "epoch": 1086.08, "learning_rate": 3.665753627620896e-06, "loss": 0.3701, "step": 55390 }, { "epoch": 1086.27, "learning_rate": 3.653249082132395e-06, "loss": 0.3729, "step": 55400 }, { "epoch": 1086.47, "learning_rate": 3.6407653683039913e-06, "loss": 0.3704, "step": 55410 }, { "epoch": 1086.67, "learning_rate": 3.6283024897806185e-06, "loss": 0.375, "step": 55420 }, { "epoch": 1086.86, "learning_rate": 3.6158604502011744e-06, "loss": 0.3704, "step": 55430 }, { "epoch": 1087.0, "eval_loss": 0.37848153710365295, "eval_runtime": 2.2484, "eval_samples_per_second": 1013.59, "eval_steps_per_second": 4.003, "step": 55437 }, { "epoch": 1087.06, "learning_rate": 3.60343925319847e-06, "loss": 0.3661, "step": 55440 }, { "epoch": 1087.25, "learning_rate": 3.591038902399196e-06, "loss": 0.3673, "step": 55450 }, { "epoch": 1087.45, "learning_rate": 3.5786594014239973e-06, "loss": 0.3714, "step": 55460 }, { "epoch": 1087.65, "learning_rate": 3.5663007538873828e-06, "loss": 0.3684, "step": 55470 }, { "epoch": 1087.84, "learning_rate": 3.553962963397841e-06, "loss": 0.3664, "step": 55480 }, { "epoch": 1088.0, "eval_loss": 0.3773548901081085, "eval_runtime": 2.3896, "eval_samples_per_second": 953.703, "eval_steps_per_second": 3.766, "step": 55488 }, { "epoch": 1088.04, "learning_rate": 3.541646033557716e-06, "loss": 0.3713, "step": 55490 }, { "epoch": 1088.24, "learning_rate": 3.529349967963263e-06, "loss": 0.3721, "step": 55500 }, { "epoch": 1088.43, "learning_rate": 3.5170747702046782e-06, "loss": 0.3678, "step": 55510 }, { "epoch": 1088.63, "learning_rate": 3.5048204438660273e-06, "loss": 0.3711, "step": 55520 }, { "epoch": 1088.82, "learning_rate": 3.492586992525306e-06, "loss": 0.3704, "step": 55530 }, { "epoch": 1089.0, "eval_loss": 0.3757428526878357, "eval_runtime": 2.3436, "eval_samples_per_second": 972.42, "eval_steps_per_second": 3.84, "step": 55539 }, { "epoch": 1089.02, "learning_rate": 3.480374419754417e-06, "loss": 0.3732, "step": 55540 }, { "epoch": 1089.22, "learning_rate": 3.468182729119157e-06, "loss": 0.3694, "step": 55550 }, { "epoch": 1089.41, "learning_rate": 3.456011924179236e-06, "loss": 0.3751, "step": 55560 }, { "epoch": 1089.61, "learning_rate": 3.4438620084882294e-06, "loss": 0.3696, "step": 55570 }, { "epoch": 1089.8, "learning_rate": 3.431732985593666e-06, "loss": 0.3698, "step": 55580 }, { "epoch": 1090.0, "learning_rate": 3.4196248590369373e-06, "loss": 0.3702, "step": 55590 }, { "epoch": 1090.0, "eval_loss": 0.3789558708667755, "eval_runtime": 2.3648, "eval_samples_per_second": 963.724, "eval_steps_per_second": 3.806, "step": 55590 }, { "epoch": 1090.2, "learning_rate": 3.407537632353366e-06, "loss": 0.3698, "step": 55600 }, { "epoch": 1090.39, "learning_rate": 3.395471309072137e-06, "loss": 0.3665, "step": 55610 }, { "epoch": 1090.59, "learning_rate": 3.383425892716349e-06, "loss": 0.3658, "step": 55620 }, { "epoch": 1090.78, "learning_rate": 3.3714013868029883e-06, "loss": 0.3726, "step": 55630 }, { "epoch": 1090.98, "learning_rate": 3.3593977948429467e-06, "loss": 0.3747, "step": 55640 }, { "epoch": 1091.0, "eval_loss": 0.37976905703544617, "eval_runtime": 2.348, "eval_samples_per_second": 970.598, "eval_steps_per_second": 3.833, "step": 55641 }, { "epoch": 1091.18, "learning_rate": 3.347415120341029e-06, "loss": 0.3697, "step": 55650 }, { "epoch": 1091.37, "learning_rate": 3.3354533667958706e-06, "loss": 0.3685, "step": 55660 }, { "epoch": 1091.57, "learning_rate": 3.3235125377000597e-06, "loss": 0.3715, "step": 55670 }, { "epoch": 1091.76, "learning_rate": 3.3115926365400336e-06, "loss": 0.3706, "step": 55680 }, { "epoch": 1091.96, "learning_rate": 3.299693666796174e-06, "loss": 0.3704, "step": 55690 }, { "epoch": 1092.0, "eval_loss": 0.37564924359321594, "eval_runtime": 2.3319, "eval_samples_per_second": 977.321, "eval_steps_per_second": 3.86, "step": 55692 }, { "epoch": 1092.16, "learning_rate": 3.2878156319426864e-06, "loss": 0.3706, "step": 55700 }, { "epoch": 1092.35, "learning_rate": 3.275958535447687e-06, "loss": 0.3666, "step": 55710 }, { "epoch": 1092.55, "learning_rate": 3.264122380773207e-06, "loss": 0.3663, "step": 55720 }, { "epoch": 1092.75, "learning_rate": 3.2523071713751154e-06, "loss": 0.3703, "step": 55730 }, { "epoch": 1092.94, "learning_rate": 3.2405129107032023e-06, "loss": 0.3749, "step": 55740 }, { "epoch": 1093.0, "eval_loss": 0.3782815933227539, "eval_runtime": 2.2261, "eval_samples_per_second": 1023.743, "eval_steps_per_second": 4.043, "step": 55743 }, { "epoch": 1093.14, "learning_rate": 3.228739602201122e-06, "loss": 0.3711, "step": 55750 }, { "epoch": 1093.33, "learning_rate": 3.216987249306441e-06, "loss": 0.3737, "step": 55760 }, { "epoch": 1093.53, "learning_rate": 3.205255855450564e-06, "loss": 0.3723, "step": 55770 }, { "epoch": 1093.73, "learning_rate": 3.1935454240587854e-06, "loss": 0.3728, "step": 55780 }, { "epoch": 1093.92, "learning_rate": 3.181855958550311e-06, "loss": 0.3686, "step": 55790 }, { "epoch": 1094.0, "eval_loss": 0.37587156891822815, "eval_runtime": 2.2492, "eval_samples_per_second": 1013.239, "eval_steps_per_second": 4.001, "step": 55794 }, { "epoch": 1094.12, "learning_rate": 3.170187462338186e-06, "loss": 0.3728, "step": 55800 }, { "epoch": 1094.31, "learning_rate": 3.158539938829377e-06, "loss": 0.3706, "step": 55810 }, { "epoch": 1094.51, "learning_rate": 3.1469133914246797e-06, "loss": 0.3695, "step": 55820 }, { "epoch": 1094.71, "learning_rate": 3.135307823518796e-06, "loss": 0.3723, "step": 55830 }, { "epoch": 1094.9, "learning_rate": 3.123723238500289e-06, "loss": 0.369, "step": 55840 }, { "epoch": 1095.0, "eval_loss": 0.3761863708496094, "eval_runtime": 2.3314, "eval_samples_per_second": 977.508, "eval_steps_per_second": 3.86, "step": 55845 }, { "epoch": 1095.1, "learning_rate": 3.112159639751588e-06, "loss": 0.3698, "step": 55850 }, { "epoch": 1095.29, "learning_rate": 3.100617030649033e-06, "loss": 0.3705, "step": 55860 }, { "epoch": 1095.49, "learning_rate": 3.0890954145627868e-06, "loss": 0.3729, "step": 55870 }, { "epoch": 1095.69, "learning_rate": 3.0775947948569165e-06, "loss": 0.366, "step": 55880 }, { "epoch": 1095.88, "learning_rate": 3.066115174889336e-06, "loss": 0.3671, "step": 55890 }, { "epoch": 1096.0, "eval_loss": 0.3782743811607361, "eval_runtime": 2.3706, "eval_samples_per_second": 961.358, "eval_steps_per_second": 3.796, "step": 55896 }, { "epoch": 1096.08, "learning_rate": 3.0546565580118393e-06, "loss": 0.3672, "step": 55900 }, { "epoch": 1096.27, "learning_rate": 3.0432189475701003e-06, "loss": 0.3673, "step": 55910 }, { "epoch": 1096.47, "learning_rate": 3.0318023469036225e-06, "loss": 0.3749, "step": 55920 }, { "epoch": 1096.67, "learning_rate": 3.020406759345831e-06, "loss": 0.3692, "step": 55930 }, { "epoch": 1096.86, "learning_rate": 3.0090321882239477e-06, "loss": 0.3686, "step": 55940 }, { "epoch": 1097.0, "eval_loss": 0.3780481815338135, "eval_runtime": 2.4515, "eval_samples_per_second": 929.638, "eval_steps_per_second": 3.671, "step": 55947 }, { "epoch": 1097.06, "learning_rate": 2.997678636859116e-06, "loss": 0.3673, "step": 55950 }, { "epoch": 1097.25, "learning_rate": 2.986346108566326e-06, "loss": 0.3729, "step": 55960 }, { "epoch": 1097.45, "learning_rate": 2.975034606654397e-06, "loss": 0.3734, "step": 55970 }, { "epoch": 1097.65, "learning_rate": 2.963744134426063e-06, "loss": 0.3739, "step": 55980 }, { "epoch": 1097.84, "learning_rate": 2.95247469517787e-06, "loss": 0.3693, "step": 55990 }, { "epoch": 1098.0, "eval_loss": 0.3777942657470703, "eval_runtime": 2.3668, "eval_samples_per_second": 962.905, "eval_steps_per_second": 3.803, "step": 55998 }, { "epoch": 1098.04, "learning_rate": 2.941226292200244e-06, "loss": 0.3665, "step": 56000 }, { "epoch": 1098.24, "learning_rate": 2.929998928777483e-06, "loss": 0.374, "step": 56010 }, { "epoch": 1098.43, "learning_rate": 2.9187926081877146e-06, "loss": 0.3745, "step": 56020 }, { "epoch": 1098.63, "learning_rate": 2.9076073337029464e-06, "loss": 0.3685, "step": 56030 }, { "epoch": 1098.82, "learning_rate": 2.896443108589008e-06, "loss": 0.3728, "step": 56040 }, { "epoch": 1099.0, "eval_loss": 0.375933438539505, "eval_runtime": 2.4335, "eval_samples_per_second": 936.505, "eval_steps_per_second": 3.698, "step": 56049 }, { "epoch": 1099.02, "learning_rate": 2.8852999361056173e-06, "loss": 0.3733, "step": 56050 }, { "epoch": 1099.22, "learning_rate": 2.8741778195063377e-06, "loss": 0.3732, "step": 56060 }, { "epoch": 1099.41, "learning_rate": 2.8630767620385713e-06, "loss": 0.3675, "step": 56070 }, { "epoch": 1099.61, "learning_rate": 2.851996766943576e-06, "loss": 0.3739, "step": 56080 }, { "epoch": 1099.8, "learning_rate": 2.8409378374564806e-06, "loss": 0.3705, "step": 56090 }, { "epoch": 1100.0, "learning_rate": 2.829899976806219e-06, "loss": 0.3715, "step": 56100 }, { "epoch": 1100.0, "eval_loss": 0.3777158260345459, "eval_runtime": 2.2906, "eval_samples_per_second": 994.948, "eval_steps_per_second": 3.929, "step": 56100 }, { "epoch": 1100.2, "learning_rate": 2.8188831882156205e-06, "loss": 0.3721, "step": 56110 }, { "epoch": 1100.39, "learning_rate": 2.8078874749013463e-06, "loss": 0.367, "step": 56120 }, { "epoch": 1100.59, "learning_rate": 2.79691284007387e-06, "loss": 0.3694, "step": 56130 }, { "epoch": 1100.78, "learning_rate": 2.785959286937578e-06, "loss": 0.3682, "step": 56140 }, { "epoch": 1100.98, "learning_rate": 2.775026818690629e-06, "loss": 0.3712, "step": 56150 }, { "epoch": 1101.0, "eval_loss": 0.37754717469215393, "eval_runtime": 2.2472, "eval_samples_per_second": 1014.149, "eval_steps_per_second": 4.005, "step": 56151 }, { "epoch": 1101.18, "learning_rate": 2.7641154385250772e-06, "loss": 0.3703, "step": 56160 }, { "epoch": 1101.37, "learning_rate": 2.753225149626809e-06, "loss": 0.3712, "step": 56170 }, { "epoch": 1101.57, "learning_rate": 2.7423559551755376e-06, "loss": 0.3735, "step": 56180 }, { "epoch": 1101.76, "learning_rate": 2.7315078583448254e-06, "loss": 0.368, "step": 56190 }, { "epoch": 1101.96, "learning_rate": 2.7206808623020633e-06, "loss": 0.3695, "step": 56200 }, { "epoch": 1102.0, "eval_loss": 0.3767015337944031, "eval_runtime": 2.277, "eval_samples_per_second": 1000.886, "eval_steps_per_second": 3.953, "step": 56202 }, { "epoch": 1102.16, "learning_rate": 2.7098749702085142e-06, "loss": 0.3718, "step": 56210 }, { "epoch": 1102.35, "learning_rate": 2.699090185219238e-06, "loss": 0.3709, "step": 56220 }, { "epoch": 1102.55, "learning_rate": 2.6883265104831743e-06, "loss": 0.3696, "step": 56230 }, { "epoch": 1102.75, "learning_rate": 2.677583949143067e-06, "loss": 0.3741, "step": 56240 }, { "epoch": 1102.94, "learning_rate": 2.666862504335482e-06, "loss": 0.3715, "step": 56250 }, { "epoch": 1103.0, "eval_loss": 0.37621647119522095, "eval_runtime": 2.3576, "eval_samples_per_second": 966.651, "eval_steps_per_second": 3.817, "step": 56253 }, { "epoch": 1103.14, "learning_rate": 2.6561621791908655e-06, "loss": 0.3712, "step": 56260 }, { "epoch": 1103.33, "learning_rate": 2.6454829768334686e-06, "loss": 0.3662, "step": 56270 }, { "epoch": 1103.53, "learning_rate": 2.6348249003813883e-06, "loss": 0.3704, "step": 56280 }, { "epoch": 1103.73, "learning_rate": 2.6241879529465273e-06, "loss": 0.3718, "step": 56290 }, { "epoch": 1103.92, "learning_rate": 2.6135721376346592e-06, "loss": 0.3728, "step": 56300 }, { "epoch": 1104.0, "eval_loss": 0.3774849474430084, "eval_runtime": 2.2419, "eval_samples_per_second": 1016.552, "eval_steps_per_second": 4.014, "step": 56304 }, { "epoch": 1104.12, "learning_rate": 2.602977457545338e-06, "loss": 0.3664, "step": 56310 }, { "epoch": 1104.31, "learning_rate": 2.59240391577199e-06, "loss": 0.3665, "step": 56320 }, { "epoch": 1104.51, "learning_rate": 2.58185151540187e-06, "loss": 0.3712, "step": 56330 }, { "epoch": 1104.71, "learning_rate": 2.571320259516005e-06, "loss": 0.3702, "step": 56340 }, { "epoch": 1104.9, "learning_rate": 2.560810151189327e-06, "loss": 0.368, "step": 56350 }, { "epoch": 1105.0, "eval_loss": 0.37828439474105835, "eval_runtime": 2.2755, "eval_samples_per_second": 1001.518, "eval_steps_per_second": 3.955, "step": 56355 }, { "epoch": 1105.1, "learning_rate": 2.550321193490523e-06, "loss": 0.3763, "step": 56360 }, { "epoch": 1105.29, "learning_rate": 2.5398533894821437e-06, "loss": 0.3696, "step": 56370 }, { "epoch": 1105.49, "learning_rate": 2.5294067422205606e-06, "loss": 0.366, "step": 56380 }, { "epoch": 1105.69, "learning_rate": 2.5189812547559586e-06, "loss": 0.3672, "step": 56390 }, { "epoch": 1105.88, "learning_rate": 2.508576930132344e-06, "loss": 0.3705, "step": 56400 }, { "epoch": 1106.0, "eval_loss": 0.37971001863479614, "eval_runtime": 2.2636, "eval_samples_per_second": 1006.793, "eval_steps_per_second": 3.976, "step": 56406 }, { "epoch": 1106.08, "learning_rate": 2.498193771387547e-06, "loss": 0.3741, "step": 56410 }, { "epoch": 1106.27, "learning_rate": 2.487831781553223e-06, "loss": 0.3693, "step": 56420 }, { "epoch": 1106.47, "learning_rate": 2.477490963654846e-06, "loss": 0.3726, "step": 56430 }, { "epoch": 1106.67, "learning_rate": 2.4671713207117e-06, "loss": 0.3714, "step": 56440 }, { "epoch": 1106.86, "learning_rate": 2.456872855736891e-06, "loss": 0.3705, "step": 56450 }, { "epoch": 1107.0, "eval_loss": 0.37706848978996277, "eval_runtime": 2.2675, "eval_samples_per_second": 1005.061, "eval_steps_per_second": 3.969, "step": 56457 }, { "epoch": 1107.06, "learning_rate": 2.446595571737331e-06, "loss": 0.3651, "step": 56460 }, { "epoch": 1107.25, "learning_rate": 2.4363394717137608e-06, "loss": 0.3696, "step": 56470 }, { "epoch": 1107.45, "learning_rate": 2.4261045586607435e-06, "loss": 0.3731, "step": 56480 }, { "epoch": 1107.65, "learning_rate": 2.415890835566647e-06, "loss": 0.3712, "step": 56490 }, { "epoch": 1107.84, "learning_rate": 2.4056983054136365e-06, "loss": 0.3734, "step": 56500 }, { "epoch": 1108.0, "eval_loss": 0.375370591878891, "eval_runtime": 2.2889, "eval_samples_per_second": 995.683, "eval_steps_per_second": 3.932, "step": 56508 }, { "epoch": 1108.04, "learning_rate": 2.3955269711777218e-06, "loss": 0.3741, "step": 56510 }, { "epoch": 1108.24, "learning_rate": 2.3853768358286786e-06, "loss": 0.3698, "step": 56520 }, { "epoch": 1108.43, "learning_rate": 2.3752479023301434e-06, "loss": 0.3717, "step": 56530 }, { "epoch": 1108.63, "learning_rate": 2.365140173639535e-06, "loss": 0.3698, "step": 56540 }, { "epoch": 1108.82, "learning_rate": 2.3550536527080748e-06, "loss": 0.3701, "step": 56550 }, { "epoch": 1109.0, "eval_loss": 0.37934526801109314, "eval_runtime": 2.3521, "eval_samples_per_second": 968.928, "eval_steps_per_second": 3.826, "step": 56559 }, { "epoch": 1109.02, "learning_rate": 2.344988342480825e-06, "loss": 0.3704, "step": 56560 }, { "epoch": 1109.22, "learning_rate": 2.3349442458965917e-06, "loss": 0.3732, "step": 56570 }, { "epoch": 1109.41, "learning_rate": 2.3249213658880635e-06, "loss": 0.3692, "step": 56580 }, { "epoch": 1109.61, "learning_rate": 2.3149197053816822e-06, "loss": 0.3661, "step": 56590 }, { "epoch": 1109.8, "learning_rate": 2.3049392672977117e-06, "loss": 0.3711, "step": 56600 }, { "epoch": 1110.0, "learning_rate": 2.294980054550222e-06, "loss": 0.3707, "step": 56610 }, { "epoch": 1110.0, "eval_loss": 0.3728983700275421, "eval_runtime": 2.3629, "eval_samples_per_second": 964.494, "eval_steps_per_second": 3.809, "step": 56610 }, { "epoch": 1110.2, "learning_rate": 2.2850420700470773e-06, "loss": 0.3716, "step": 56620 }, { "epoch": 1110.39, "learning_rate": 2.275125316689941e-06, "loss": 0.3699, "step": 56630 }, { "epoch": 1110.59, "learning_rate": 2.265229797374296e-06, "loss": 0.367, "step": 56640 }, { "epoch": 1110.78, "learning_rate": 2.2553555149893987e-06, "loss": 0.3701, "step": 56650 }, { "epoch": 1110.98, "learning_rate": 2.2455024724183424e-06, "loss": 0.3677, "step": 56660 }, { "epoch": 1111.0, "eval_loss": 0.376329243183136, "eval_runtime": 2.3664, "eval_samples_per_second": 963.065, "eval_steps_per_second": 3.803, "step": 56661 }, { "epoch": 1111.18, "learning_rate": 2.2356706725379675e-06, "loss": 0.366, "step": 56670 }, { "epoch": 1111.37, "learning_rate": 2.2258601182189526e-06, "loss": 0.3732, "step": 56680 }, { "epoch": 1111.57, "learning_rate": 2.216070812325774e-06, "loss": 0.3684, "step": 56690 }, { "epoch": 1111.76, "learning_rate": 2.206302757716677e-06, "loss": 0.3669, "step": 56700 }, { "epoch": 1111.96, "learning_rate": 2.1965559572437147e-06, "loss": 0.3734, "step": 56710 }, { "epoch": 1112.0, "eval_loss": 0.3813353478908539, "eval_runtime": 2.431, "eval_samples_per_second": 937.488, "eval_steps_per_second": 3.702, "step": 56712 }, { "epoch": 1112.16, "learning_rate": 2.1868304137527354e-06, "loss": 0.3718, "step": 56720 }, { "epoch": 1112.35, "learning_rate": 2.177126130083384e-06, "loss": 0.3694, "step": 56730 }, { "epoch": 1112.55, "learning_rate": 2.167443109069103e-06, "loss": 0.3718, "step": 56740 }, { "epoch": 1112.75, "learning_rate": 2.157781353537105e-06, "loss": 0.3679, "step": 56750 }, { "epoch": 1112.94, "learning_rate": 2.1481408663084094e-06, "loss": 0.3714, "step": 56760 }, { "epoch": 1113.0, "eval_loss": 0.3772488236427307, "eval_runtime": 2.3362, "eval_samples_per_second": 975.496, "eval_steps_per_second": 3.852, "step": 56763 }, { "epoch": 1113.14, "learning_rate": 2.1385216501978384e-06, "loss": 0.3687, "step": 56770 }, { "epoch": 1113.33, "learning_rate": 2.128923708013963e-06, "loss": 0.3669, "step": 56780 }, { "epoch": 1113.53, "learning_rate": 2.119347042559191e-06, "loss": 0.3702, "step": 56790 }, { "epoch": 1113.73, "learning_rate": 2.1097916566296863e-06, "loss": 0.369, "step": 56800 }, { "epoch": 1113.92, "learning_rate": 2.1002575530153996e-06, "loss": 0.3654, "step": 56810 }, { "epoch": 1114.0, "eval_loss": 0.3765362501144409, "eval_runtime": 2.2523, "eval_samples_per_second": 1011.853, "eval_steps_per_second": 3.996, "step": 56814 }, { "epoch": 1114.12, "learning_rate": 2.0907447345000967e-06, "loss": 0.3689, "step": 56820 }, { "epoch": 1114.31, "learning_rate": 2.081253203861288e-06, "loss": 0.3746, "step": 56830 }, { "epoch": 1114.51, "learning_rate": 2.0717829638703e-06, "loss": 0.3751, "step": 56840 }, { "epoch": 1114.71, "learning_rate": 2.062334017292236e-06, "loss": 0.3689, "step": 56850 }, { "epoch": 1114.9, "learning_rate": 2.0529063668859646e-06, "loss": 0.3692, "step": 56860 }, { "epoch": 1115.0, "eval_loss": 0.3756592273712158, "eval_runtime": 2.3455, "eval_samples_per_second": 971.645, "eval_steps_per_second": 3.837, "step": 56865 }, { "epoch": 1115.1, "learning_rate": 2.0435000154041674e-06, "loss": 0.3747, "step": 56870 }, { "epoch": 1115.29, "learning_rate": 2.034114965593264e-06, "loss": 0.3657, "step": 56880 }, { "epoch": 1115.49, "learning_rate": 2.0247512201934964e-06, "loss": 0.3661, "step": 56890 }, { "epoch": 1115.69, "learning_rate": 2.01540878193886e-06, "loss": 0.3702, "step": 56900 }, { "epoch": 1115.88, "learning_rate": 2.0060876535571564e-06, "loss": 0.3721, "step": 56910 }, { "epoch": 1116.0, "eval_loss": 0.37494155764579773, "eval_runtime": 2.2377, "eval_samples_per_second": 1018.439, "eval_steps_per_second": 4.022, "step": 56916 }, { "epoch": 1116.08, "learning_rate": 1.996787837769942e-06, "loss": 0.3612, "step": 56920 }, { "epoch": 1116.27, "learning_rate": 1.9875093372925367e-06, "loss": 0.3668, "step": 56930 }, { "epoch": 1116.47, "learning_rate": 1.9782521548340645e-06, "loss": 0.3695, "step": 56940 }, { "epoch": 1116.67, "learning_rate": 1.969016293097422e-06, "loss": 0.3713, "step": 56950 }, { "epoch": 1116.86, "learning_rate": 1.959801754779286e-06, "loss": 0.3741, "step": 56960 }, { "epoch": 1117.0, "eval_loss": 0.3769468665122986, "eval_runtime": 2.3568, "eval_samples_per_second": 966.997, "eval_steps_per_second": 3.819, "step": 56967 }, { "epoch": 1117.06, "learning_rate": 1.95060854257007e-06, "loss": 0.3681, "step": 56970 }, { "epoch": 1117.25, "learning_rate": 1.9414366591540108e-06, "loss": 0.3726, "step": 56980 }, { "epoch": 1117.45, "learning_rate": 1.9322861072090746e-06, "loss": 0.3707, "step": 56990 }, { "epoch": 1117.65, "learning_rate": 1.9231568894070238e-06, "loss": 0.3666, "step": 57000 }, { "epoch": 1117.84, "learning_rate": 1.9140490084134013e-06, "loss": 0.3649, "step": 57010 }, { "epoch": 1118.0, "eval_loss": 0.3805931508541107, "eval_runtime": 2.2501, "eval_samples_per_second": 1012.856, "eval_steps_per_second": 4.0, "step": 57018 }, { "epoch": 1118.04, "learning_rate": 1.9049624668874885e-06, "loss": 0.3691, "step": 57020 }, { "epoch": 1118.24, "learning_rate": 1.8958972674823546e-06, "loss": 0.3667, "step": 57030 }, { "epoch": 1118.43, "learning_rate": 1.886853412844841e-06, "loss": 0.3674, "step": 57040 }, { "epoch": 1118.63, "learning_rate": 1.8778309056155433e-06, "loss": 0.368, "step": 57050 }, { "epoch": 1118.82, "learning_rate": 1.8688297484288544e-06, "loss": 0.3709, "step": 57060 }, { "epoch": 1119.0, "eval_loss": 0.37201598286628723, "eval_runtime": 2.3445, "eval_samples_per_second": 972.048, "eval_steps_per_second": 3.839, "step": 57069 }, { "epoch": 1119.02, "learning_rate": 1.8598499439128806e-06, "loss": 0.3719, "step": 57070 }, { "epoch": 1119.22, "learning_rate": 1.8508914946895492e-06, "loss": 0.3698, "step": 57080 }, { "epoch": 1119.41, "learning_rate": 1.8419544033745099e-06, "loss": 0.3725, "step": 57090 }, { "epoch": 1119.61, "learning_rate": 1.8330386725772e-06, "loss": 0.3702, "step": 57100 }, { "epoch": 1119.8, "learning_rate": 1.8241443049008208e-06, "loss": 0.3702, "step": 57110 }, { "epoch": 1120.0, "learning_rate": 1.8152713029423283e-06, "loss": 0.3721, "step": 57120 }, { "epoch": 1120.0, "eval_loss": 0.37935277819633484, "eval_runtime": 2.3207, "eval_samples_per_second": 982.051, "eval_steps_per_second": 3.878, "step": 57120 }, { "epoch": 1120.2, "learning_rate": 1.8064196692924416e-06, "loss": 0.3701, "step": 57130 }, { "epoch": 1120.39, "learning_rate": 1.7975894065356266e-06, "loss": 0.3746, "step": 57140 }, { "epoch": 1120.59, "learning_rate": 1.7887805172501295e-06, "loss": 0.3694, "step": 57150 }, { "epoch": 1120.78, "learning_rate": 1.7799930040079597e-06, "loss": 0.3692, "step": 57160 }, { "epoch": 1120.98, "learning_rate": 1.7712268693748727e-06, "loss": 0.3701, "step": 57170 }, { "epoch": 1121.0, "eval_loss": 0.3747633099555969, "eval_runtime": 2.2596, "eval_samples_per_second": 1008.585, "eval_steps_per_second": 3.983, "step": 57171 }, { "epoch": 1121.18, "learning_rate": 1.7624821159103714e-06, "loss": 0.3697, "step": 57180 }, { "epoch": 1121.37, "learning_rate": 1.7537587461677383e-06, "loss": 0.3703, "step": 57190 }, { "epoch": 1121.57, "learning_rate": 1.7450567626940026e-06, "loss": 0.3674, "step": 57200 }, { "epoch": 1121.76, "learning_rate": 1.7363761680299487e-06, "loss": 0.3717, "step": 57210 }, { "epoch": 1121.96, "learning_rate": 1.7277169647101157e-06, "loss": 0.3674, "step": 57220 }, { "epoch": 1122.0, "eval_loss": 0.3787176012992859, "eval_runtime": 2.2673, "eval_samples_per_second": 1005.16, "eval_steps_per_second": 3.969, "step": 57222 }, { "epoch": 1122.16, "learning_rate": 1.719079155262798e-06, "loss": 0.3674, "step": 57230 }, { "epoch": 1122.35, "learning_rate": 1.710462742210053e-06, "loss": 0.3702, "step": 57240 }, { "epoch": 1122.55, "learning_rate": 1.7018677280676601e-06, "loss": 0.3698, "step": 57250 }, { "epoch": 1122.75, "learning_rate": 1.6932941153451785e-06, "loss": 0.3665, "step": 57260 }, { "epoch": 1122.94, "learning_rate": 1.6847419065459306e-06, "loss": 0.3669, "step": 57270 }, { "epoch": 1123.0, "eval_loss": 0.37363022565841675, "eval_runtime": 2.3303, "eval_samples_per_second": 977.968, "eval_steps_per_second": 3.862, "step": 57273 }, { "epoch": 1123.14, "learning_rate": 1.6762111041669523e-06, "loss": 0.3675, "step": 57280 }, { "epoch": 1123.33, "learning_rate": 1.6677017106990597e-06, "loss": 0.3688, "step": 57290 }, { "epoch": 1123.53, "learning_rate": 1.6592137286267904e-06, "loss": 0.3718, "step": 57300 }, { "epoch": 1123.73, "learning_rate": 1.650747160428445e-06, "loss": 0.3675, "step": 57310 }, { "epoch": 1123.92, "learning_rate": 1.6423020085760963e-06, "loss": 0.3726, "step": 57320 }, { "epoch": 1124.0, "eval_loss": 0.37890663743019104, "eval_runtime": 2.2695, "eval_samples_per_second": 1004.198, "eval_steps_per_second": 3.966, "step": 57324 }, { "epoch": 1124.12, "learning_rate": 1.6338782755355218e-06, "loss": 0.3729, "step": 57330 }, { "epoch": 1124.31, "learning_rate": 1.6254759637662706e-06, "loss": 0.3732, "step": 57340 }, { "epoch": 1124.51, "learning_rate": 1.6170950757216223e-06, "loss": 0.3684, "step": 57350 }, { "epoch": 1124.71, "learning_rate": 1.6087356138486106e-06, "loss": 0.3683, "step": 57360 }, { "epoch": 1124.9, "learning_rate": 1.6003975805880171e-06, "loss": 0.3672, "step": 57370 }, { "epoch": 1125.0, "eval_loss": 0.3774057626724243, "eval_runtime": 2.3395, "eval_samples_per_second": 974.139, "eval_steps_per_second": 3.847, "step": 57375 }, { "epoch": 1125.1, "learning_rate": 1.5920809783743689e-06, "loss": 0.3688, "step": 57380 }, { "epoch": 1125.29, "learning_rate": 1.5837858096359151e-06, "loss": 0.3712, "step": 57390 }, { "epoch": 1125.49, "learning_rate": 1.5755120767946604e-06, "loss": 0.3693, "step": 57400 }, { "epoch": 1125.69, "learning_rate": 1.5672597822663557e-06, "loss": 0.37, "step": 57410 }, { "epoch": 1125.88, "learning_rate": 1.55902892846049e-06, "loss": 0.3674, "step": 57420 }, { "epoch": 1126.0, "eval_loss": 0.3777696490287781, "eval_runtime": 2.2811, "eval_samples_per_second": 999.061, "eval_steps_per_second": 3.945, "step": 57426 }, { "epoch": 1126.08, "learning_rate": 1.550819517780283e-06, "loss": 0.3695, "step": 57430 }, { "epoch": 1126.27, "learning_rate": 1.5426315526227e-06, "loss": 0.3663, "step": 57440 }, { "epoch": 1126.47, "learning_rate": 1.534465035378446e-06, "loss": 0.3745, "step": 57450 }, { "epoch": 1126.67, "learning_rate": 1.526319968431955e-06, "loss": 0.3704, "step": 57460 }, { "epoch": 1126.86, "learning_rate": 1.5181963541614161e-06, "loss": 0.3702, "step": 57470 }, { "epoch": 1127.0, "eval_loss": 0.37724199891090393, "eval_runtime": 2.2933, "eval_samples_per_second": 993.78, "eval_steps_per_second": 3.925, "step": 57477 }, { "epoch": 1127.06, "learning_rate": 1.5100941949387406e-06, "loss": 0.3692, "step": 57480 }, { "epoch": 1127.25, "learning_rate": 1.502013493129578e-06, "loss": 0.3721, "step": 57490 }, { "epoch": 1127.45, "learning_rate": 1.4939542510933072e-06, "loss": 0.3684, "step": 57500 }, { "epoch": 1127.65, "learning_rate": 1.4859164711830546e-06, "loss": 0.3699, "step": 57510 }, { "epoch": 1127.84, "learning_rate": 1.4779001557456593e-06, "loss": 0.3717, "step": 57520 }, { "epoch": 1128.0, "eval_loss": 0.376617968082428, "eval_runtime": 2.3749, "eval_samples_per_second": 959.606, "eval_steps_per_second": 3.79, "step": 57528 }, { "epoch": 1128.04, "learning_rate": 1.4699053071217326e-06, "loss": 0.3666, "step": 57530 }, { "epoch": 1128.24, "learning_rate": 1.461931927645557e-06, "loss": 0.3652, "step": 57540 }, { "epoch": 1128.43, "learning_rate": 1.4539800196452206e-06, "loss": 0.3693, "step": 57550 }, { "epoch": 1128.63, "learning_rate": 1.4460495854424659e-06, "loss": 0.366, "step": 57560 }, { "epoch": 1128.82, "learning_rate": 1.4381406273528239e-06, "loss": 0.3703, "step": 57570 }, { "epoch": 1129.0, "eval_loss": 0.3757016360759735, "eval_runtime": 2.2478, "eval_samples_per_second": 1013.876, "eval_steps_per_second": 4.004, "step": 57579 }, { "epoch": 1129.02, "learning_rate": 1.4302531476855312e-06, "loss": 0.3674, "step": 57580 }, { "epoch": 1129.22, "learning_rate": 1.4223871487435618e-06, "loss": 0.3746, "step": 57590 }, { "epoch": 1129.41, "learning_rate": 1.4145426328236036e-06, "loss": 0.3699, "step": 57600 }, { "epoch": 1129.61, "learning_rate": 1.406719602216075e-06, "loss": 0.3729, "step": 57610 }, { "epoch": 1129.8, "learning_rate": 1.3989180592051313e-06, "loss": 0.3675, "step": 57620 }, { "epoch": 1130.0, "learning_rate": 1.3911380060686593e-06, "loss": 0.3695, "step": 57630 }, { "epoch": 1130.0, "eval_loss": 0.3807949423789978, "eval_runtime": 2.3206, "eval_samples_per_second": 982.078, "eval_steps_per_second": 3.878, "step": 57630 }, { "epoch": 1130.2, "learning_rate": 1.3833794450782504e-06, "loss": 0.3742, "step": 57640 }, { "epoch": 1130.39, "learning_rate": 1.3756423784992253e-06, "loss": 0.369, "step": 57650 }, { "epoch": 1130.59, "learning_rate": 1.3679268085906608e-06, "loss": 0.3698, "step": 57660 }, { "epoch": 1130.78, "learning_rate": 1.3602327376052963e-06, "loss": 0.3692, "step": 57670 }, { "epoch": 1130.98, "learning_rate": 1.3525601677896513e-06, "loss": 0.3729, "step": 57680 }, { "epoch": 1131.0, "eval_loss": 0.37210267782211304, "eval_runtime": 2.3446, "eval_samples_per_second": 972.041, "eval_steps_per_second": 3.839, "step": 57681 }, { "epoch": 1131.18, "learning_rate": 1.3449091013839426e-06, "loss": 0.3687, "step": 57690 }, { "epoch": 1131.37, "learning_rate": 1.3372795406221076e-06, "loss": 0.3689, "step": 57700 }, { "epoch": 1131.57, "learning_rate": 1.3296714877318148e-06, "loss": 0.3752, "step": 57710 }, { "epoch": 1131.76, "learning_rate": 1.322084944934429e-06, "loss": 0.3695, "step": 57720 }, { "epoch": 1131.96, "learning_rate": 1.3145199144450613e-06, "loss": 0.3657, "step": 57730 }, { "epoch": 1132.0, "eval_loss": 0.37843912839889526, "eval_runtime": 2.2806, "eval_samples_per_second": 999.309, "eval_steps_per_second": 3.946, "step": 57732 }, { "epoch": 1132.16, "learning_rate": 1.3069763984725452e-06, "loss": 0.3704, "step": 57740 }, { "epoch": 1132.35, "learning_rate": 1.2994543992193935e-06, "loss": 0.3709, "step": 57750 }, { "epoch": 1132.55, "learning_rate": 1.2919539188818828e-06, "loss": 0.3705, "step": 57760 }, { "epoch": 1132.75, "learning_rate": 1.2844749596499782e-06, "loss": 0.3691, "step": 57770 }, { "epoch": 1132.94, "learning_rate": 1.277017523707366e-06, "loss": 0.3676, "step": 57780 }, { "epoch": 1133.0, "eval_loss": 0.3793400228023529, "eval_runtime": 2.3746, "eval_samples_per_second": 959.724, "eval_steps_per_second": 3.79, "step": 57783 }, { "epoch": 1133.14, "learning_rate": 1.2695816132314545e-06, "loss": 0.3724, "step": 57790 }, { "epoch": 1133.33, "learning_rate": 1.2621672303933738e-06, "loss": 0.3746, "step": 57800 }, { "epoch": 1133.53, "learning_rate": 1.254774377357942e-06, "loss": 0.3699, "step": 57810 }, { "epoch": 1133.73, "learning_rate": 1.2474030562837162e-06, "loss": 0.3683, "step": 57820 }, { "epoch": 1133.92, "learning_rate": 1.2400532693229493e-06, "loss": 0.3684, "step": 57830 }, { "epoch": 1134.0, "eval_loss": 0.37969231605529785, "eval_runtime": 2.3069, "eval_samples_per_second": 987.914, "eval_steps_per_second": 3.901, "step": 57834 }, { "epoch": 1134.12, "learning_rate": 1.2327250186216248e-06, "loss": 0.3688, "step": 57840 }, { "epoch": 1134.31, "learning_rate": 1.2254183063194312e-06, "loss": 0.3696, "step": 57850 }, { "epoch": 1134.51, "learning_rate": 1.2181331345497453e-06, "loss": 0.3682, "step": 57860 }, { "epoch": 1134.71, "learning_rate": 1.2108695054396988e-06, "loss": 0.3666, "step": 57870 }, { "epoch": 1134.9, "learning_rate": 1.2036274211100955e-06, "loss": 0.3703, "step": 57880 }, { "epoch": 1135.0, "eval_loss": 0.3771282136440277, "eval_runtime": 2.257, "eval_samples_per_second": 1009.765, "eval_steps_per_second": 3.988, "step": 57885 }, { "epoch": 1135.1, "learning_rate": 1.1964068836754687e-06, "loss": 0.3722, "step": 57890 }, { "epoch": 1135.29, "learning_rate": 1.189207895244057e-06, "loss": 0.3746, "step": 57900 }, { "epoch": 1135.49, "learning_rate": 1.182030457917796e-06, "loss": 0.3681, "step": 57910 }, { "epoch": 1135.69, "learning_rate": 1.174874573792342e-06, "loss": 0.3683, "step": 57920 }, { "epoch": 1135.88, "learning_rate": 1.167740244957041e-06, "loss": 0.3705, "step": 57930 }, { "epoch": 1136.0, "eval_loss": 0.37516215443611145, "eval_runtime": 2.2628, "eval_samples_per_second": 1007.181, "eval_steps_per_second": 3.977, "step": 57936 }, { "epoch": 1136.08, "learning_rate": 1.1606274734949766e-06, "loss": 0.3713, "step": 57940 }, { "epoch": 1136.27, "learning_rate": 1.1535362614829208e-06, "loss": 0.3704, "step": 57950 }, { "epoch": 1136.47, "learning_rate": 1.1464666109913256e-06, "loss": 0.3653, "step": 57960 }, { "epoch": 1136.67, "learning_rate": 1.1394185240843983e-06, "loss": 0.3673, "step": 57970 }, { "epoch": 1136.86, "learning_rate": 1.1323920028200096e-06, "loss": 0.3691, "step": 57980 }, { "epoch": 1137.0, "eval_loss": 0.3772904872894287, "eval_runtime": 2.2617, "eval_samples_per_second": 1007.636, "eval_steps_per_second": 3.979, "step": 57987 }, { "epoch": 1137.06, "learning_rate": 1.125387049249743e-06, "loss": 0.3706, "step": 57990 }, { "epoch": 1137.25, "learning_rate": 1.1184036654188877e-06, "loss": 0.3736, "step": 58000 }, { "epoch": 1137.45, "learning_rate": 1.1114418533664626e-06, "loss": 0.3705, "step": 58010 }, { "epoch": 1137.65, "learning_rate": 1.1045016151251335e-06, "loss": 0.3681, "step": 58020 }, { "epoch": 1137.84, "learning_rate": 1.0975829527212959e-06, "loss": 0.3673, "step": 58030 }, { "epoch": 1138.0, "eval_loss": 0.3766086995601654, "eval_runtime": 2.3006, "eval_samples_per_second": 990.6, "eval_steps_per_second": 3.912, "step": 58038 }, { "epoch": 1138.04, "learning_rate": 1.0906858681750508e-06, "loss": 0.3686, "step": 58040 }, { "epoch": 1138.24, "learning_rate": 1.0838103635002038e-06, "loss": 0.3697, "step": 58050 }, { "epoch": 1138.43, "learning_rate": 1.0769564407042407e-06, "loss": 0.3697, "step": 58060 }, { "epoch": 1138.63, "learning_rate": 1.0701241017883526e-06, "loss": 0.3656, "step": 58070 }, { "epoch": 1138.82, "learning_rate": 1.0633133487474189e-06, "loss": 0.3715, "step": 58080 }, { "epoch": 1139.0, "eval_loss": 0.3779025971889496, "eval_runtime": 2.3816, "eval_samples_per_second": 956.928, "eval_steps_per_second": 3.779, "step": 58089 }, { "epoch": 1139.02, "learning_rate": 1.0565241835700482e-06, "loss": 0.3668, "step": 58090 }, { "epoch": 1139.22, "learning_rate": 1.049756608238514e-06, "loss": 0.3665, "step": 58100 }, { "epoch": 1139.41, "learning_rate": 1.0430106247288018e-06, "loss": 0.3684, "step": 58110 }, { "epoch": 1139.61, "learning_rate": 1.036286235010586e-06, "loss": 0.3738, "step": 58120 }, { "epoch": 1139.8, "learning_rate": 1.0295834410472382e-06, "loss": 0.3737, "step": 58130 }, { "epoch": 1140.0, "learning_rate": 1.0229022447958256e-06, "loss": 0.37, "step": 58140 }, { "epoch": 1140.0, "eval_loss": 0.37501007318496704, "eval_runtime": 2.2685, "eval_samples_per_second": 1004.642, "eval_steps_per_second": 3.967, "step": 58140 }, { "epoch": 1140.2, "learning_rate": 1.016242648207105e-06, "loss": 0.3705, "step": 58150 }, { "epoch": 1140.39, "learning_rate": 1.0096046532255374e-06, "loss": 0.3679, "step": 58160 }, { "epoch": 1140.59, "learning_rate": 1.0029882617892643e-06, "loss": 0.3662, "step": 58170 }, { "epoch": 1140.78, "learning_rate": 9.963934758301235e-07, "loss": 0.3689, "step": 58180 }, { "epoch": 1140.98, "learning_rate": 9.898202972736497e-07, "loss": 0.3709, "step": 58190 }, { "epoch": 1141.0, "eval_loss": 0.37856853008270264, "eval_runtime": 2.2632, "eval_samples_per_second": 1006.96, "eval_steps_per_second": 3.977, "step": 58191 }, { "epoch": 1141.18, "learning_rate": 9.832687280390578e-07, "loss": 0.3671, "step": 58200 }, { "epoch": 1141.37, "learning_rate": 9.767387700392675e-07, "loss": 0.3687, "step": 58210 }, { "epoch": 1141.57, "learning_rate": 9.702304251808707e-07, "loss": 0.3715, "step": 58220 }, { "epoch": 1141.76, "learning_rate": 9.637436953641803e-07, "loss": 0.3679, "step": 58230 }, { "epoch": 1141.96, "learning_rate": 9.57278582483148e-07, "loss": 0.3696, "step": 58240 }, { "epoch": 1142.0, "eval_loss": 0.37759509682655334, "eval_runtime": 2.2953, "eval_samples_per_second": 992.889, "eval_steps_per_second": 3.921, "step": 58242 }, { "epoch": 1142.16, "learning_rate": 9.50835088425464e-07, "loss": 0.3672, "step": 58250 }, { "epoch": 1142.35, "learning_rate": 9.444132150724732e-07, "loss": 0.3663, "step": 58260 }, { "epoch": 1142.55, "learning_rate": 9.380129642992257e-07, "loss": 0.367, "step": 58270 }, { "epoch": 1142.75, "learning_rate": 9.316343379744517e-07, "loss": 0.3706, "step": 58280 }, { "epoch": 1142.94, "learning_rate": 9.252773379605616e-07, "loss": 0.3752, "step": 58290 }, { "epoch": 1143.0, "eval_loss": 0.3757573366165161, "eval_runtime": 2.3475, "eval_samples_per_second": 970.804, "eval_steps_per_second": 3.834, "step": 58293 }, { "epoch": 1143.14, "learning_rate": 9.189419661136621e-07, "loss": 0.3684, "step": 58300 }, { "epoch": 1143.33, "learning_rate": 9.126282242835487e-07, "loss": 0.3688, "step": 58310 }, { "epoch": 1143.53, "learning_rate": 9.0633611431368e-07, "loss": 0.3681, "step": 58320 }, { "epoch": 1143.73, "learning_rate": 9.000656380412114e-07, "loss": 0.3728, "step": 58330 }, { "epoch": 1143.92, "learning_rate": 8.93816797296995e-07, "loss": 0.3675, "step": 58340 }, { "epoch": 1144.0, "eval_loss": 0.37619441747665405, "eval_runtime": 2.2518, "eval_samples_per_second": 1012.06, "eval_steps_per_second": 3.997, "step": 58344 }, { "epoch": 1144.12, "learning_rate": 8.875895939055466e-07, "loss": 0.3683, "step": 58350 }, { "epoch": 1144.31, "learning_rate": 8.813840296850616e-07, "loss": 0.3677, "step": 58360 }, { "epoch": 1144.51, "learning_rate": 8.752001064474407e-07, "loss": 0.3693, "step": 58370 }, { "epoch": 1144.71, "learning_rate": 8.690378259982478e-07, "loss": 0.3662, "step": 58380 }, { "epoch": 1144.9, "learning_rate": 8.628971901367271e-07, "loss": 0.3681, "step": 58390 }, { "epoch": 1145.0, "eval_loss": 0.3740864396095276, "eval_runtime": 2.2362, "eval_samples_per_second": 1019.118, "eval_steps_per_second": 4.025, "step": 58395 }, { "epoch": 1145.1, "learning_rate": 8.567782006558027e-07, "loss": 0.3709, "step": 58400 }, { "epoch": 1145.29, "learning_rate": 8.506808593420955e-07, "loss": 0.3683, "step": 58410 }, { "epoch": 1145.49, "learning_rate": 8.446051679758814e-07, "loss": 0.3729, "step": 58420 }, { "epoch": 1145.69, "learning_rate": 8.38551128331133e-07, "loss": 0.3716, "step": 58430 }, { "epoch": 1145.88, "learning_rate": 8.325187421755031e-07, "loss": 0.3684, "step": 58440 }, { "epoch": 1146.0, "eval_loss": 0.3794369399547577, "eval_runtime": 2.2787, "eval_samples_per_second": 1000.144, "eval_steps_per_second": 3.95, "step": 58446 }, { "epoch": 1146.08, "learning_rate": 8.265080112702993e-07, "loss": 0.371, "step": 58450 }, { "epoch": 1146.27, "learning_rate": 8.205189373705262e-07, "loss": 0.3694, "step": 58460 }, { "epoch": 1146.47, "learning_rate": 8.145515222248599e-07, "loss": 0.3674, "step": 58470 }, { "epoch": 1146.67, "learning_rate": 8.086057675756569e-07, "loss": 0.3671, "step": 58480 }, { "epoch": 1146.86, "learning_rate": 8.026816751589366e-07, "loss": 0.3663, "step": 58490 }, { "epoch": 1147.0, "eval_loss": 0.3720145523548126, "eval_runtime": 2.2643, "eval_samples_per_second": 1006.479, "eval_steps_per_second": 3.975, "step": 58497 }, { "epoch": 1147.06, "learning_rate": 7.96779246704407e-07, "loss": 0.3702, "step": 58500 }, { "epoch": 1147.25, "learning_rate": 7.908984839354482e-07, "loss": 0.3687, "step": 58510 }, { "epoch": 1147.45, "learning_rate": 7.850393885691031e-07, "loss": 0.3683, "step": 58520 }, { "epoch": 1147.65, "learning_rate": 7.792019623161116e-07, "loss": 0.3691, "step": 58530 }, { "epoch": 1147.84, "learning_rate": 7.733862068808521e-07, "loss": 0.3712, "step": 58540 }, { "epoch": 1148.0, "eval_loss": 0.3741941452026367, "eval_runtime": 2.3347, "eval_samples_per_second": 976.132, "eval_steps_per_second": 3.855, "step": 58548 }, { "epoch": 1148.04, "learning_rate": 7.675921239614164e-07, "loss": 0.3694, "step": 58550 }, { "epoch": 1148.24, "learning_rate": 7.618197152495258e-07, "loss": 0.3693, "step": 58560 }, { "epoch": 1148.43, "learning_rate": 7.560689824306076e-07, "loss": 0.3691, "step": 58570 }, { "epoch": 1148.63, "learning_rate": 7.503399271837518e-07, "loss": 0.3709, "step": 58580 }, { "epoch": 1148.82, "learning_rate": 7.446325511817119e-07, "loss": 0.3672, "step": 58590 }, { "epoch": 1149.0, "eval_loss": 0.3786185681819916, "eval_runtime": 2.3832, "eval_samples_per_second": 956.28, "eval_steps_per_second": 3.776, "step": 58599 }, { "epoch": 1149.02, "learning_rate": 7.389468560909051e-07, "loss": 0.3692, "step": 58600 }, { "epoch": 1149.22, "learning_rate": 7.332828435714366e-07, "loss": 0.3673, "step": 58610 }, { "epoch": 1149.41, "learning_rate": 7.276405152770671e-07, "loss": 0.3684, "step": 58620 }, { "epoch": 1149.61, "learning_rate": 7.220198728552368e-07, "loss": 0.3733, "step": 58630 }, { "epoch": 1149.8, "learning_rate": 7.164209179470415e-07, "loss": 0.3687, "step": 58640 }, { "epoch": 1150.0, "learning_rate": 7.108436521872568e-07, "loss": 0.369, "step": 58650 }, { "epoch": 1150.0, "eval_loss": 0.37368664145469666, "eval_runtime": 2.4044, "eval_samples_per_second": 947.86, "eval_steps_per_second": 3.743, "step": 58650 }, { "epoch": 1150.2, "learning_rate": 7.052880772043134e-07, "loss": 0.371, "step": 58660 }, { "epoch": 1150.39, "learning_rate": 6.997541946203139e-07, "loss": 0.3704, "step": 58670 }, { "epoch": 1150.59, "learning_rate": 6.942420060510406e-07, "loss": 0.3694, "step": 58680 }, { "epoch": 1150.78, "learning_rate": 6.887515131059229e-07, "loss": 0.3674, "step": 58690 }, { "epoch": 1150.98, "learning_rate": 6.832827173880618e-07, "loss": 0.3648, "step": 58700 }, { "epoch": 1151.0, "eval_loss": 0.37666937708854675, "eval_runtime": 2.4198, "eval_samples_per_second": 941.831, "eval_steps_per_second": 3.719, "step": 58701 }, { "epoch": 1151.18, "learning_rate": 6.778356204942214e-07, "loss": 0.3701, "step": 58710 }, { "epoch": 1151.37, "learning_rate": 6.724102240148299e-07, "loss": 0.3712, "step": 58720 }, { "epoch": 1151.57, "learning_rate": 6.670065295339866e-07, "loss": 0.3712, "step": 58730 }, { "epoch": 1151.76, "learning_rate": 6.616245386294627e-07, "loss": 0.3705, "step": 58740 }, { "epoch": 1151.96, "learning_rate": 6.562642528726597e-07, "loss": 0.3704, "step": 58750 }, { "epoch": 1152.0, "eval_loss": 0.37399017810821533, "eval_runtime": 2.2859, "eval_samples_per_second": 996.982, "eval_steps_per_second": 3.937, "step": 58752 }, { "epoch": 1152.16, "learning_rate": 6.509256738286672e-07, "loss": 0.3687, "step": 58760 }, { "epoch": 1152.35, "learning_rate": 6.4560880305623e-07, "loss": 0.3616, "step": 58770 }, { "epoch": 1152.55, "learning_rate": 6.403136421077565e-07, "loss": 0.3695, "step": 58780 }, { "epoch": 1152.75, "learning_rate": 6.350401925293264e-07, "loss": 0.3713, "step": 58790 }, { "epoch": 1152.94, "learning_rate": 6.29788455860658e-07, "loss": 0.3695, "step": 58800 }, { "epoch": 1153.0, "eval_loss": 0.3780522346496582, "eval_runtime": 2.3433, "eval_samples_per_second": 972.56, "eval_steps_per_second": 3.841, "step": 58803 }, { "epoch": 1153.14, "learning_rate": 6.245584336351417e-07, "loss": 0.3701, "step": 58810 }, { "epoch": 1153.33, "learning_rate": 6.193501273798307e-07, "loss": 0.3689, "step": 58820 }, { "epoch": 1153.53, "learning_rate": 6.141635386154253e-07, "loss": 0.3688, "step": 58830 }, { "epoch": 1153.73, "learning_rate": 6.089986688563143e-07, "loss": 0.3695, "step": 58840 }, { "epoch": 1153.92, "learning_rate": 6.038555196105077e-07, "loss": 0.3707, "step": 58850 }, { "epoch": 1154.0, "eval_loss": 0.37525415420532227, "eval_runtime": 2.319, "eval_samples_per_second": 982.731, "eval_steps_per_second": 3.881, "step": 58854 }, { "epoch": 1154.12, "learning_rate": 5.987340923796879e-07, "loss": 0.3646, "step": 58860 }, { "epoch": 1154.31, "learning_rate": 5.936343886592087e-07, "loss": 0.3709, "step": 58870 }, { "epoch": 1154.51, "learning_rate": 5.885564099380624e-07, "loss": 0.3701, "step": 58880 }, { "epoch": 1154.71, "learning_rate": 5.835001576989129e-07, "loss": 0.3689, "step": 58890 }, { "epoch": 1154.9, "learning_rate": 5.784656334180709e-07, "loss": 0.3661, "step": 58900 }, { "epoch": 1155.0, "eval_loss": 0.37740692496299744, "eval_runtime": 2.2596, "eval_samples_per_second": 1008.586, "eval_steps_per_second": 3.983, "step": 58905 }, { "epoch": 1155.1, "learning_rate": 5.73452838565494e-07, "loss": 0.3704, "step": 58910 }, { "epoch": 1155.29, "learning_rate": 5.684617746048198e-07, "loss": 0.3677, "step": 58920 }, { "epoch": 1155.49, "learning_rate": 5.634924429933241e-07, "loss": 0.3684, "step": 58930 }, { "epoch": 1155.69, "learning_rate": 5.585448451819296e-07, "loss": 0.3633, "step": 58940 }, { "epoch": 1155.88, "learning_rate": 5.536189826152476e-07, "loss": 0.367, "step": 58950 }, { "epoch": 1156.0, "eval_loss": 0.3763006031513214, "eval_runtime": 2.2788, "eval_samples_per_second": 1000.081, "eval_steps_per_second": 3.949, "step": 58956 }, { "epoch": 1156.08, "learning_rate": 5.487148567315026e-07, "loss": 0.3703, "step": 58960 }, { "epoch": 1156.27, "learning_rate": 5.438324689625989e-07, "loss": 0.3678, "step": 58970 }, { "epoch": 1156.47, "learning_rate": 5.389718207340716e-07, "loss": 0.368, "step": 58980 }, { "epoch": 1156.67, "learning_rate": 5.341329134651351e-07, "loss": 0.3682, "step": 58990 }, { "epoch": 1156.86, "learning_rate": 5.293157485686428e-07, "loss": 0.3657, "step": 59000 }, { "epoch": 1157.0, "eval_loss": 0.3766930103302002, "eval_runtime": 2.379, "eval_samples_per_second": 957.978, "eval_steps_per_second": 3.783, "step": 59007 }, { "epoch": 1157.06, "learning_rate": 5.245203274510862e-07, "loss": 0.3731, "step": 59010 }, { "epoch": 1157.25, "learning_rate": 5.197466515126369e-07, "loss": 0.364, "step": 59020 }, { "epoch": 1157.45, "learning_rate": 5.149947221470885e-07, "loss": 0.3691, "step": 59030 }, { "epoch": 1157.65, "learning_rate": 5.102645407419059e-07, "loss": 0.3675, "step": 59040 }, { "epoch": 1157.84, "learning_rate": 5.055561086781928e-07, "loss": 0.3638, "step": 59050 }, { "epoch": 1158.0, "eval_loss": 0.37378421425819397, "eval_runtime": 2.2244, "eval_samples_per_second": 1024.566, "eval_steps_per_second": 4.046, "step": 59058 }, { "epoch": 1158.04, "learning_rate": 5.008694273307162e-07, "loss": 0.3721, "step": 59060 }, { "epoch": 1158.24, "learning_rate": 4.962044980678731e-07, "loss": 0.3723, "step": 59070 }, { "epoch": 1158.43, "learning_rate": 4.915613222517156e-07, "loss": 0.3693, "step": 59080 }, { "epoch": 1158.63, "learning_rate": 4.86939901237951e-07, "loss": 0.3684, "step": 59090 }, { "epoch": 1158.82, "learning_rate": 4.823402363759416e-07, "loss": 0.3728, "step": 59100 }, { "epoch": 1159.0, "eval_loss": 0.373190313577652, "eval_runtime": 2.2286, "eval_samples_per_second": 1022.623, "eval_steps_per_second": 4.038, "step": 59109 }, { "epoch": 1159.02, "learning_rate": 4.777623290086713e-07, "loss": 0.3714, "step": 59110 }, { "epoch": 1159.22, "learning_rate": 4.73206180472796e-07, "loss": 0.3674, "step": 59120 }, { "epoch": 1159.41, "learning_rate": 4.6867179209860995e-07, "loss": 0.3685, "step": 59130 }, { "epoch": 1159.61, "learning_rate": 4.641591652100457e-07, "loss": 0.3673, "step": 59140 }, { "epoch": 1159.8, "learning_rate": 4.59668301124691e-07, "loss": 0.3684, "step": 59150 }, { "epoch": 1160.0, "learning_rate": 4.551992011537886e-07, "loss": 0.3748, "step": 59160 }, { "epoch": 1160.0, "eval_loss": 0.37865450978279114, "eval_runtime": 2.3374, "eval_samples_per_second": 974.999, "eval_steps_per_second": 3.85, "step": 59160 }, { "epoch": 1160.2, "learning_rate": 4.507518666022114e-07, "loss": 0.3704, "step": 59170 }, { "epoch": 1160.39, "learning_rate": 4.463262987684707e-07, "loss": 0.3662, "step": 59180 }, { "epoch": 1160.59, "learning_rate": 4.419224989447495e-07, "loss": 0.3682, "step": 59190 }, { "epoch": 1160.78, "learning_rate": 4.3754046841685264e-07, "loss": 0.3694, "step": 59200 }, { "epoch": 1160.98, "learning_rate": 4.3318020846423163e-07, "loss": 0.3753, "step": 59210 }, { "epoch": 1161.0, "eval_loss": 0.37434232234954834, "eval_runtime": 2.3184, "eval_samples_per_second": 982.993, "eval_steps_per_second": 3.882, "step": 59211 }, { "epoch": 1161.18, "learning_rate": 4.288417203599848e-07, "loss": 0.3667, "step": 59220 }, { "epoch": 1161.37, "learning_rate": 4.245250053708654e-07, "loss": 0.3721, "step": 59230 }, { "epoch": 1161.57, "learning_rate": 4.2023006475724874e-07, "loss": 0.3694, "step": 59240 }, { "epoch": 1161.76, "learning_rate": 4.159568997731566e-07, "loss": 0.3677, "step": 59250 }, { "epoch": 1161.96, "learning_rate": 4.11705511666266e-07, "loss": 0.3663, "step": 59260 }, { "epoch": 1162.0, "eval_loss": 0.3757706880569458, "eval_runtime": 2.2639, "eval_samples_per_second": 1006.674, "eval_steps_per_second": 3.975, "step": 59262 }, { "epoch": 1162.16, "learning_rate": 4.074759016778839e-07, "loss": 0.3725, "step": 59270 }, { "epoch": 1162.35, "learning_rate": 4.0326807104297255e-07, "loss": 0.3687, "step": 59280 }, { "epoch": 1162.55, "learning_rate": 3.990820209901074e-07, "loss": 0.3672, "step": 59290 }, { "epoch": 1162.75, "learning_rate": 3.9491775274153594e-07, "loss": 0.3649, "step": 59300 }, { "epoch": 1162.94, "learning_rate": 3.9077526751312705e-07, "loss": 0.3694, "step": 59310 }, { "epoch": 1163.0, "eval_loss": 0.3772204518318176, "eval_runtime": 2.2967, "eval_samples_per_second": 992.275, "eval_steps_per_second": 3.919, "step": 59313 }, { "epoch": 1163.14, "learning_rate": 3.866545665143883e-07, "loss": 0.3713, "step": 59320 }, { "epoch": 1163.33, "learning_rate": 3.8255565094847393e-07, "loss": 0.3747, "step": 59330 }, { "epoch": 1163.53, "learning_rate": 3.7847852201218496e-07, "loss": 0.3671, "step": 59340 }, { "epoch": 1163.73, "learning_rate": 3.7442318089594416e-07, "loss": 0.3704, "step": 59350 }, { "epoch": 1163.92, "learning_rate": 3.7038962878382094e-07, "loss": 0.3657, "step": 59360 }, { "epoch": 1164.0, "eval_loss": 0.37626099586486816, "eval_runtime": 2.2934, "eval_samples_per_second": 993.736, "eval_steps_per_second": 3.924, "step": 59364 }, { "epoch": 1164.12, "learning_rate": 3.663778668535233e-07, "loss": 0.3707, "step": 59370 }, { "epoch": 1164.31, "learning_rate": 3.6238789627640596e-07, "loss": 0.3678, "step": 59380 }, { "epoch": 1164.51, "learning_rate": 3.5841971821742863e-07, "loss": 0.3652, "step": 59390 }, { "epoch": 1164.71, "learning_rate": 3.5447333383523123e-07, "loss": 0.3686, "step": 59400 }, { "epoch": 1164.9, "learning_rate": 3.50548744282067e-07, "loss": 0.3643, "step": 59410 }, { "epoch": 1165.0, "eval_loss": 0.3769838213920593, "eval_runtime": 2.2497, "eval_samples_per_second": 1013.009, "eval_steps_per_second": 4.0, "step": 59415 }, { "epoch": 1165.1, "learning_rate": 3.466459507038277e-07, "loss": 0.3708, "step": 59420 }, { "epoch": 1165.29, "learning_rate": 3.427649542400351e-07, "loss": 0.3676, "step": 59430 }, { "epoch": 1165.49, "learning_rate": 3.389057560238578e-07, "loss": 0.3694, "step": 59440 }, { "epoch": 1165.69, "learning_rate": 3.3506835718209447e-07, "loss": 0.369, "step": 59450 }, { "epoch": 1165.88, "learning_rate": 3.312527588351821e-07, "loss": 0.3679, "step": 59460 }, { "epoch": 1166.0, "eval_loss": 0.3771611452102661, "eval_runtime": 2.3268, "eval_samples_per_second": 979.474, "eval_steps_per_second": 3.868, "step": 59466 }, { "epoch": 1166.08, "learning_rate": 3.274589620971879e-07, "loss": 0.3707, "step": 59470 }, { "epoch": 1166.27, "learning_rate": 3.236869680758175e-07, "loss": 0.3632, "step": 59480 }, { "epoch": 1166.47, "learning_rate": 3.1993677787241487e-07, "loss": 0.3704, "step": 59490 }, { "epoch": 1166.67, "learning_rate": 3.162083925819375e-07, "loss": 0.3704, "step": 59500 }, { "epoch": 1166.86, "learning_rate": 3.1250181329300626e-07, "loss": 0.37, "step": 59510 }, { "epoch": 1167.0, "eval_loss": 0.37242335081100464, "eval_runtime": 2.238, "eval_samples_per_second": 1018.333, "eval_steps_per_second": 4.022, "step": 59517 }, { "epoch": 1167.06, "learning_rate": 3.088170410878471e-07, "loss": 0.3691, "step": 59520 }, { "epoch": 1167.25, "learning_rate": 3.051540770423411e-07, "loss": 0.3692, "step": 59530 }, { "epoch": 1167.45, "learning_rate": 3.01512922225991e-07, "loss": 0.374, "step": 59540 }, { "epoch": 1167.65, "learning_rate": 2.9789357770192147e-07, "loss": 0.369, "step": 59550 }, { "epoch": 1167.84, "learning_rate": 2.942960445269121e-07, "loss": 0.3693, "step": 59560 }, { "epoch": 1168.0, "eval_loss": 0.3752315938472748, "eval_runtime": 2.3291, "eval_samples_per_second": 978.487, "eval_steps_per_second": 3.864, "step": 59568 }, { "epoch": 1168.04, "learning_rate": 2.9072032375136445e-07, "loss": 0.3643, "step": 59570 }, { "epoch": 1168.24, "learning_rate": 2.871664164192933e-07, "loss": 0.3731, "step": 59580 }, { "epoch": 1168.43, "learning_rate": 2.836343235683769e-07, "loss": 0.3728, "step": 59590 }, { "epoch": 1168.63, "learning_rate": 2.8012404622989873e-07, "loss": 0.3686, "step": 59600 }, { "epoch": 1168.82, "learning_rate": 2.766355854287888e-07, "loss": 0.3705, "step": 59610 }, { "epoch": 1169.0, "eval_loss": 0.37323248386383057, "eval_runtime": 2.2636, "eval_samples_per_second": 1006.791, "eval_steps_per_second": 3.976, "step": 59619 }, { "epoch": 1169.02, "learning_rate": 2.731689421835909e-07, "loss": 0.3702, "step": 59620 }, { "epoch": 1169.22, "learning_rate": 2.697241175064868e-07, "loss": 0.3682, "step": 59630 }, { "epoch": 1169.41, "learning_rate": 2.6630111240330543e-07, "loss": 0.3734, "step": 59640 }, { "epoch": 1169.61, "learning_rate": 2.6289992787347224e-07, "loss": 0.3712, "step": 59650 }, { "epoch": 1169.8, "learning_rate": 2.5952056491005126e-07, "loss": 0.3656, "step": 59660 }, { "epoch": 1170.0, "learning_rate": 2.5616302449976145e-07, "loss": 0.3671, "step": 59670 }, { "epoch": 1170.0, "eval_loss": 0.37673383951187134, "eval_runtime": 2.2475, "eval_samples_per_second": 1013.999, "eval_steps_per_second": 4.004, "step": 59670 }, { "epoch": 1170.2, "learning_rate": 2.528273076229187e-07, "loss": 0.3696, "step": 59680 }, { "epoch": 1170.39, "learning_rate": 2.495134152534772e-07, "loss": 0.3731, "step": 59690 }, { "epoch": 1170.59, "learning_rate": 2.4622134835901307e-07, "loss": 0.3657, "step": 59700 }, { "epoch": 1170.78, "learning_rate": 2.429511079007573e-07, "loss": 0.3659, "step": 59710 }, { "epoch": 1170.98, "learning_rate": 2.397026948335212e-07, "loss": 0.3729, "step": 59720 }, { "epoch": 1171.0, "eval_loss": 0.37225037813186646, "eval_runtime": 2.3919, "eval_samples_per_second": 952.798, "eval_steps_per_second": 3.763, "step": 59721 }, { "epoch": 1171.18, "learning_rate": 2.364761101057877e-07, "loss": 0.3698, "step": 59730 }, { "epoch": 1171.37, "learning_rate": 2.3327135465964487e-07, "loss": 0.3671, "step": 59740 }, { "epoch": 1171.57, "learning_rate": 2.3008842943080253e-07, "loss": 0.3752, "step": 59750 }, { "epoch": 1171.76, "learning_rate": 2.2692733534860896e-07, "loss": 0.371, "step": 59760 }, { "epoch": 1171.96, "learning_rate": 2.2378807333603421e-07, "loss": 0.3701, "step": 59770 }, { "epoch": 1172.0, "eval_loss": 0.3768444359302521, "eval_runtime": 2.2603, "eval_samples_per_second": 1008.295, "eval_steps_per_second": 3.982, "step": 59772 }, { "epoch": 1172.16, "learning_rate": 2.2067064430967007e-07, "loss": 0.3688, "step": 59780 }, { "epoch": 1172.35, "learning_rate": 2.1757504917973013e-07, "loss": 0.3652, "step": 59790 }, { "epoch": 1172.55, "learning_rate": 2.1450128885007468e-07, "loss": 0.3655, "step": 59800 }, { "epoch": 1172.75, "learning_rate": 2.1144936421816083e-07, "loss": 0.3707, "step": 59810 }, { "epoch": 1172.94, "learning_rate": 2.0841927617508415e-07, "loss": 0.3717, "step": 59820 }, { "epoch": 1173.0, "eval_loss": 0.37819233536720276, "eval_runtime": 2.391, "eval_samples_per_second": 953.142, "eval_steps_per_second": 3.764, "step": 59823 }, { "epoch": 1173.14, "learning_rate": 2.0541102560556188e-07, "loss": 0.3676, "step": 59830 }, { "epoch": 1173.33, "learning_rate": 2.0242461338794979e-07, "loss": 0.3685, "step": 59840 }, { "epoch": 1173.53, "learning_rate": 1.9946004039420037e-07, "loss": 0.3697, "step": 59850 }, { "epoch": 1173.73, "learning_rate": 1.9651730748989624e-07, "loss": 0.37, "step": 59860 }, { "epoch": 1173.92, "learning_rate": 1.9359641553426675e-07, "loss": 0.3716, "step": 59870 }, { "epoch": 1174.0, "eval_loss": 0.37209558486938477, "eval_runtime": 2.2294, "eval_samples_per_second": 1022.25, "eval_steps_per_second": 4.037, "step": 59874 }, { "epoch": 1174.12, "learning_rate": 1.906973653801297e-07, "loss": 0.3686, "step": 59880 }, { "epoch": 1174.31, "learning_rate": 1.878201578739663e-07, "loss": 0.368, "step": 59890 }, { "epoch": 1174.51, "learning_rate": 1.8496479385583797e-07, "loss": 0.3702, "step": 59900 }, { "epoch": 1174.71, "learning_rate": 1.821312741594444e-07, "loss": 0.3686, "step": 59910 }, { "epoch": 1174.9, "learning_rate": 1.7931959961213215e-07, "loss": 0.3723, "step": 59920 }, { "epoch": 1175.0, "eval_loss": 0.3712206780910492, "eval_runtime": 2.2517, "eval_samples_per_second": 1012.146, "eval_steps_per_second": 3.997, "step": 59925 }, { "epoch": 1175.1, "learning_rate": 1.765297710348196e-07, "loss": 0.3677, "step": 59930 }, { "epoch": 1175.29, "learning_rate": 1.7376178924209673e-07, "loss": 0.3676, "step": 59940 }, { "epoch": 1175.49, "learning_rate": 1.710156550421421e-07, "loss": 0.3669, "step": 59950 }, { "epoch": 1175.69, "learning_rate": 1.6829136923677268e-07, "loss": 0.3649, "step": 59960 }, { "epoch": 1175.88, "learning_rate": 1.6558893262141048e-07, "loss": 0.3674, "step": 59970 }, { "epoch": 1176.0, "eval_loss": 0.37456443905830383, "eval_runtime": 2.291, "eval_samples_per_second": 994.763, "eval_steps_per_second": 3.928, "step": 59976 }, { "epoch": 1176.08, "learning_rate": 1.629083459851077e-07, "loss": 0.3682, "step": 59980 }, { "epoch": 1176.27, "learning_rate": 1.602496101105466e-07, "loss": 0.3701, "step": 59990 }, { "epoch": 1176.47, "learning_rate": 1.576127257740062e-07, "loss": 0.3719, "step": 60000 }, { "epoch": 1176.67, "learning_rate": 1.5499769374540394e-07, "loss": 0.3672, "step": 60010 }, { "epoch": 1176.86, "learning_rate": 1.5240451478826244e-07, "loss": 0.365, "step": 60020 }, { "epoch": 1177.0, "eval_loss": 0.37678277492523193, "eval_runtime": 2.2264, "eval_samples_per_second": 1023.605, "eval_steps_per_second": 4.042, "step": 60027 }, { "epoch": 1177.06, "learning_rate": 1.4983318965974267e-07, "loss": 0.3681, "step": 60030 }, { "epoch": 1177.25, "learning_rate": 1.4728371911061909e-07, "loss": 0.3704, "step": 60040 }, { "epoch": 1177.45, "learning_rate": 1.4475610388526294e-07, "loss": 0.3682, "step": 60050 }, { "epoch": 1177.65, "learning_rate": 1.4225034472169216e-07, "loss": 0.3694, "step": 60060 }, { "epoch": 1177.84, "learning_rate": 1.3976644235153823e-07, "loss": 0.3725, "step": 60070 }, { "epoch": 1178.0, "eval_loss": 0.3759779632091522, "eval_runtime": 2.316, "eval_samples_per_second": 984.038, "eval_steps_per_second": 3.886, "step": 60078 }, { "epoch": 1178.04, "learning_rate": 1.373043975000293e-07, "loss": 0.3739, "step": 60080 }, { "epoch": 1178.24, "learning_rate": 1.3486421088604038e-07, "loss": 0.3693, "step": 60090 }, { "epoch": 1178.43, "learning_rate": 1.324458832220432e-07, "loss": 0.3683, "step": 60100 }, { "epoch": 1178.63, "learning_rate": 1.300494152141396e-07, "loss": 0.3689, "step": 60110 }, { "epoch": 1178.82, "learning_rate": 1.2767480756205318e-07, "loss": 0.3679, "step": 60120 }, { "epoch": 1179.0, "eval_loss": 0.37421050667762756, "eval_runtime": 2.2922, "eval_samples_per_second": 994.254, "eval_steps_per_second": 3.926, "step": 60129 }, { "epoch": 1179.02, "learning_rate": 1.2532206095909604e-07, "loss": 0.3686, "step": 60130 }, { "epoch": 1179.22, "learning_rate": 1.229911760922353e-07, "loss": 0.3644, "step": 60140 }, { "epoch": 1179.41, "learning_rate": 1.2068215364203493e-07, "loss": 0.3666, "step": 60150 }, { "epoch": 1179.61, "learning_rate": 1.183949942826723e-07, "loss": 0.3658, "step": 60160 }, { "epoch": 1179.8, "learning_rate": 1.1612969868195488e-07, "loss": 0.3681, "step": 60170 }, { "epoch": 1180.0, "learning_rate": 1.1388626750128693e-07, "loss": 0.3707, "step": 60180 }, { "epoch": 1180.0, "eval_loss": 0.37527692317962646, "eval_runtime": 2.3791, "eval_samples_per_second": 957.916, "eval_steps_per_second": 3.783, "step": 60180 }, { "epoch": 1180.2, "learning_rate": 1.1166470139570282e-07, "loss": 0.3689, "step": 60190 }, { "epoch": 1180.39, "learning_rate": 1.0946500101385869e-07, "loss": 0.3663, "step": 60200 }, { "epoch": 1180.59, "learning_rate": 1.0728716699801576e-07, "loss": 0.366, "step": 60210 }, { "epoch": 1180.78, "learning_rate": 1.0513119998404873e-07, "loss": 0.3691, "step": 60220 }, { "epoch": 1180.98, "learning_rate": 1.0299710060144572e-07, "loss": 0.3698, "step": 60230 }, { "epoch": 1181.0, "eval_loss": 0.373009592294693, "eval_runtime": 2.3903, "eval_samples_per_second": 953.454, "eval_steps_per_second": 3.765, "step": 60231 }, { "epoch": 1181.18, "learning_rate": 1.008848694733333e-07, "loss": 0.3702, "step": 60240 }, { "epoch": 1181.37, "learning_rate": 9.879450721642645e-08, "loss": 0.3721, "step": 60250 }, { "epoch": 1181.57, "learning_rate": 9.672601444106198e-08, "loss": 0.3669, "step": 60260 }, { "epoch": 1181.76, "learning_rate": 9.467939175119843e-08, "loss": 0.3679, "step": 60270 }, { "epoch": 1181.96, "learning_rate": 9.265463974439947e-08, "loss": 0.3697, "step": 60280 }, { "epoch": 1182.0, "eval_loss": 0.3748382329940796, "eval_runtime": 2.359, "eval_samples_per_second": 966.069, "eval_steps_per_second": 3.815, "step": 60282 }, { "epoch": 1182.16, "learning_rate": 9.065175901185052e-08, "loss": 0.3702, "step": 60290 }, { "epoch": 1182.35, "learning_rate": 8.867075013834213e-08, "loss": 0.3686, "step": 60300 }, { "epoch": 1182.55, "learning_rate": 8.671161370229496e-08, "loss": 0.3706, "step": 60310 }, { "epoch": 1182.75, "learning_rate": 8.477435027572643e-08, "loss": 0.373, "step": 60320 }, { "epoch": 1182.94, "learning_rate": 8.285896042427576e-08, "loss": 0.368, "step": 60330 }, { "epoch": 1183.0, "eval_loss": 0.37221765518188477, "eval_runtime": 2.3579, "eval_samples_per_second": 966.524, "eval_steps_per_second": 3.817, "step": 60333 }, { "epoch": 1183.14, "learning_rate": 8.096544470719557e-08, "loss": 0.37, "step": 60340 }, { "epoch": 1183.33, "learning_rate": 7.909380367735197e-08, "loss": 0.3684, "step": 60350 }, { "epoch": 1183.53, "learning_rate": 7.724403788121614e-08, "loss": 0.3696, "step": 60360 }, { "epoch": 1183.73, "learning_rate": 7.541614785888105e-08, "loss": 0.3703, "step": 60370 }, { "epoch": 1183.92, "learning_rate": 7.361013414405315e-08, "loss": 0.3689, "step": 60380 }, { "epoch": 1184.0, "eval_loss": 0.3724251091480255, "eval_runtime": 2.2633, "eval_samples_per_second": 1006.959, "eval_steps_per_second": 3.977, "step": 60384 }, { "epoch": 1184.12, "learning_rate": 7.182599726404393e-08, "loss": 0.3701, "step": 60390 }, { "epoch": 1184.31, "learning_rate": 7.006373773977836e-08, "loss": 0.3723, "step": 60400 }, { "epoch": 1184.51, "learning_rate": 6.832335608581152e-08, "loss": 0.3658, "step": 60410 }, { "epoch": 1184.71, "learning_rate": 6.660485281027861e-08, "loss": 0.3747, "step": 60420 }, { "epoch": 1184.9, "learning_rate": 6.490822841495324e-08, "loss": 0.3667, "step": 60430 }, { "epoch": 1185.0, "eval_loss": 0.3731216490268707, "eval_runtime": 2.4824, "eval_samples_per_second": 918.05, "eval_steps_per_second": 3.625, "step": 60435 }, { "epoch": 1185.1, "learning_rate": 6.323348339521416e-08, "loss": 0.366, "step": 60440 }, { "epoch": 1185.29, "learning_rate": 6.15806182400369e-08, "loss": 0.3657, "step": 60450 }, { "epoch": 1185.49, "learning_rate": 5.99496334320354e-08, "loss": 0.3673, "step": 60460 }, { "epoch": 1185.69, "learning_rate": 5.8340529447420403e-08, "loss": 0.3743, "step": 60470 }, { "epoch": 1185.88, "learning_rate": 5.675330675600775e-08, "loss": 0.3708, "step": 60480 }, { "epoch": 1186.0, "eval_loss": 0.3785109519958496, "eval_runtime": 2.3042, "eval_samples_per_second": 989.051, "eval_steps_per_second": 3.906, "step": 60486 }, { "epoch": 1186.08, "learning_rate": 5.5187965821226755e-08, "loss": 0.3672, "step": 60490 }, { "epoch": 1186.27, "learning_rate": 5.3644507100128466e-08, "loss": 0.3723, "step": 60500 }, { "epoch": 1186.47, "learning_rate": 5.212293104337739e-08, "loss": 0.3683, "step": 60510 }, { "epoch": 1186.67, "learning_rate": 5.062323809522651e-08, "loss": 0.3655, "step": 60520 }, { "epoch": 1186.86, "learning_rate": 4.9145428693550536e-08, "loss": 0.3684, "step": 60530 }, { "epoch": 1187.0, "eval_loss": 0.3754968047142029, "eval_runtime": 2.2349, "eval_samples_per_second": 1019.716, "eval_steps_per_second": 4.027, "step": 60537 }, { "epoch": 1187.06, "learning_rate": 4.7689503269846e-08, "loss": 0.3687, "step": 60540 }, { "epoch": 1187.25, "learning_rate": 4.6255462249214505e-08, "loss": 0.3706, "step": 60550 }, { "epoch": 1187.45, "learning_rate": 4.484330605034614e-08, "loss": 0.3703, "step": 60560 }, { "epoch": 1187.65, "learning_rate": 4.345303508557774e-08, "loss": 0.3698, "step": 60570 }, { "epoch": 1187.84, "learning_rate": 4.208464976082626e-08, "loss": 0.3701, "step": 60580 }, { "epoch": 1188.0, "eval_loss": 0.3774392902851105, "eval_runtime": 2.3768, "eval_samples_per_second": 958.862, "eval_steps_per_second": 3.787, "step": 60588 }, { "epoch": 1188.04, "learning_rate": 4.073815047563878e-08, "loss": 0.3723, "step": 60590 }, { "epoch": 1188.24, "learning_rate": 3.94135376231508e-08, "loss": 0.3624, "step": 60600 }, { "epoch": 1188.43, "learning_rate": 3.811081159013629e-08, "loss": 0.3694, "step": 60610 }, { "epoch": 1188.63, "learning_rate": 3.6829972756941e-08, "loss": 0.3766, "step": 60620 }, { "epoch": 1188.82, "learning_rate": 3.5571021497557415e-08, "loss": 0.3685, "step": 60630 }, { "epoch": 1189.0, "eval_loss": 0.3732983469963074, "eval_runtime": 2.243, "eval_samples_per_second": 1016.052, "eval_steps_per_second": 4.012, "step": 60639 }, { "epoch": 1189.02, "learning_rate": 3.4333958179566526e-08, "loss": 0.3683, "step": 60640 }, { "epoch": 1189.22, "learning_rate": 3.311878316416272e-08, "loss": 0.3726, "step": 60650 }, { "epoch": 1189.41, "learning_rate": 3.192549680615386e-08, "loss": 0.3681, "step": 60660 }, { "epoch": 1189.61, "learning_rate": 3.075409945394458e-08, "loss": 0.3716, "step": 60670 }, { "epoch": 1189.8, "learning_rate": 2.9604591449569614e-08, "loss": 0.3659, "step": 60680 }, { "epoch": 1190.0, "learning_rate": 2.8476973128643832e-08, "loss": 0.37, "step": 60690 }, { "epoch": 1190.0, "eval_loss": 0.3773120641708374, "eval_runtime": 2.3368, "eval_samples_per_second": 975.255, "eval_steps_per_second": 3.851, "step": 60690 }, { "epoch": 1190.2, "learning_rate": 2.7371244820420524e-08, "loss": 0.3676, "step": 60700 }, { "epoch": 1190.39, "learning_rate": 2.6287406847733115e-08, "loss": 0.3668, "step": 60710 }, { "epoch": 1190.59, "learning_rate": 2.522545952705346e-08, "loss": 0.3667, "step": 60720 }, { "epoch": 1190.78, "learning_rate": 2.4185403168441863e-08, "loss": 0.372, "step": 60730 }, { "epoch": 1190.98, "learning_rate": 2.3167238075563754e-08, "loss": 0.372, "step": 60740 }, { "epoch": 1191.0, "eval_loss": 0.37607088685035706, "eval_runtime": 2.2766, "eval_samples_per_second": 1001.046, "eval_steps_per_second": 3.953, "step": 60741 }, { "epoch": 1191.18, "learning_rate": 2.2170964545714653e-08, "loss": 0.3716, "step": 60750 }, { "epoch": 1191.37, "learning_rate": 2.1196582869770217e-08, "loss": 0.3714, "step": 60760 }, { "epoch": 1191.57, "learning_rate": 2.0244093332227874e-08, "loss": 0.3682, "step": 60770 }, { "epoch": 1191.76, "learning_rate": 1.9313496211206813e-08, "loss": 0.3675, "step": 60780 }, { "epoch": 1191.96, "learning_rate": 1.8404791778414697e-08, "loss": 0.3677, "step": 60790 }, { "epoch": 1192.0, "eval_loss": 0.37334564328193665, "eval_runtime": 2.2903, "eval_samples_per_second": 995.079, "eval_steps_per_second": 3.93, "step": 60792 }, { "epoch": 1192.16, "learning_rate": 1.7517980299172618e-08, "loss": 0.3719, "step": 60800 }, { "epoch": 1192.35, "learning_rate": 1.6653062032406796e-08, "loss": 0.3668, "step": 60810 }, { "epoch": 1192.55, "learning_rate": 1.5810037230648553e-08, "loss": 0.3675, "step": 60820 }, { "epoch": 1192.75, "learning_rate": 1.498890614005932e-08, "loss": 0.3713, "step": 60830 }, { "epoch": 1192.94, "learning_rate": 1.4189669000380654e-08, "loss": 0.367, "step": 60840 }, { "epoch": 1193.0, "eval_loss": 0.37703248858451843, "eval_runtime": 2.3099, "eval_samples_per_second": 986.616, "eval_steps_per_second": 3.896, "step": 60843 }, { "epoch": 1193.14, "learning_rate": 1.3412326044967559e-08, "loss": 0.3731, "step": 60850 }, { "epoch": 1193.33, "learning_rate": 1.2656877500796803e-08, "loss": 0.3671, "step": 60860 }, { "epoch": 1193.53, "learning_rate": 1.192332358843362e-08, "loss": 0.3688, "step": 60870 }, { "epoch": 1193.73, "learning_rate": 1.1211664522065012e-08, "loss": 0.3713, "step": 60880 }, { "epoch": 1193.92, "learning_rate": 1.0521900509474768e-08, "loss": 0.3641, "step": 60890 }, { "epoch": 1194.0, "eval_loss": 0.3731459081172943, "eval_runtime": 2.3405, "eval_samples_per_second": 973.728, "eval_steps_per_second": 3.845, "step": 60894 }, { "epoch": 1194.12, "learning_rate": 9.854031752068447e-09, "loss": 0.3639, "step": 60900 }, { "epoch": 1194.31, "learning_rate": 9.208058444840072e-09, "loss": 0.371, "step": 60910 }, { "epoch": 1194.51, "learning_rate": 8.583980776397104e-09, "loss": 0.3663, "step": 60920 }, { "epoch": 1194.71, "learning_rate": 7.981798928968775e-09, "loss": 0.3714, "step": 60930 }, { "epoch": 1194.9, "learning_rate": 7.401513078364452e-09, "loss": 0.3679, "step": 60940 }, { "epoch": 1195.0, "eval_loss": 0.37386608123779297, "eval_runtime": 2.2252, "eval_samples_per_second": 1024.179, "eval_steps_per_second": 4.045, "step": 60945 }, { "epoch": 1195.1, "learning_rate": 6.843123394023598e-09, "loss": 0.3725, "step": 60950 }, { "epoch": 1195.29, "learning_rate": 6.306630038982463e-09, "loss": 0.3694, "step": 60960 }, { "epoch": 1195.49, "learning_rate": 5.792033169882415e-09, "loss": 0.3669, "step": 60970 }, { "epoch": 1195.69, "learning_rate": 5.299332936969935e-09, "loss": 0.3689, "step": 60980 }, { "epoch": 1195.88, "learning_rate": 4.8285294841132745e-09, "loss": 0.3709, "step": 60990 }, { "epoch": 1196.0, "eval_loss": 0.37305885553359985, "eval_runtime": 2.3119, "eval_samples_per_second": 985.777, "eval_steps_per_second": 3.893, "step": 60996 }, { "epoch": 1196.08, "learning_rate": 4.3796229487774725e-09, "loss": 0.3687, "step": 61000 }, { "epoch": 1196.27, "learning_rate": 3.95261346201603e-09, "loss": 0.3692, "step": 61010 }, { "epoch": 1196.47, "learning_rate": 3.5475011485208703e-09, "loss": 0.3659, "step": 61020 }, { "epoch": 1196.67, "learning_rate": 3.1642861265723794e-09, "loss": 0.3662, "step": 61030 }, { "epoch": 1196.86, "learning_rate": 2.802968508064385e-09, "loss": 0.3668, "step": 61040 }, { "epoch": 1197.0, "eval_loss": 0.37835466861724854, "eval_runtime": 2.3758, "eval_samples_per_second": 959.262, "eval_steps_per_second": 3.788, "step": 61047 }, { "epoch": 1197.06, "learning_rate": 2.4635483984875025e-09, "loss": 0.3672, "step": 61050 }, { "epoch": 1197.25, "learning_rate": 2.146025896945791e-09, "loss": 0.3718, "step": 61060 }, { "epoch": 1197.45, "learning_rate": 1.8504010961484238e-09, "loss": 0.3628, "step": 61070 }, { "epoch": 1197.65, "learning_rate": 1.576674082418017e-09, "loss": 0.3723, "step": 61080 }, { "epoch": 1197.84, "learning_rate": 1.3248449356739743e-09, "loss": 0.3678, "step": 61090 }, { "epoch": 1198.0, "eval_loss": 0.37536975741386414, "eval_runtime": 2.2755, "eval_samples_per_second": 1001.548, "eval_steps_per_second": 3.955, "step": 61098 }, { "epoch": 1198.04, "learning_rate": 1.0949137294324895e-09, "loss": 0.365, "step": 61100 }, { "epoch": 1198.24, "learning_rate": 8.868805308481775e-10, "loss": 0.3705, "step": 61110 }, { "epoch": 1198.43, "learning_rate": 7.007454006474623e-10, "loss": 0.3716, "step": 61120 }, { "epoch": 1198.63, "learning_rate": 5.365083931785363e-10, "loss": 0.3701, "step": 61130 }, { "epoch": 1198.82, "learning_rate": 3.9416955640303447e-10, "loss": 0.3642, "step": 61140 }, { "epoch": 1199.0, "eval_loss": 0.3795470595359802, "eval_runtime": 2.3862, "eval_samples_per_second": 955.076, "eval_steps_per_second": 3.772, "step": 61149 }, { "epoch": 1199.02, "learning_rate": 2.7372893187938003e-10, "loss": 0.3652, "step": 61150 }, { "epoch": 1199.22, "learning_rate": 1.751865547627851e-10, "loss": 0.3687, "step": 61160 }, { "epoch": 1199.41, "learning_rate": 9.854245383855708e-11, "loss": 0.3639, "step": 61170 }, { "epoch": 1199.61, "learning_rate": 4.379665148046552e-11, "loss": 0.3695, "step": 61180 }, { "epoch": 1199.8, "learning_rate": 1.0949163667395289e-11, "loss": 0.3652, "step": 61190 }, { "epoch": 1200.0, "learning_rate": 0.0, "loss": 0.3717, "step": 61200 }, { "epoch": 1200.0, "eval_loss": 0.3766399323940277, "eval_runtime": 2.4193, "eval_samples_per_second": 942.017, "eval_steps_per_second": 3.72, "step": 61200 }, { "epoch": 1200.0, "step": 61200, "total_flos": 1.6045884532435452e+21, "train_loss": 0.47761312031667996, "train_runtime": 27118.5193, "train_samples_per_second": 571.314, "train_steps_per_second": 2.257 } ], "max_steps": 61200, "num_train_epochs": 1200, "total_flos": 1.6045884532435452e+21, "trial_name": null, "trial_params": null }