{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.088194470034762, "global_step": 85000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 2e-08, "loss": 0.3166, "step": 1 }, { "epoch": 0.0, "learning_rate": 2.0000000000000002e-07, "loss": 0.3244, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.0000000000000003e-07, "loss": 0.3269, "step": 20 }, { "epoch": 0.0, "learning_rate": 6.000000000000001e-07, "loss": 0.2996, "step": 30 }, { "epoch": 0.0, "learning_rate": 8.000000000000001e-07, "loss": 0.318, "step": 40 }, { "epoch": 0.0, "learning_rate": 1.0000000000000002e-06, "loss": 0.2972, "step": 50 }, { "epoch": 0.0, "learning_rate": 1.2000000000000002e-06, "loss": 0.2929, "step": 60 }, { "epoch": 0.0, "learning_rate": 1.4000000000000001e-06, "loss": 0.2955, "step": 70 }, { "epoch": 0.0, "learning_rate": 1.6000000000000001e-06, "loss": 0.2687, "step": 80 }, { "epoch": 0.0, "learning_rate": 1.8000000000000001e-06, "loss": 0.2826, "step": 90 }, { "epoch": 0.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.2692, "step": 100 }, { "epoch": 0.0, "learning_rate": 2.2e-06, "loss": 0.2596, "step": 110 }, { "epoch": 0.0, "learning_rate": 2.4000000000000003e-06, "loss": 0.2533, "step": 120 }, { "epoch": 0.0, "learning_rate": 2.6e-06, "loss": 0.236, "step": 130 }, { "epoch": 0.0, "learning_rate": 2.8000000000000003e-06, "loss": 0.2295, "step": 140 }, { "epoch": 0.0, "learning_rate": 3e-06, "loss": 0.2383, "step": 150 }, { "epoch": 0.0, "learning_rate": 3.2000000000000003e-06, "loss": 0.2304, "step": 160 }, { "epoch": 0.0, "learning_rate": 3.4000000000000005e-06, "loss": 0.2045, "step": 170 }, { "epoch": 0.0, "learning_rate": 3.6000000000000003e-06, "loss": 0.1952, "step": 180 }, { "epoch": 0.0, "learning_rate": 3.8000000000000005e-06, "loss": 0.1973, "step": 190 }, { "epoch": 0.0, "learning_rate": 4.000000000000001e-06, "loss": 0.1987, "step": 200 }, { "epoch": 0.01, "learning_rate": 4.2000000000000004e-06, "loss": 0.1861, "step": 210 }, { "epoch": 0.01, "learning_rate": 4.4e-06, "loss": 0.175, "step": 220 }, { "epoch": 0.01, "learning_rate": 4.600000000000001e-06, "loss": 0.1636, "step": 230 }, { "epoch": 0.01, "learning_rate": 4.800000000000001e-06, "loss": 0.1643, "step": 240 }, { "epoch": 0.01, "learning_rate": 5e-06, "loss": 0.1545, "step": 250 }, { "epoch": 0.01, "learning_rate": 5.2e-06, "loss": 0.1573, "step": 260 }, { "epoch": 0.01, "learning_rate": 5.400000000000001e-06, "loss": 0.1514, "step": 270 }, { "epoch": 0.01, "learning_rate": 5.600000000000001e-06, "loss": 0.1349, "step": 280 }, { "epoch": 0.01, "learning_rate": 5.8e-06, "loss": 0.1383, "step": 290 }, { "epoch": 0.01, "learning_rate": 6e-06, "loss": 0.1325, "step": 300 }, { "epoch": 0.01, "learning_rate": 6.200000000000001e-06, "loss": 0.1276, "step": 310 }, { "epoch": 0.01, "learning_rate": 6.4000000000000006e-06, "loss": 0.1206, "step": 320 }, { "epoch": 0.01, "learning_rate": 6.600000000000001e-06, "loss": 0.1208, "step": 330 }, { "epoch": 0.01, "learning_rate": 6.800000000000001e-06, "loss": 0.1216, "step": 340 }, { "epoch": 0.01, "learning_rate": 7e-06, "loss": 0.1193, "step": 350 }, { "epoch": 0.01, "learning_rate": 7.2000000000000005e-06, "loss": 0.1163, "step": 360 }, { "epoch": 0.01, "learning_rate": 7.4e-06, "loss": 0.1136, "step": 370 }, { "epoch": 0.01, "learning_rate": 7.600000000000001e-06, "loss": 0.1152, "step": 380 }, { "epoch": 0.01, "learning_rate": 7.800000000000002e-06, "loss": 0.1122, "step": 390 }, { "epoch": 0.01, "learning_rate": 8.000000000000001e-06, "loss": 0.1062, "step": 400 }, { "epoch": 0.01, "learning_rate": 8.2e-06, "loss": 0.1091, "step": 410 }, { "epoch": 0.01, "learning_rate": 8.400000000000001e-06, "loss": 0.1079, "step": 420 }, { "epoch": 0.01, "learning_rate": 8.6e-06, "loss": 0.1198, "step": 430 }, { "epoch": 0.01, "learning_rate": 8.8e-06, "loss": 0.0944, "step": 440 }, { "epoch": 0.01, "learning_rate": 9e-06, "loss": 0.0963, "step": 450 }, { "epoch": 0.01, "learning_rate": 9.200000000000002e-06, "loss": 0.1114, "step": 460 }, { "epoch": 0.01, "learning_rate": 9.4e-06, "loss": 0.0942, "step": 470 }, { "epoch": 0.01, "learning_rate": 9.600000000000001e-06, "loss": 0.1033, "step": 480 }, { "epoch": 0.01, "learning_rate": 9.800000000000001e-06, "loss": 0.094, "step": 490 }, { "epoch": 0.01, "learning_rate": 1e-05, "loss": 0.0976, "step": 500 }, { "epoch": 0.01, "learning_rate": 1.02e-05, "loss": 0.0971, "step": 510 }, { "epoch": 0.01, "learning_rate": 1.04e-05, "loss": 0.0855, "step": 520 }, { "epoch": 0.01, "learning_rate": 1.0600000000000002e-05, "loss": 0.0801, "step": 530 }, { "epoch": 0.01, "learning_rate": 1.0800000000000002e-05, "loss": 0.0909, "step": 540 }, { "epoch": 0.01, "learning_rate": 1.1000000000000001e-05, "loss": 0.0846, "step": 550 }, { "epoch": 0.01, "learning_rate": 1.1200000000000001e-05, "loss": 0.0858, "step": 560 }, { "epoch": 0.01, "learning_rate": 1.14e-05, "loss": 0.082, "step": 570 }, { "epoch": 0.01, "learning_rate": 1.16e-05, "loss": 0.0786, "step": 580 }, { "epoch": 0.01, "learning_rate": 1.18e-05, "loss": 0.0793, "step": 590 }, { "epoch": 0.01, "learning_rate": 1.2e-05, "loss": 0.0798, "step": 600 }, { "epoch": 0.01, "learning_rate": 1.22e-05, "loss": 0.0796, "step": 610 }, { "epoch": 0.02, "learning_rate": 1.2400000000000002e-05, "loss": 0.0777, "step": 620 }, { "epoch": 0.02, "learning_rate": 1.2600000000000001e-05, "loss": 0.0802, "step": 630 }, { "epoch": 0.02, "learning_rate": 1.2800000000000001e-05, "loss": 0.0773, "step": 640 }, { "epoch": 0.02, "learning_rate": 1.3000000000000001e-05, "loss": 0.0776, "step": 650 }, { "epoch": 0.02, "learning_rate": 1.3200000000000002e-05, "loss": 0.0802, "step": 660 }, { "epoch": 0.02, "learning_rate": 1.3400000000000002e-05, "loss": 0.0713, "step": 670 }, { "epoch": 0.02, "learning_rate": 1.3600000000000002e-05, "loss": 0.0795, "step": 680 }, { "epoch": 0.02, "learning_rate": 1.38e-05, "loss": 0.0755, "step": 690 }, { "epoch": 0.02, "learning_rate": 1.4e-05, "loss": 0.0977, "step": 700 }, { "epoch": 0.02, "learning_rate": 1.4200000000000001e-05, "loss": 0.0685, "step": 710 }, { "epoch": 0.02, "learning_rate": 1.4400000000000001e-05, "loss": 0.0728, "step": 720 }, { "epoch": 0.02, "learning_rate": 1.46e-05, "loss": 0.068, "step": 730 }, { "epoch": 0.02, "learning_rate": 1.48e-05, "loss": 0.07, "step": 740 }, { "epoch": 0.02, "learning_rate": 1.5000000000000002e-05, "loss": 0.073, "step": 750 }, { "epoch": 0.02, "learning_rate": 1.5200000000000002e-05, "loss": 0.0679, "step": 760 }, { "epoch": 0.02, "learning_rate": 1.54e-05, "loss": 0.08, "step": 770 }, { "epoch": 0.02, "learning_rate": 1.5600000000000003e-05, "loss": 0.0659, "step": 780 }, { "epoch": 0.02, "learning_rate": 1.58e-05, "loss": 0.0641, "step": 790 }, { "epoch": 0.02, "learning_rate": 1.6000000000000003e-05, "loss": 0.0613, "step": 800 }, { "epoch": 0.02, "learning_rate": 1.62e-05, "loss": 0.0656, "step": 810 }, { "epoch": 0.02, "learning_rate": 1.64e-05, "loss": 0.0638, "step": 820 }, { "epoch": 0.02, "learning_rate": 1.66e-05, "loss": 0.0593, "step": 830 }, { "epoch": 0.02, "learning_rate": 1.6800000000000002e-05, "loss": 0.0733, "step": 840 }, { "epoch": 0.02, "learning_rate": 1.7e-05, "loss": 0.075, "step": 850 }, { "epoch": 0.02, "learning_rate": 1.72e-05, "loss": 0.0641, "step": 860 }, { "epoch": 0.02, "learning_rate": 1.7400000000000003e-05, "loss": 0.0698, "step": 870 }, { "epoch": 0.02, "learning_rate": 1.76e-05, "loss": 0.0624, "step": 880 }, { "epoch": 0.02, "learning_rate": 1.7800000000000002e-05, "loss": 0.0729, "step": 890 }, { "epoch": 0.02, "learning_rate": 1.8e-05, "loss": 0.0587, "step": 900 }, { "epoch": 0.02, "learning_rate": 1.8200000000000002e-05, "loss": 0.0678, "step": 910 }, { "epoch": 0.02, "learning_rate": 1.8400000000000003e-05, "loss": 0.069, "step": 920 }, { "epoch": 0.02, "learning_rate": 1.86e-05, "loss": 0.0587, "step": 930 }, { "epoch": 0.02, "learning_rate": 1.88e-05, "loss": 0.0555, "step": 940 }, { "epoch": 0.02, "learning_rate": 1.9e-05, "loss": 0.0683, "step": 950 }, { "epoch": 0.02, "learning_rate": 1.9200000000000003e-05, "loss": 0.0518, "step": 960 }, { "epoch": 0.02, "learning_rate": 1.94e-05, "loss": 0.0628, "step": 970 }, { "epoch": 0.02, "learning_rate": 1.9600000000000002e-05, "loss": 0.0573, "step": 980 }, { "epoch": 0.02, "learning_rate": 1.98e-05, "loss": 0.057, "step": 990 }, { "epoch": 0.02, "learning_rate": 2e-05, "loss": 0.0533, "step": 1000 }, { "epoch": 0.02, "learning_rate": 1.999950744982145e-05, "loss": 0.059, "step": 1010 }, { "epoch": 0.03, "learning_rate": 1.9999014899642903e-05, "loss": 0.0515, "step": 1020 }, { "epoch": 0.03, "learning_rate": 1.9998522349464352e-05, "loss": 0.0547, "step": 1030 }, { "epoch": 0.03, "learning_rate": 1.99980297992858e-05, "loss": 0.053, "step": 1040 }, { "epoch": 0.03, "learning_rate": 1.9997537249107254e-05, "loss": 0.0762, "step": 1050 }, { "epoch": 0.03, "learning_rate": 1.9997044698928703e-05, "loss": 0.0677, "step": 1060 }, { "epoch": 0.03, "learning_rate": 1.9996552148750156e-05, "loss": 0.0558, "step": 1070 }, { "epoch": 0.03, "learning_rate": 1.9996059598571605e-05, "loss": 0.0534, "step": 1080 }, { "epoch": 0.03, "learning_rate": 1.9995567048393058e-05, "loss": 0.0481, "step": 1090 }, { "epoch": 0.03, "learning_rate": 1.9995074498214507e-05, "loss": 0.0523, "step": 1100 }, { "epoch": 0.03, "learning_rate": 1.999458194803596e-05, "loss": 0.0473, "step": 1110 }, { "epoch": 0.03, "learning_rate": 1.999408939785741e-05, "loss": 0.0518, "step": 1120 }, { "epoch": 0.03, "learning_rate": 1.999359684767886e-05, "loss": 0.0528, "step": 1130 }, { "epoch": 0.03, "learning_rate": 1.999310429750031e-05, "loss": 0.0505, "step": 1140 }, { "epoch": 0.03, "learning_rate": 1.999261174732176e-05, "loss": 0.0459, "step": 1150 }, { "epoch": 0.03, "learning_rate": 1.9992119197143212e-05, "loss": 0.0458, "step": 1160 }, { "epoch": 0.03, "learning_rate": 1.999162664696466e-05, "loss": 0.0545, "step": 1170 }, { "epoch": 0.03, "learning_rate": 1.999113409678611e-05, "loss": 0.0409, "step": 1180 }, { "epoch": 0.03, "learning_rate": 1.9990641546607563e-05, "loss": 0.0602, "step": 1190 }, { "epoch": 0.03, "learning_rate": 1.9990148996429012e-05, "loss": 0.0699, "step": 1200 }, { "epoch": 0.03, "learning_rate": 1.9989656446250464e-05, "loss": 0.0464, "step": 1210 }, { "epoch": 0.03, "learning_rate": 1.9989163896071913e-05, "loss": 0.0469, "step": 1220 }, { "epoch": 0.03, "learning_rate": 1.9988671345893366e-05, "loss": 0.0508, "step": 1230 }, { "epoch": 0.03, "learning_rate": 1.9988178795714815e-05, "loss": 0.0423, "step": 1240 }, { "epoch": 0.03, "learning_rate": 1.9987686245536268e-05, "loss": 0.0468, "step": 1250 }, { "epoch": 0.03, "learning_rate": 1.9987193695357717e-05, "loss": 0.0471, "step": 1260 }, { "epoch": 0.03, "learning_rate": 1.9986701145179166e-05, "loss": 0.0477, "step": 1270 }, { "epoch": 0.03, "learning_rate": 1.998620859500062e-05, "loss": 0.0423, "step": 1280 }, { "epoch": 0.03, "learning_rate": 1.9985716044822068e-05, "loss": 0.0446, "step": 1290 }, { "epoch": 0.03, "learning_rate": 1.9985223494643517e-05, "loss": 0.0386, "step": 1300 }, { "epoch": 0.03, "learning_rate": 1.998473094446497e-05, "loss": 0.0446, "step": 1310 }, { "epoch": 0.03, "learning_rate": 1.998423839428642e-05, "loss": 0.0454, "step": 1320 }, { "epoch": 0.03, "learning_rate": 1.9983745844107868e-05, "loss": 0.0443, "step": 1330 }, { "epoch": 0.03, "learning_rate": 1.998325329392932e-05, "loss": 0.0521, "step": 1340 }, { "epoch": 0.03, "learning_rate": 1.998276074375077e-05, "loss": 0.0417, "step": 1350 }, { "epoch": 0.03, "learning_rate": 1.9982268193572222e-05, "loss": 0.046, "step": 1360 }, { "epoch": 0.03, "learning_rate": 1.9981775643393674e-05, "loss": 0.0415, "step": 1370 }, { "epoch": 0.03, "learning_rate": 1.9981283093215123e-05, "loss": 0.0496, "step": 1380 }, { "epoch": 0.03, "learning_rate": 1.9980790543036576e-05, "loss": 0.063, "step": 1390 }, { "epoch": 0.03, "learning_rate": 1.9980297992858025e-05, "loss": 0.0699, "step": 1400 }, { "epoch": 0.03, "learning_rate": 1.9979805442679474e-05, "loss": 0.0407, "step": 1410 }, { "epoch": 0.03, "learning_rate": 1.9979312892500927e-05, "loss": 0.044, "step": 1420 }, { "epoch": 0.04, "learning_rate": 1.9978820342322376e-05, "loss": 0.0418, "step": 1430 }, { "epoch": 0.04, "learning_rate": 1.9978327792143825e-05, "loss": 0.0397, "step": 1440 }, { "epoch": 0.04, "learning_rate": 1.9977835241965278e-05, "loss": 0.0555, "step": 1450 }, { "epoch": 0.04, "learning_rate": 1.9977342691786727e-05, "loss": 0.0544, "step": 1460 }, { "epoch": 0.04, "learning_rate": 1.9976850141608176e-05, "loss": 0.0574, "step": 1470 }, { "epoch": 0.04, "learning_rate": 1.997635759142963e-05, "loss": 0.0578, "step": 1480 }, { "epoch": 0.04, "learning_rate": 1.9975865041251078e-05, "loss": 0.0394, "step": 1490 }, { "epoch": 0.04, "learning_rate": 1.997537249107253e-05, "loss": 0.0399, "step": 1500 }, { "epoch": 0.04, "learning_rate": 1.997487994089398e-05, "loss": 0.0404, "step": 1510 }, { "epoch": 0.04, "learning_rate": 1.9974387390715432e-05, "loss": 0.0385, "step": 1520 }, { "epoch": 0.04, "learning_rate": 1.997389484053688e-05, "loss": 0.0377, "step": 1530 }, { "epoch": 0.04, "learning_rate": 1.9973402290358334e-05, "loss": 0.0463, "step": 1540 }, { "epoch": 0.04, "learning_rate": 1.9972909740179783e-05, "loss": 0.0423, "step": 1550 }, { "epoch": 0.04, "learning_rate": 1.9972417190001232e-05, "loss": 0.0454, "step": 1560 }, { "epoch": 0.04, "learning_rate": 1.9971924639822684e-05, "loss": 0.0395, "step": 1570 }, { "epoch": 0.04, "learning_rate": 1.9971432089644134e-05, "loss": 0.0398, "step": 1580 }, { "epoch": 0.04, "learning_rate": 1.9970939539465583e-05, "loss": 0.0395, "step": 1590 }, { "epoch": 0.04, "learning_rate": 1.9970446989287035e-05, "loss": 0.0465, "step": 1600 }, { "epoch": 0.04, "learning_rate": 1.9969954439108484e-05, "loss": 0.0473, "step": 1610 }, { "epoch": 0.04, "learning_rate": 1.9969461888929937e-05, "loss": 0.0429, "step": 1620 }, { "epoch": 0.04, "learning_rate": 1.9968969338751386e-05, "loss": 0.0388, "step": 1630 }, { "epoch": 0.04, "learning_rate": 1.996847678857284e-05, "loss": 0.0381, "step": 1640 }, { "epoch": 0.04, "learning_rate": 1.9967984238394288e-05, "loss": 0.0522, "step": 1650 }, { "epoch": 0.04, "learning_rate": 1.996749168821574e-05, "loss": 0.0494, "step": 1660 }, { "epoch": 0.04, "learning_rate": 1.996699913803719e-05, "loss": 0.0358, "step": 1670 }, { "epoch": 0.04, "learning_rate": 1.9966506587858642e-05, "loss": 0.0444, "step": 1680 }, { "epoch": 0.04, "learning_rate": 1.996601403768009e-05, "loss": 0.0359, "step": 1690 }, { "epoch": 0.04, "learning_rate": 1.996552148750154e-05, "loss": 0.0437, "step": 1700 }, { "epoch": 0.04, "learning_rate": 1.9965028937322993e-05, "loss": 0.0372, "step": 1710 }, { "epoch": 0.04, "learning_rate": 1.9964536387144442e-05, "loss": 0.041, "step": 1720 }, { "epoch": 0.04, "learning_rate": 1.996404383696589e-05, "loss": 0.053, "step": 1730 }, { "epoch": 0.04, "learning_rate": 1.9963551286787344e-05, "loss": 0.0376, "step": 1740 }, { "epoch": 0.04, "learning_rate": 1.9963058736608793e-05, "loss": 0.0376, "step": 1750 }, { "epoch": 0.04, "learning_rate": 1.9962566186430242e-05, "loss": 0.038, "step": 1760 }, { "epoch": 0.04, "learning_rate": 1.9962073636251694e-05, "loss": 0.0362, "step": 1770 }, { "epoch": 0.04, "learning_rate": 1.9961581086073144e-05, "loss": 0.0403, "step": 1780 }, { "epoch": 0.04, "learning_rate": 1.9961088535894596e-05, "loss": 0.0374, "step": 1790 }, { "epoch": 0.04, "learning_rate": 1.9960595985716045e-05, "loss": 0.0373, "step": 1800 }, { "epoch": 0.04, "learning_rate": 1.9960103435537498e-05, "loss": 0.039, "step": 1810 }, { "epoch": 0.04, "learning_rate": 1.9959610885358947e-05, "loss": 0.0445, "step": 1820 }, { "epoch": 0.04, "learning_rate": 1.99591183351804e-05, "loss": 0.0345, "step": 1830 }, { "epoch": 0.05, "learning_rate": 1.995862578500185e-05, "loss": 0.0388, "step": 1840 }, { "epoch": 0.05, "learning_rate": 1.99581332348233e-05, "loss": 0.0349, "step": 1850 }, { "epoch": 0.05, "learning_rate": 1.995764068464475e-05, "loss": 0.0414, "step": 1860 }, { "epoch": 0.05, "learning_rate": 1.99571481344662e-05, "loss": 0.0421, "step": 1870 }, { "epoch": 0.05, "learning_rate": 1.9956655584287652e-05, "loss": 0.0421, "step": 1880 }, { "epoch": 0.05, "learning_rate": 1.99561630341091e-05, "loss": 0.0516, "step": 1890 }, { "epoch": 0.05, "learning_rate": 1.995567048393055e-05, "loss": 0.0387, "step": 1900 }, { "epoch": 0.05, "learning_rate": 1.9955177933752003e-05, "loss": 0.0363, "step": 1910 }, { "epoch": 0.05, "learning_rate": 1.9954685383573452e-05, "loss": 0.0383, "step": 1920 }, { "epoch": 0.05, "learning_rate": 1.9954192833394905e-05, "loss": 0.0366, "step": 1930 }, { "epoch": 0.05, "learning_rate": 1.9953700283216354e-05, "loss": 0.0354, "step": 1940 }, { "epoch": 0.05, "learning_rate": 1.9953207733037806e-05, "loss": 0.0353, "step": 1950 }, { "epoch": 0.05, "learning_rate": 1.9952715182859255e-05, "loss": 0.0496, "step": 1960 }, { "epoch": 0.05, "learning_rate": 1.9952222632680708e-05, "loss": 0.0369, "step": 1970 }, { "epoch": 0.05, "learning_rate": 1.9951730082502157e-05, "loss": 0.0449, "step": 1980 }, { "epoch": 0.05, "learning_rate": 1.9951237532323606e-05, "loss": 0.0378, "step": 1990 }, { "epoch": 0.05, "learning_rate": 1.995074498214506e-05, "loss": 0.0397, "step": 2000 }, { "epoch": 0.05, "learning_rate": 1.9950252431966508e-05, "loss": 0.0301, "step": 2010 }, { "epoch": 0.05, "learning_rate": 1.9949759881787957e-05, "loss": 0.0332, "step": 2020 }, { "epoch": 0.05, "learning_rate": 1.994926733160941e-05, "loss": 0.0363, "step": 2030 }, { "epoch": 0.05, "learning_rate": 1.994877478143086e-05, "loss": 0.0474, "step": 2040 }, { "epoch": 0.05, "learning_rate": 1.9948282231252308e-05, "loss": 0.0316, "step": 2050 }, { "epoch": 0.05, "learning_rate": 1.994778968107376e-05, "loss": 0.0336, "step": 2060 }, { "epoch": 0.05, "learning_rate": 1.994729713089521e-05, "loss": 0.0326, "step": 2070 }, { "epoch": 0.05, "learning_rate": 1.9946804580716662e-05, "loss": 0.0322, "step": 2080 }, { "epoch": 0.05, "learning_rate": 1.9946312030538115e-05, "loss": 0.0504, "step": 2090 }, { "epoch": 0.05, "learning_rate": 1.9945819480359564e-05, "loss": 0.0367, "step": 2100 }, { "epoch": 0.05, "learning_rate": 1.9945326930181016e-05, "loss": 0.0323, "step": 2110 }, { "epoch": 0.05, "learning_rate": 1.9944834380002466e-05, "loss": 0.0323, "step": 2120 }, { "epoch": 0.05, "learning_rate": 1.9944341829823915e-05, "loss": 0.048, "step": 2130 }, { "epoch": 0.05, "learning_rate": 1.9943849279645367e-05, "loss": 0.0383, "step": 2140 }, { "epoch": 0.05, "learning_rate": 1.9943356729466816e-05, "loss": 0.0345, "step": 2150 }, { "epoch": 0.05, "learning_rate": 1.9942864179288266e-05, "loss": 0.0468, "step": 2160 }, { "epoch": 0.05, "learning_rate": 1.9942371629109718e-05, "loss": 0.0396, "step": 2170 }, { "epoch": 0.05, "learning_rate": 1.9941879078931167e-05, "loss": 0.0338, "step": 2180 }, { "epoch": 0.05, "learning_rate": 1.9941386528752616e-05, "loss": 0.0359, "step": 2190 }, { "epoch": 0.05, "learning_rate": 1.994089397857407e-05, "loss": 0.0364, "step": 2200 }, { "epoch": 0.05, "learning_rate": 1.9940401428395518e-05, "loss": 0.0331, "step": 2210 }, { "epoch": 0.05, "learning_rate": 1.993990887821697e-05, "loss": 0.0314, "step": 2220 }, { "epoch": 0.05, "learning_rate": 1.993941632803842e-05, "loss": 0.0397, "step": 2230 }, { "epoch": 0.06, "learning_rate": 1.9938923777859872e-05, "loss": 0.0351, "step": 2240 }, { "epoch": 0.06, "learning_rate": 1.993843122768132e-05, "loss": 0.0333, "step": 2250 }, { "epoch": 0.06, "learning_rate": 1.9937938677502774e-05, "loss": 0.0429, "step": 2260 }, { "epoch": 0.06, "learning_rate": 1.9937446127324223e-05, "loss": 0.0357, "step": 2270 }, { "epoch": 0.06, "learning_rate": 1.9936953577145672e-05, "loss": 0.0339, "step": 2280 }, { "epoch": 0.06, "learning_rate": 1.9936461026967125e-05, "loss": 0.0447, "step": 2290 }, { "epoch": 0.06, "learning_rate": 1.9935968476788574e-05, "loss": 0.0369, "step": 2300 }, { "epoch": 0.06, "learning_rate": 1.9935475926610023e-05, "loss": 0.0351, "step": 2310 }, { "epoch": 0.06, "learning_rate": 1.9934983376431476e-05, "loss": 0.0348, "step": 2320 }, { "epoch": 0.06, "learning_rate": 1.9934490826252925e-05, "loss": 0.0334, "step": 2330 }, { "epoch": 0.06, "learning_rate": 1.9933998276074377e-05, "loss": 0.0345, "step": 2340 }, { "epoch": 0.06, "learning_rate": 1.9933505725895826e-05, "loss": 0.033, "step": 2350 }, { "epoch": 0.06, "learning_rate": 1.993301317571728e-05, "loss": 0.0356, "step": 2360 }, { "epoch": 0.06, "learning_rate": 1.9932520625538728e-05, "loss": 0.0311, "step": 2370 }, { "epoch": 0.06, "learning_rate": 1.993202807536018e-05, "loss": 0.0322, "step": 2380 }, { "epoch": 0.06, "learning_rate": 1.993153552518163e-05, "loss": 0.0394, "step": 2390 }, { "epoch": 0.06, "learning_rate": 1.9931042975003082e-05, "loss": 0.033, "step": 2400 }, { "epoch": 0.06, "learning_rate": 1.993055042482453e-05, "loss": 0.0335, "step": 2410 }, { "epoch": 0.06, "learning_rate": 1.993005787464598e-05, "loss": 0.0396, "step": 2420 }, { "epoch": 0.06, "learning_rate": 1.9929565324467433e-05, "loss": 0.0322, "step": 2430 }, { "epoch": 0.06, "learning_rate": 1.9929072774288882e-05, "loss": 0.0314, "step": 2440 }, { "epoch": 0.06, "learning_rate": 1.992858022411033e-05, "loss": 0.033, "step": 2450 }, { "epoch": 0.06, "learning_rate": 1.9928087673931784e-05, "loss": 0.0343, "step": 2460 }, { "epoch": 0.06, "learning_rate": 1.9927595123753233e-05, "loss": 0.0313, "step": 2470 }, { "epoch": 0.06, "learning_rate": 1.9927102573574682e-05, "loss": 0.0359, "step": 2480 }, { "epoch": 0.06, "learning_rate": 1.9926610023396135e-05, "loss": 0.0333, "step": 2490 }, { "epoch": 0.06, "learning_rate": 1.9926117473217584e-05, "loss": 0.0347, "step": 2500 }, { "epoch": 0.06, "learning_rate": 1.9925624923039037e-05, "loss": 0.035, "step": 2510 }, { "epoch": 0.06, "learning_rate": 1.9925132372860486e-05, "loss": 0.0342, "step": 2520 }, { "epoch": 0.06, "learning_rate": 1.9924639822681938e-05, "loss": 0.0317, "step": 2530 }, { "epoch": 0.06, "learning_rate": 1.9924147272503387e-05, "loss": 0.032, "step": 2540 }, { "epoch": 0.06, "learning_rate": 1.992365472232484e-05, "loss": 0.0303, "step": 2550 }, { "epoch": 0.06, "learning_rate": 1.992316217214629e-05, "loss": 0.0353, "step": 2560 }, { "epoch": 0.06, "learning_rate": 1.992266962196774e-05, "loss": 0.0373, "step": 2570 }, { "epoch": 0.06, "learning_rate": 1.992217707178919e-05, "loss": 0.0278, "step": 2580 }, { "epoch": 0.06, "learning_rate": 1.992168452161064e-05, "loss": 0.0324, "step": 2590 }, { "epoch": 0.06, "learning_rate": 1.9921191971432092e-05, "loss": 0.0319, "step": 2600 }, { "epoch": 0.06, "learning_rate": 1.992069942125354e-05, "loss": 0.03, "step": 2610 }, { "epoch": 0.06, "learning_rate": 1.992020687107499e-05, "loss": 0.0355, "step": 2620 }, { "epoch": 0.06, "learning_rate": 1.9919714320896443e-05, "loss": 0.032, "step": 2630 }, { "epoch": 0.06, "learning_rate": 1.9919221770717892e-05, "loss": 0.0306, "step": 2640 }, { "epoch": 0.07, "learning_rate": 1.9918729220539345e-05, "loss": 0.0305, "step": 2650 }, { "epoch": 0.07, "learning_rate": 1.9918236670360794e-05, "loss": 0.0436, "step": 2660 }, { "epoch": 0.07, "learning_rate": 1.9917744120182247e-05, "loss": 0.0305, "step": 2670 }, { "epoch": 0.07, "learning_rate": 1.9917251570003696e-05, "loss": 0.0382, "step": 2680 }, { "epoch": 0.07, "learning_rate": 1.9916759019825148e-05, "loss": 0.0303, "step": 2690 }, { "epoch": 0.07, "learning_rate": 1.9916266469646597e-05, "loss": 0.0395, "step": 2700 }, { "epoch": 0.07, "learning_rate": 1.9915773919468047e-05, "loss": 0.0392, "step": 2710 }, { "epoch": 0.07, "learning_rate": 1.99152813692895e-05, "loss": 0.0281, "step": 2720 }, { "epoch": 0.07, "learning_rate": 1.9914788819110948e-05, "loss": 0.0273, "step": 2730 }, { "epoch": 0.07, "learning_rate": 1.9914296268932397e-05, "loss": 0.0414, "step": 2740 }, { "epoch": 0.07, "learning_rate": 1.991380371875385e-05, "loss": 0.0312, "step": 2750 }, { "epoch": 0.07, "learning_rate": 1.99133111685753e-05, "loss": 0.0307, "step": 2760 }, { "epoch": 0.07, "learning_rate": 1.9912818618396748e-05, "loss": 0.0311, "step": 2770 }, { "epoch": 0.07, "learning_rate": 1.99123260682182e-05, "loss": 0.0307, "step": 2780 }, { "epoch": 0.07, "learning_rate": 1.991183351803965e-05, "loss": 0.0273, "step": 2790 }, { "epoch": 0.07, "learning_rate": 1.9911340967861102e-05, "loss": 0.0318, "step": 2800 }, { "epoch": 0.07, "learning_rate": 1.9910848417682555e-05, "loss": 0.0436, "step": 2810 }, { "epoch": 0.07, "learning_rate": 1.9910355867504004e-05, "loss": 0.035, "step": 2820 }, { "epoch": 0.07, "learning_rate": 1.9909863317325457e-05, "loss": 0.0323, "step": 2830 }, { "epoch": 0.07, "learning_rate": 1.9909370767146906e-05, "loss": 0.0393, "step": 2840 }, { "epoch": 0.07, "learning_rate": 1.9908878216968355e-05, "loss": 0.0294, "step": 2850 }, { "epoch": 0.07, "learning_rate": 1.9908385666789808e-05, "loss": 0.0309, "step": 2860 }, { "epoch": 0.07, "learning_rate": 1.9907893116611257e-05, "loss": 0.0328, "step": 2870 }, { "epoch": 0.07, "learning_rate": 1.9907400566432706e-05, "loss": 0.03, "step": 2880 }, { "epoch": 0.07, "learning_rate": 1.990690801625416e-05, "loss": 0.0315, "step": 2890 }, { "epoch": 0.07, "learning_rate": 1.9906415466075608e-05, "loss": 0.0319, "step": 2900 }, { "epoch": 0.07, "learning_rate": 1.9905922915897057e-05, "loss": 0.0367, "step": 2910 }, { "epoch": 0.07, "learning_rate": 1.990543036571851e-05, "loss": 0.0299, "step": 2920 }, { "epoch": 0.07, "learning_rate": 1.990493781553996e-05, "loss": 0.0299, "step": 2930 }, { "epoch": 0.07, "learning_rate": 1.990444526536141e-05, "loss": 0.0393, "step": 2940 }, { "epoch": 0.07, "learning_rate": 1.990395271518286e-05, "loss": 0.0299, "step": 2950 }, { "epoch": 0.07, "learning_rate": 1.9903460165004313e-05, "loss": 0.0403, "step": 2960 }, { "epoch": 0.07, "learning_rate": 1.9902967614825762e-05, "loss": 0.0314, "step": 2970 }, { "epoch": 0.07, "learning_rate": 1.9902475064647214e-05, "loss": 0.0324, "step": 2980 }, { "epoch": 0.07, "learning_rate": 1.9901982514468663e-05, "loss": 0.0296, "step": 2990 }, { "epoch": 0.07, "learning_rate": 1.9901489964290113e-05, "loss": 0.0295, "step": 3000 }, { "epoch": 0.07, "learning_rate": 1.9900997414111565e-05, "loss": 0.0272, "step": 3010 }, { "epoch": 0.07, "learning_rate": 1.9900504863933014e-05, "loss": 0.0315, "step": 3020 }, { "epoch": 0.07, "learning_rate": 1.9900012313754463e-05, "loss": 0.03, "step": 3030 }, { "epoch": 0.07, "learning_rate": 1.9899519763575916e-05, "loss": 0.0434, "step": 3040 }, { "epoch": 0.07, "learning_rate": 1.9899027213397365e-05, "loss": 0.0328, "step": 3050 }, { "epoch": 0.08, "learning_rate": 1.9898534663218818e-05, "loss": 0.029, "step": 3060 }, { "epoch": 0.08, "learning_rate": 1.9898042113040267e-05, "loss": 0.0313, "step": 3070 }, { "epoch": 0.08, "learning_rate": 1.989754956286172e-05, "loss": 0.0533, "step": 3080 }, { "epoch": 0.08, "learning_rate": 1.989705701268317e-05, "loss": 0.0282, "step": 3090 }, { "epoch": 0.08, "learning_rate": 1.989656446250462e-05, "loss": 0.0315, "step": 3100 }, { "epoch": 0.08, "learning_rate": 1.989607191232607e-05, "loss": 0.0257, "step": 3110 }, { "epoch": 0.08, "learning_rate": 1.9895579362147523e-05, "loss": 0.037, "step": 3120 }, { "epoch": 0.08, "learning_rate": 1.9895086811968972e-05, "loss": 0.0385, "step": 3130 }, { "epoch": 0.08, "learning_rate": 1.989459426179042e-05, "loss": 0.0281, "step": 3140 }, { "epoch": 0.08, "learning_rate": 1.9894101711611874e-05, "loss": 0.0309, "step": 3150 }, { "epoch": 0.08, "learning_rate": 1.9893609161433323e-05, "loss": 0.0277, "step": 3160 }, { "epoch": 0.08, "learning_rate": 1.9893116611254772e-05, "loss": 0.0271, "step": 3170 }, { "epoch": 0.08, "learning_rate": 1.9892624061076224e-05, "loss": 0.0285, "step": 3180 }, { "epoch": 0.08, "learning_rate": 1.9892131510897673e-05, "loss": 0.028, "step": 3190 }, { "epoch": 0.08, "learning_rate": 1.9891638960719123e-05, "loss": 0.0283, "step": 3200 }, { "epoch": 0.08, "learning_rate": 1.9891146410540575e-05, "loss": 0.0297, "step": 3210 }, { "epoch": 0.08, "learning_rate": 1.9890653860362024e-05, "loss": 0.0269, "step": 3220 }, { "epoch": 0.08, "learning_rate": 1.9890161310183477e-05, "loss": 0.028, "step": 3230 }, { "epoch": 0.08, "learning_rate": 1.9889668760004926e-05, "loss": 0.0274, "step": 3240 }, { "epoch": 0.08, "learning_rate": 1.988917620982638e-05, "loss": 0.0385, "step": 3250 }, { "epoch": 0.08, "learning_rate": 1.9888683659647828e-05, "loss": 0.0304, "step": 3260 }, { "epoch": 0.08, "learning_rate": 1.988819110946928e-05, "loss": 0.0296, "step": 3270 }, { "epoch": 0.08, "learning_rate": 1.988769855929073e-05, "loss": 0.0266, "step": 3280 }, { "epoch": 0.08, "learning_rate": 1.9887206009112182e-05, "loss": 0.0267, "step": 3290 }, { "epoch": 0.08, "learning_rate": 1.988671345893363e-05, "loss": 0.0261, "step": 3300 }, { "epoch": 0.08, "learning_rate": 1.988622090875508e-05, "loss": 0.029, "step": 3310 }, { "epoch": 0.08, "learning_rate": 1.9885728358576533e-05, "loss": 0.0298, "step": 3320 }, { "epoch": 0.08, "learning_rate": 1.9885235808397982e-05, "loss": 0.0279, "step": 3330 }, { "epoch": 0.08, "learning_rate": 1.988474325821943e-05, "loss": 0.0389, "step": 3340 }, { "epoch": 0.08, "learning_rate": 1.9884250708040884e-05, "loss": 0.0284, "step": 3350 }, { "epoch": 0.08, "learning_rate": 1.9883758157862333e-05, "loss": 0.0283, "step": 3360 }, { "epoch": 0.08, "learning_rate": 1.9883265607683785e-05, "loss": 0.0326, "step": 3370 }, { "epoch": 0.08, "learning_rate": 1.9882773057505234e-05, "loss": 0.0282, "step": 3380 }, { "epoch": 0.08, "learning_rate": 1.9882280507326687e-05, "loss": 0.0267, "step": 3390 }, { "epoch": 0.08, "learning_rate": 1.9881787957148136e-05, "loss": 0.0393, "step": 3400 }, { "epoch": 0.08, "learning_rate": 1.988129540696959e-05, "loss": 0.0286, "step": 3410 }, { "epoch": 0.08, "learning_rate": 1.9880802856791038e-05, "loss": 0.0279, "step": 3420 }, { "epoch": 0.08, "learning_rate": 1.9880310306612487e-05, "loss": 0.0296, "step": 3430 }, { "epoch": 0.08, "learning_rate": 1.987981775643394e-05, "loss": 0.028, "step": 3440 }, { "epoch": 0.08, "learning_rate": 1.987932520625539e-05, "loss": 0.0382, "step": 3450 }, { "epoch": 0.09, "learning_rate": 1.9878832656076838e-05, "loss": 0.0374, "step": 3460 }, { "epoch": 0.09, "learning_rate": 1.987834010589829e-05, "loss": 0.0287, "step": 3470 }, { "epoch": 0.09, "learning_rate": 1.987784755571974e-05, "loss": 0.0325, "step": 3480 }, { "epoch": 0.09, "learning_rate": 1.987735500554119e-05, "loss": 0.0262, "step": 3490 }, { "epoch": 0.09, "learning_rate": 1.987686245536264e-05, "loss": 0.0309, "step": 3500 }, { "epoch": 0.09, "learning_rate": 1.987636990518409e-05, "loss": 0.0291, "step": 3510 }, { "epoch": 0.09, "learning_rate": 1.9875877355005543e-05, "loss": 0.0561, "step": 3520 }, { "epoch": 0.09, "learning_rate": 1.9875384804826995e-05, "loss": 0.0276, "step": 3530 }, { "epoch": 0.09, "learning_rate": 1.9874892254648445e-05, "loss": 0.0327, "step": 3540 }, { "epoch": 0.09, "learning_rate": 1.9874399704469897e-05, "loss": 0.0292, "step": 3550 }, { "epoch": 0.09, "learning_rate": 1.9873907154291346e-05, "loss": 0.0257, "step": 3560 }, { "epoch": 0.09, "learning_rate": 1.9873414604112795e-05, "loss": 0.0284, "step": 3570 }, { "epoch": 0.09, "learning_rate": 1.9872922053934248e-05, "loss": 0.0263, "step": 3580 }, { "epoch": 0.09, "learning_rate": 1.9872429503755697e-05, "loss": 0.0281, "step": 3590 }, { "epoch": 0.09, "learning_rate": 1.9871936953577146e-05, "loss": 0.027, "step": 3600 }, { "epoch": 0.09, "learning_rate": 1.98714444033986e-05, "loss": 0.0255, "step": 3610 }, { "epoch": 0.09, "learning_rate": 1.9870951853220048e-05, "loss": 0.0261, "step": 3620 }, { "epoch": 0.09, "learning_rate": 1.9870459303041497e-05, "loss": 0.0386, "step": 3630 }, { "epoch": 0.09, "learning_rate": 1.986996675286295e-05, "loss": 0.0277, "step": 3640 }, { "epoch": 0.09, "learning_rate": 1.98694742026844e-05, "loss": 0.0308, "step": 3650 }, { "epoch": 0.09, "learning_rate": 1.986898165250585e-05, "loss": 0.027, "step": 3660 }, { "epoch": 0.09, "learning_rate": 1.98684891023273e-05, "loss": 0.0253, "step": 3670 }, { "epoch": 0.09, "learning_rate": 1.9867996552148753e-05, "loss": 0.0294, "step": 3680 }, { "epoch": 0.09, "learning_rate": 1.9867504001970202e-05, "loss": 0.0269, "step": 3690 }, { "epoch": 0.09, "learning_rate": 1.9867011451791655e-05, "loss": 0.0254, "step": 3700 }, { "epoch": 0.09, "learning_rate": 1.9866518901613104e-05, "loss": 0.026, "step": 3710 }, { "epoch": 0.09, "learning_rate": 1.9866026351434553e-05, "loss": 0.0277, "step": 3720 }, { "epoch": 0.09, "learning_rate": 1.9865533801256005e-05, "loss": 0.0395, "step": 3730 }, { "epoch": 0.09, "learning_rate": 1.9865041251077455e-05, "loss": 0.0265, "step": 3740 }, { "epoch": 0.09, "learning_rate": 1.9864548700898904e-05, "loss": 0.0252, "step": 3750 }, { "epoch": 0.09, "learning_rate": 1.9864056150720356e-05, "loss": 0.0291, "step": 3760 }, { "epoch": 0.09, "learning_rate": 1.9863563600541805e-05, "loss": 0.028, "step": 3770 }, { "epoch": 0.09, "learning_rate": 1.9863071050363258e-05, "loss": 0.0261, "step": 3780 }, { "epoch": 0.09, "learning_rate": 1.9862578500184707e-05, "loss": 0.0255, "step": 3790 }, { "epoch": 0.09, "learning_rate": 1.986208595000616e-05, "loss": 0.036, "step": 3800 }, { "epoch": 0.09, "learning_rate": 1.986159339982761e-05, "loss": 0.0355, "step": 3810 }, { "epoch": 0.09, "learning_rate": 1.986110084964906e-05, "loss": 0.0291, "step": 3820 }, { "epoch": 0.09, "learning_rate": 1.986060829947051e-05, "loss": 0.0384, "step": 3830 }, { "epoch": 0.09, "learning_rate": 1.9860115749291963e-05, "loss": 0.0262, "step": 3840 }, { "epoch": 0.09, "learning_rate": 1.9859623199113412e-05, "loss": 0.0361, "step": 3850 }, { "epoch": 0.09, "learning_rate": 1.985913064893486e-05, "loss": 0.0285, "step": 3860 }, { "epoch": 0.1, "learning_rate": 1.9858638098756314e-05, "loss": 0.0285, "step": 3870 }, { "epoch": 0.1, "learning_rate": 1.9858145548577763e-05, "loss": 0.0277, "step": 3880 }, { "epoch": 0.1, "learning_rate": 1.9857652998399212e-05, "loss": 0.0298, "step": 3890 }, { "epoch": 0.1, "learning_rate": 1.9857160448220665e-05, "loss": 0.0368, "step": 3900 }, { "epoch": 0.1, "learning_rate": 1.9856667898042114e-05, "loss": 0.0294, "step": 3910 }, { "epoch": 0.1, "learning_rate": 1.9856175347863563e-05, "loss": 0.0468, "step": 3920 }, { "epoch": 0.1, "learning_rate": 1.9855682797685016e-05, "loss": 0.0239, "step": 3930 }, { "epoch": 0.1, "learning_rate": 1.9855190247506465e-05, "loss": 0.0361, "step": 3940 }, { "epoch": 0.1, "learning_rate": 1.9854697697327917e-05, "loss": 0.0282, "step": 3950 }, { "epoch": 0.1, "learning_rate": 1.9854205147149366e-05, "loss": 0.031, "step": 3960 }, { "epoch": 0.1, "learning_rate": 1.985371259697082e-05, "loss": 0.0282, "step": 3970 }, { "epoch": 0.1, "learning_rate": 1.9853220046792268e-05, "loss": 0.0274, "step": 3980 }, { "epoch": 0.1, "learning_rate": 1.985272749661372e-05, "loss": 0.0258, "step": 3990 }, { "epoch": 0.1, "learning_rate": 1.985223494643517e-05, "loss": 0.0292, "step": 4000 }, { "epoch": 0.1, "learning_rate": 1.9851742396256622e-05, "loss": 0.0253, "step": 4010 }, { "epoch": 0.1, "learning_rate": 1.985124984607807e-05, "loss": 0.0264, "step": 4020 }, { "epoch": 0.1, "learning_rate": 1.985075729589952e-05, "loss": 0.037, "step": 4030 }, { "epoch": 0.1, "learning_rate": 1.9850264745720973e-05, "loss": 0.0257, "step": 4040 }, { "epoch": 0.1, "learning_rate": 1.9849772195542422e-05, "loss": 0.0241, "step": 4050 }, { "epoch": 0.1, "learning_rate": 1.984927964536387e-05, "loss": 0.0273, "step": 4060 }, { "epoch": 0.1, "learning_rate": 1.9848787095185324e-05, "loss": 0.0285, "step": 4070 }, { "epoch": 0.1, "learning_rate": 1.9848294545006773e-05, "loss": 0.0269, "step": 4080 }, { "epoch": 0.1, "learning_rate": 1.9847801994828226e-05, "loss": 0.0362, "step": 4090 }, { "epoch": 0.1, "learning_rate": 1.9847309444649675e-05, "loss": 0.0263, "step": 4100 }, { "epoch": 0.1, "learning_rate": 1.9846816894471127e-05, "loss": 0.0293, "step": 4110 }, { "epoch": 0.1, "learning_rate": 1.9846324344292576e-05, "loss": 0.0286, "step": 4120 }, { "epoch": 0.1, "learning_rate": 1.984583179411403e-05, "loss": 0.0219, "step": 4130 }, { "epoch": 0.1, "learning_rate": 1.9845339243935478e-05, "loss": 0.0263, "step": 4140 }, { "epoch": 0.1, "learning_rate": 1.9844846693756927e-05, "loss": 0.0282, "step": 4150 }, { "epoch": 0.1, "learning_rate": 1.984435414357838e-05, "loss": 0.0322, "step": 4160 }, { "epoch": 0.1, "learning_rate": 1.984386159339983e-05, "loss": 0.0265, "step": 4170 }, { "epoch": 0.1, "learning_rate": 1.9843369043221278e-05, "loss": 0.0254, "step": 4180 }, { "epoch": 0.1, "learning_rate": 1.984287649304273e-05, "loss": 0.0273, "step": 4190 }, { "epoch": 0.1, "learning_rate": 1.984238394286418e-05, "loss": 0.0314, "step": 4200 }, { "epoch": 0.1, "learning_rate": 1.984189139268563e-05, "loss": 0.0254, "step": 4210 }, { "epoch": 0.1, "learning_rate": 1.984139884250708e-05, "loss": 0.0233, "step": 4220 }, { "epoch": 0.1, "learning_rate": 1.984090629232853e-05, "loss": 0.0267, "step": 4230 }, { "epoch": 0.1, "learning_rate": 1.9840413742149983e-05, "loss": 0.0225, "step": 4240 }, { "epoch": 0.1, "learning_rate": 1.9839921191971436e-05, "loss": 0.0249, "step": 4250 }, { "epoch": 0.1, "learning_rate": 1.9839428641792885e-05, "loss": 0.0257, "step": 4260 }, { "epoch": 0.1, "learning_rate": 1.9838936091614337e-05, "loss": 0.0281, "step": 4270 }, { "epoch": 0.11, "learning_rate": 1.9838443541435787e-05, "loss": 0.0247, "step": 4280 }, { "epoch": 0.11, "learning_rate": 1.9837950991257236e-05, "loss": 0.0232, "step": 4290 }, { "epoch": 0.11, "learning_rate": 1.9837458441078688e-05, "loss": 0.0245, "step": 4300 }, { "epoch": 0.11, "learning_rate": 1.9836965890900137e-05, "loss": 0.0237, "step": 4310 }, { "epoch": 0.11, "learning_rate": 1.9836473340721587e-05, "loss": 0.0264, "step": 4320 }, { "epoch": 0.11, "learning_rate": 1.983598079054304e-05, "loss": 0.0265, "step": 4330 }, { "epoch": 0.11, "learning_rate": 1.9835488240364488e-05, "loss": 0.0249, "step": 4340 }, { "epoch": 0.11, "learning_rate": 1.9834995690185937e-05, "loss": 0.0384, "step": 4350 }, { "epoch": 0.11, "learning_rate": 1.983450314000739e-05, "loss": 0.0451, "step": 4360 }, { "epoch": 0.11, "learning_rate": 1.983401058982884e-05, "loss": 0.024, "step": 4370 }, { "epoch": 0.11, "learning_rate": 1.983351803965029e-05, "loss": 0.0264, "step": 4380 }, { "epoch": 0.11, "learning_rate": 1.983302548947174e-05, "loss": 0.0248, "step": 4390 }, { "epoch": 0.11, "learning_rate": 1.9832532939293193e-05, "loss": 0.0272, "step": 4400 }, { "epoch": 0.11, "learning_rate": 1.9832040389114642e-05, "loss": 0.0248, "step": 4410 }, { "epoch": 0.11, "learning_rate": 1.9831547838936095e-05, "loss": 0.0247, "step": 4420 }, { "epoch": 0.11, "learning_rate": 1.9831055288757544e-05, "loss": 0.0341, "step": 4430 }, { "epoch": 0.11, "learning_rate": 1.9830562738578993e-05, "loss": 0.0338, "step": 4440 }, { "epoch": 0.11, "learning_rate": 1.9830070188400446e-05, "loss": 0.0252, "step": 4450 }, { "epoch": 0.11, "learning_rate": 1.9829577638221895e-05, "loss": 0.0237, "step": 4460 }, { "epoch": 0.11, "learning_rate": 1.9829085088043344e-05, "loss": 0.0307, "step": 4470 }, { "epoch": 0.11, "learning_rate": 1.9828592537864797e-05, "loss": 0.023, "step": 4480 }, { "epoch": 0.11, "learning_rate": 1.9828099987686246e-05, "loss": 0.0371, "step": 4490 }, { "epoch": 0.11, "learning_rate": 1.9827607437507698e-05, "loss": 0.0261, "step": 4500 }, { "epoch": 0.11, "learning_rate": 1.9827114887329147e-05, "loss": 0.0251, "step": 4510 }, { "epoch": 0.11, "learning_rate": 1.98266223371506e-05, "loss": 0.0259, "step": 4520 }, { "epoch": 0.11, "learning_rate": 1.982612978697205e-05, "loss": 0.0285, "step": 4530 }, { "epoch": 0.11, "learning_rate": 1.98256372367935e-05, "loss": 0.0317, "step": 4540 }, { "epoch": 0.11, "learning_rate": 1.982514468661495e-05, "loss": 0.0237, "step": 4550 }, { "epoch": 0.11, "learning_rate": 1.9824652136436403e-05, "loss": 0.0277, "step": 4560 }, { "epoch": 0.11, "learning_rate": 1.9824159586257853e-05, "loss": 0.025, "step": 4570 }, { "epoch": 0.11, "learning_rate": 1.98236670360793e-05, "loss": 0.0322, "step": 4580 }, { "epoch": 0.11, "learning_rate": 1.9823174485900754e-05, "loss": 0.0268, "step": 4590 }, { "epoch": 0.11, "learning_rate": 1.9822681935722203e-05, "loss": 0.0353, "step": 4600 }, { "epoch": 0.11, "learning_rate": 1.9822189385543652e-05, "loss": 0.0245, "step": 4610 }, { "epoch": 0.11, "learning_rate": 1.9821696835365105e-05, "loss": 0.0248, "step": 4620 }, { "epoch": 0.11, "learning_rate": 1.9821204285186554e-05, "loss": 0.0244, "step": 4630 }, { "epoch": 0.11, "learning_rate": 1.9820711735008003e-05, "loss": 0.0263, "step": 4640 }, { "epoch": 0.11, "learning_rate": 1.9820219184829456e-05, "loss": 0.0356, "step": 4650 }, { "epoch": 0.11, "learning_rate": 1.9819726634650905e-05, "loss": 0.0295, "step": 4660 }, { "epoch": 0.11, "learning_rate": 1.9819234084472358e-05, "loss": 0.0262, "step": 4670 }, { "epoch": 0.11, "learning_rate": 1.9818741534293807e-05, "loss": 0.0328, "step": 4680 }, { "epoch": 0.12, "learning_rate": 1.981824898411526e-05, "loss": 0.0243, "step": 4690 }, { "epoch": 0.12, "learning_rate": 1.981775643393671e-05, "loss": 0.0248, "step": 4700 }, { "epoch": 0.12, "learning_rate": 1.981726388375816e-05, "loss": 0.026, "step": 4710 }, { "epoch": 0.12, "learning_rate": 1.981677133357961e-05, "loss": 0.0281, "step": 4720 }, { "epoch": 0.12, "learning_rate": 1.9816278783401063e-05, "loss": 0.033, "step": 4730 }, { "epoch": 0.12, "learning_rate": 1.9815786233222512e-05, "loss": 0.0273, "step": 4740 }, { "epoch": 0.12, "learning_rate": 1.981529368304396e-05, "loss": 0.0251, "step": 4750 }, { "epoch": 0.12, "learning_rate": 1.9814801132865413e-05, "loss": 0.0289, "step": 4760 }, { "epoch": 0.12, "learning_rate": 1.9814308582686863e-05, "loss": 0.0344, "step": 4770 }, { "epoch": 0.12, "learning_rate": 1.9813816032508312e-05, "loss": 0.0238, "step": 4780 }, { "epoch": 0.12, "learning_rate": 1.9813323482329764e-05, "loss": 0.0242, "step": 4790 }, { "epoch": 0.12, "learning_rate": 1.9812830932151213e-05, "loss": 0.0442, "step": 4800 }, { "epoch": 0.12, "learning_rate": 1.9812338381972666e-05, "loss": 0.0303, "step": 4810 }, { "epoch": 0.12, "learning_rate": 1.9811845831794115e-05, "loss": 0.0259, "step": 4820 }, { "epoch": 0.12, "learning_rate": 1.9811353281615568e-05, "loss": 0.0244, "step": 4830 }, { "epoch": 0.12, "learning_rate": 1.9810860731437017e-05, "loss": 0.0413, "step": 4840 }, { "epoch": 0.12, "learning_rate": 1.981036818125847e-05, "loss": 0.0232, "step": 4850 }, { "epoch": 0.12, "learning_rate": 1.980987563107992e-05, "loss": 0.0264, "step": 4860 }, { "epoch": 0.12, "learning_rate": 1.9809383080901368e-05, "loss": 0.0261, "step": 4870 }, { "epoch": 0.12, "learning_rate": 1.980889053072282e-05, "loss": 0.0245, "step": 4880 }, { "epoch": 0.12, "learning_rate": 1.980839798054427e-05, "loss": 0.0286, "step": 4890 }, { "epoch": 0.12, "learning_rate": 1.980790543036572e-05, "loss": 0.0244, "step": 4900 }, { "epoch": 0.12, "learning_rate": 1.980741288018717e-05, "loss": 0.0259, "step": 4910 }, { "epoch": 0.12, "learning_rate": 1.980692033000862e-05, "loss": 0.0228, "step": 4920 }, { "epoch": 0.12, "learning_rate": 1.980642777983007e-05, "loss": 0.0341, "step": 4930 }, { "epoch": 0.12, "learning_rate": 1.9805935229651522e-05, "loss": 0.0266, "step": 4940 }, { "epoch": 0.12, "learning_rate": 1.980544267947297e-05, "loss": 0.0258, "step": 4950 }, { "epoch": 0.12, "learning_rate": 1.9804950129294424e-05, "loss": 0.0248, "step": 4960 }, { "epoch": 0.12, "learning_rate": 1.9804457579115876e-05, "loss": 0.0274, "step": 4970 }, { "epoch": 0.12, "learning_rate": 1.9803965028937325e-05, "loss": 0.0259, "step": 4980 }, { "epoch": 0.12, "learning_rate": 1.9803472478758778e-05, "loss": 0.0235, "step": 4990 }, { "epoch": 0.12, "learning_rate": 1.9802979928580227e-05, "loss": 0.0315, "step": 5000 }, { "epoch": 0.12, "eval_bleu": 95.924, "eval_gen_len_ratio": 0.9997, "eval_loss": 0.016404934227466583, "eval_runtime": 254.5595, "eval_samples_per_second": 7.33, "eval_steps_per_second": 0.232, "step": 5000 }, { "epoch": 0.12, "learning_rate": 1.9802487378401676e-05, "loss": 0.0274, "step": 5010 }, { "epoch": 0.12, "learning_rate": 1.980199482822313e-05, "loss": 0.0284, "step": 5020 }, { "epoch": 0.12, "learning_rate": 1.9801502278044578e-05, "loss": 0.0236, "step": 5030 }, { "epoch": 0.12, "learning_rate": 1.9801009727866027e-05, "loss": 0.026, "step": 5040 }, { "epoch": 0.12, "learning_rate": 1.980051717768748e-05, "loss": 0.0337, "step": 5050 }, { "epoch": 0.12, "learning_rate": 1.980002462750893e-05, "loss": 0.0268, "step": 5060 }, { "epoch": 0.12, "learning_rate": 1.9799532077330378e-05, "loss": 0.0235, "step": 5070 }, { "epoch": 0.12, "learning_rate": 1.979903952715183e-05, "loss": 0.0354, "step": 5080 }, { "epoch": 0.13, "learning_rate": 1.979854697697328e-05, "loss": 0.0258, "step": 5090 }, { "epoch": 0.13, "learning_rate": 1.9798054426794732e-05, "loss": 0.0219, "step": 5100 }, { "epoch": 0.13, "learning_rate": 1.979756187661618e-05, "loss": 0.0226, "step": 5110 }, { "epoch": 0.13, "learning_rate": 1.9797069326437634e-05, "loss": 0.023, "step": 5120 }, { "epoch": 0.13, "learning_rate": 1.9796576776259083e-05, "loss": 0.0249, "step": 5130 }, { "epoch": 0.13, "learning_rate": 1.9796084226080535e-05, "loss": 0.0266, "step": 5140 }, { "epoch": 0.13, "learning_rate": 1.9795591675901984e-05, "loss": 0.0212, "step": 5150 }, { "epoch": 0.13, "learning_rate": 1.9795099125723434e-05, "loss": 0.025, "step": 5160 }, { "epoch": 0.13, "learning_rate": 1.9794606575544886e-05, "loss": 0.0309, "step": 5170 }, { "epoch": 0.13, "learning_rate": 1.9794114025366335e-05, "loss": 0.0279, "step": 5180 }, { "epoch": 0.13, "learning_rate": 1.9793621475187784e-05, "loss": 0.0295, "step": 5190 }, { "epoch": 0.13, "learning_rate": 1.9793128925009237e-05, "loss": 0.0253, "step": 5200 }, { "epoch": 0.13, "learning_rate": 1.9792636374830686e-05, "loss": 0.0251, "step": 5210 }, { "epoch": 0.13, "learning_rate": 1.979214382465214e-05, "loss": 0.0223, "step": 5220 }, { "epoch": 0.13, "learning_rate": 1.9791651274473588e-05, "loss": 0.0323, "step": 5230 }, { "epoch": 0.13, "learning_rate": 1.979115872429504e-05, "loss": 0.0223, "step": 5240 }, { "epoch": 0.13, "learning_rate": 1.979066617411649e-05, "loss": 0.0257, "step": 5250 }, { "epoch": 0.13, "learning_rate": 1.9790173623937942e-05, "loss": 0.0355, "step": 5260 }, { "epoch": 0.13, "learning_rate": 1.978968107375939e-05, "loss": 0.0292, "step": 5270 }, { "epoch": 0.13, "learning_rate": 1.9789188523580844e-05, "loss": 0.0329, "step": 5280 }, { "epoch": 0.13, "learning_rate": 1.9788695973402293e-05, "loss": 0.0216, "step": 5290 }, { "epoch": 0.13, "learning_rate": 1.9788203423223742e-05, "loss": 0.0244, "step": 5300 }, { "epoch": 0.13, "learning_rate": 1.9787710873045195e-05, "loss": 0.0239, "step": 5310 }, { "epoch": 0.13, "learning_rate": 1.9787218322866644e-05, "loss": 0.0243, "step": 5320 }, { "epoch": 0.13, "learning_rate": 1.9786725772688093e-05, "loss": 0.0232, "step": 5330 }, { "epoch": 0.13, "learning_rate": 1.9786233222509545e-05, "loss": 0.0273, "step": 5340 }, { "epoch": 0.13, "learning_rate": 1.9785740672330995e-05, "loss": 0.0269, "step": 5350 }, { "epoch": 0.13, "learning_rate": 1.9785248122152444e-05, "loss": 0.0245, "step": 5360 }, { "epoch": 0.13, "learning_rate": 1.9784755571973896e-05, "loss": 0.0222, "step": 5370 }, { "epoch": 0.13, "learning_rate": 1.9784263021795345e-05, "loss": 0.0371, "step": 5380 }, { "epoch": 0.13, "learning_rate": 1.9783770471616798e-05, "loss": 0.0231, "step": 5390 }, { "epoch": 0.13, "learning_rate": 1.9783277921438247e-05, "loss": 0.0248, "step": 5400 }, { "epoch": 0.13, "learning_rate": 1.97827853712597e-05, "loss": 0.021, "step": 5410 }, { "epoch": 0.13, "learning_rate": 1.978229282108115e-05, "loss": 0.0243, "step": 5420 }, { "epoch": 0.13, "learning_rate": 1.97818002709026e-05, "loss": 0.0239, "step": 5430 }, { "epoch": 0.13, "learning_rate": 1.978130772072405e-05, "loss": 0.0227, "step": 5440 }, { "epoch": 0.13, "learning_rate": 1.9780815170545503e-05, "loss": 0.0227, "step": 5450 }, { "epoch": 0.13, "learning_rate": 1.9780322620366952e-05, "loss": 0.0247, "step": 5460 }, { "epoch": 0.13, "learning_rate": 1.97798300701884e-05, "loss": 0.0239, "step": 5470 }, { "epoch": 0.13, "learning_rate": 1.9779337520009854e-05, "loss": 0.0263, "step": 5480 }, { "epoch": 0.13, "learning_rate": 1.9778844969831303e-05, "loss": 0.0259, "step": 5490 }, { "epoch": 0.14, "learning_rate": 1.9778352419652752e-05, "loss": 0.023, "step": 5500 }, { "epoch": 0.14, "learning_rate": 1.9777859869474205e-05, "loss": 0.0364, "step": 5510 }, { "epoch": 0.14, "learning_rate": 1.9777367319295654e-05, "loss": 0.0261, "step": 5520 }, { "epoch": 0.14, "learning_rate": 1.9776874769117106e-05, "loss": 0.0231, "step": 5530 }, { "epoch": 0.14, "learning_rate": 1.9776382218938555e-05, "loss": 0.0255, "step": 5540 }, { "epoch": 0.14, "learning_rate": 1.9775889668760008e-05, "loss": 0.0233, "step": 5550 }, { "epoch": 0.14, "learning_rate": 1.9775397118581457e-05, "loss": 0.022, "step": 5560 }, { "epoch": 0.14, "learning_rate": 1.977490456840291e-05, "loss": 0.0295, "step": 5570 }, { "epoch": 0.14, "learning_rate": 1.977441201822436e-05, "loss": 0.0217, "step": 5580 }, { "epoch": 0.14, "learning_rate": 1.9773919468045808e-05, "loss": 0.0324, "step": 5590 }, { "epoch": 0.14, "learning_rate": 1.977342691786726e-05, "loss": 0.0313, "step": 5600 }, { "epoch": 0.14, "learning_rate": 1.977293436768871e-05, "loss": 0.0201, "step": 5610 }, { "epoch": 0.14, "learning_rate": 1.977244181751016e-05, "loss": 0.0227, "step": 5620 }, { "epoch": 0.14, "learning_rate": 1.977194926733161e-05, "loss": 0.0213, "step": 5630 }, { "epoch": 0.14, "learning_rate": 1.977145671715306e-05, "loss": 0.0233, "step": 5640 }, { "epoch": 0.14, "learning_rate": 1.977096416697451e-05, "loss": 0.0215, "step": 5650 }, { "epoch": 0.14, "learning_rate": 1.9770471616795962e-05, "loss": 0.023, "step": 5660 }, { "epoch": 0.14, "learning_rate": 1.976997906661741e-05, "loss": 0.0219, "step": 5670 }, { "epoch": 0.14, "learning_rate": 1.9769486516438864e-05, "loss": 0.0241, "step": 5680 }, { "epoch": 0.14, "learning_rate": 1.9768993966260316e-05, "loss": 0.0263, "step": 5690 }, { "epoch": 0.14, "learning_rate": 1.9768501416081766e-05, "loss": 0.0391, "step": 5700 }, { "epoch": 0.14, "learning_rate": 1.9768008865903218e-05, "loss": 0.0248, "step": 5710 }, { "epoch": 0.14, "learning_rate": 1.9767516315724667e-05, "loss": 0.0218, "step": 5720 }, { "epoch": 0.14, "learning_rate": 1.9767023765546116e-05, "loss": 0.0217, "step": 5730 }, { "epoch": 0.14, "learning_rate": 1.976653121536757e-05, "loss": 0.0231, "step": 5740 }, { "epoch": 0.14, "learning_rate": 1.9766038665189018e-05, "loss": 0.0319, "step": 5750 }, { "epoch": 0.14, "learning_rate": 1.9765546115010467e-05, "loss": 0.0218, "step": 5760 }, { "epoch": 0.14, "learning_rate": 1.976505356483192e-05, "loss": 0.0242, "step": 5770 }, { "epoch": 0.14, "learning_rate": 1.976456101465337e-05, "loss": 0.0269, "step": 5780 }, { "epoch": 0.14, "learning_rate": 1.9764068464474818e-05, "loss": 0.0358, "step": 5790 }, { "epoch": 0.14, "learning_rate": 1.976357591429627e-05, "loss": 0.0228, "step": 5800 }, { "epoch": 0.14, "learning_rate": 1.976308336411772e-05, "loss": 0.0242, "step": 5810 }, { "epoch": 0.14, "learning_rate": 1.9762590813939172e-05, "loss": 0.0202, "step": 5820 }, { "epoch": 0.14, "learning_rate": 1.976209826376062e-05, "loss": 0.0319, "step": 5830 }, { "epoch": 0.14, "learning_rate": 1.9761605713582074e-05, "loss": 0.0217, "step": 5840 }, { "epoch": 0.14, "learning_rate": 1.9761113163403523e-05, "loss": 0.0346, "step": 5850 }, { "epoch": 0.14, "learning_rate": 1.9760620613224976e-05, "loss": 0.0218, "step": 5860 }, { "epoch": 0.14, "learning_rate": 1.9760128063046425e-05, "loss": 0.0235, "step": 5870 }, { "epoch": 0.14, "learning_rate": 1.9759635512867874e-05, "loss": 0.028, "step": 5880 }, { "epoch": 0.14, "learning_rate": 1.9759142962689326e-05, "loss": 0.0334, "step": 5890 }, { "epoch": 0.14, "learning_rate": 1.9758650412510776e-05, "loss": 0.0219, "step": 5900 }, { "epoch": 0.15, "learning_rate": 1.9758157862332225e-05, "loss": 0.0238, "step": 5910 }, { "epoch": 0.15, "learning_rate": 1.9757665312153677e-05, "loss": 0.0247, "step": 5920 }, { "epoch": 0.15, "learning_rate": 1.9757172761975126e-05, "loss": 0.0224, "step": 5930 }, { "epoch": 0.15, "learning_rate": 1.975668021179658e-05, "loss": 0.0221, "step": 5940 }, { "epoch": 0.15, "learning_rate": 1.9756187661618028e-05, "loss": 0.0234, "step": 5950 }, { "epoch": 0.15, "learning_rate": 1.975569511143948e-05, "loss": 0.0206, "step": 5960 }, { "epoch": 0.15, "learning_rate": 1.975520256126093e-05, "loss": 0.0306, "step": 5970 }, { "epoch": 0.15, "learning_rate": 1.9754710011082382e-05, "loss": 0.0244, "step": 5980 }, { "epoch": 0.15, "learning_rate": 1.975421746090383e-05, "loss": 0.0232, "step": 5990 }, { "epoch": 0.15, "learning_rate": 1.9753724910725284e-05, "loss": 0.0218, "step": 6000 }, { "epoch": 0.15, "learning_rate": 1.9753232360546733e-05, "loss": 0.0276, "step": 6010 }, { "epoch": 0.15, "learning_rate": 1.9752739810368182e-05, "loss": 0.0243, "step": 6020 }, { "epoch": 0.15, "learning_rate": 1.9752247260189635e-05, "loss": 0.0224, "step": 6030 }, { "epoch": 0.15, "learning_rate": 1.9751754710011084e-05, "loss": 0.0242, "step": 6040 }, { "epoch": 0.15, "learning_rate": 1.9751262159832533e-05, "loss": 0.0193, "step": 6050 }, { "epoch": 0.15, "learning_rate": 1.9750769609653986e-05, "loss": 0.0212, "step": 6060 }, { "epoch": 0.15, "learning_rate": 1.9750277059475435e-05, "loss": 0.0294, "step": 6070 }, { "epoch": 0.15, "learning_rate": 1.9749784509296884e-05, "loss": 0.0245, "step": 6080 }, { "epoch": 0.15, "learning_rate": 1.9749291959118337e-05, "loss": 0.0269, "step": 6090 }, { "epoch": 0.15, "learning_rate": 1.9748799408939786e-05, "loss": 0.0248, "step": 6100 }, { "epoch": 0.15, "learning_rate": 1.9748306858761238e-05, "loss": 0.0238, "step": 6110 }, { "epoch": 0.15, "learning_rate": 1.9747814308582687e-05, "loss": 0.026, "step": 6120 }, { "epoch": 0.15, "learning_rate": 1.974732175840414e-05, "loss": 0.0231, "step": 6130 }, { "epoch": 0.15, "learning_rate": 1.974682920822559e-05, "loss": 0.0261, "step": 6140 }, { "epoch": 0.15, "learning_rate": 1.974633665804704e-05, "loss": 0.0241, "step": 6150 }, { "epoch": 0.15, "learning_rate": 1.974584410786849e-05, "loss": 0.0235, "step": 6160 }, { "epoch": 0.15, "learning_rate": 1.9745351557689943e-05, "loss": 0.0233, "step": 6170 }, { "epoch": 0.15, "learning_rate": 1.9744859007511392e-05, "loss": 0.02, "step": 6180 }, { "epoch": 0.15, "learning_rate": 1.974436645733284e-05, "loss": 0.0251, "step": 6190 }, { "epoch": 0.15, "learning_rate": 1.9743873907154294e-05, "loss": 0.0224, "step": 6200 }, { "epoch": 0.15, "learning_rate": 1.9743381356975743e-05, "loss": 0.0323, "step": 6210 }, { "epoch": 0.15, "learning_rate": 1.9742888806797192e-05, "loss": 0.0375, "step": 6220 }, { "epoch": 0.15, "learning_rate": 1.9742396256618645e-05, "loss": 0.042, "step": 6230 }, { "epoch": 0.15, "learning_rate": 1.9741903706440094e-05, "loss": 0.022, "step": 6240 }, { "epoch": 0.15, "learning_rate": 1.9741411156261547e-05, "loss": 0.0354, "step": 6250 }, { "epoch": 0.15, "learning_rate": 1.9740918606082996e-05, "loss": 0.0258, "step": 6260 }, { "epoch": 0.15, "learning_rate": 1.974042605590445e-05, "loss": 0.0404, "step": 6270 }, { "epoch": 0.15, "learning_rate": 1.9739933505725897e-05, "loss": 0.0213, "step": 6280 }, { "epoch": 0.15, "learning_rate": 1.973944095554735e-05, "loss": 0.0241, "step": 6290 }, { "epoch": 0.15, "learning_rate": 1.97389484053688e-05, "loss": 0.0229, "step": 6300 }, { "epoch": 0.16, "learning_rate": 1.973845585519025e-05, "loss": 0.0229, "step": 6310 }, { "epoch": 0.16, "learning_rate": 1.97379633050117e-05, "loss": 0.0204, "step": 6320 }, { "epoch": 0.16, "learning_rate": 1.973747075483315e-05, "loss": 0.0513, "step": 6330 }, { "epoch": 0.16, "learning_rate": 1.97369782046546e-05, "loss": 0.0248, "step": 6340 }, { "epoch": 0.16, "learning_rate": 1.973648565447605e-05, "loss": 0.0218, "step": 6350 }, { "epoch": 0.16, "learning_rate": 1.97359931042975e-05, "loss": 0.0217, "step": 6360 }, { "epoch": 0.16, "learning_rate": 1.973550055411895e-05, "loss": 0.0237, "step": 6370 }, { "epoch": 0.16, "learning_rate": 1.9735008003940403e-05, "loss": 0.0315, "step": 6380 }, { "epoch": 0.16, "learning_rate": 1.973451545376185e-05, "loss": 0.0244, "step": 6390 }, { "epoch": 0.16, "learning_rate": 1.9734022903583304e-05, "loss": 0.0233, "step": 6400 }, { "epoch": 0.16, "learning_rate": 1.9733530353404757e-05, "loss": 0.0265, "step": 6410 }, { "epoch": 0.16, "learning_rate": 1.9733037803226206e-05, "loss": 0.0233, "step": 6420 }, { "epoch": 0.16, "learning_rate": 1.973254525304766e-05, "loss": 0.0253, "step": 6430 }, { "epoch": 0.16, "learning_rate": 1.9732052702869108e-05, "loss": 0.0246, "step": 6440 }, { "epoch": 0.16, "learning_rate": 1.9731560152690557e-05, "loss": 0.0259, "step": 6450 }, { "epoch": 0.16, "learning_rate": 1.973106760251201e-05, "loss": 0.0233, "step": 6460 }, { "epoch": 0.16, "learning_rate": 1.973057505233346e-05, "loss": 0.0231, "step": 6470 }, { "epoch": 0.16, "learning_rate": 1.9730082502154908e-05, "loss": 0.0287, "step": 6480 }, { "epoch": 0.16, "learning_rate": 1.972958995197636e-05, "loss": 0.0278, "step": 6490 }, { "epoch": 0.16, "learning_rate": 1.972909740179781e-05, "loss": 0.0206, "step": 6500 }, { "epoch": 0.16, "learning_rate": 1.972860485161926e-05, "loss": 0.021, "step": 6510 }, { "epoch": 0.16, "learning_rate": 1.972811230144071e-05, "loss": 0.0234, "step": 6520 }, { "epoch": 0.16, "learning_rate": 1.972761975126216e-05, "loss": 0.0232, "step": 6530 }, { "epoch": 0.16, "learning_rate": 1.9727127201083613e-05, "loss": 0.021, "step": 6540 }, { "epoch": 0.16, "learning_rate": 1.9726634650905062e-05, "loss": 0.0243, "step": 6550 }, { "epoch": 0.16, "learning_rate": 1.9726142100726514e-05, "loss": 0.0293, "step": 6560 }, { "epoch": 0.16, "learning_rate": 1.9725649550547963e-05, "loss": 0.0241, "step": 6570 }, { "epoch": 0.16, "learning_rate": 1.9725157000369416e-05, "loss": 0.0211, "step": 6580 }, { "epoch": 0.16, "learning_rate": 1.9724664450190865e-05, "loss": 0.0208, "step": 6590 }, { "epoch": 0.16, "learning_rate": 1.9724171900012314e-05, "loss": 0.023, "step": 6600 }, { "epoch": 0.16, "learning_rate": 1.9723679349833767e-05, "loss": 0.0268, "step": 6610 }, { "epoch": 0.16, "learning_rate": 1.9723186799655216e-05, "loss": 0.0231, "step": 6620 }, { "epoch": 0.16, "learning_rate": 1.9722694249476665e-05, "loss": 0.0277, "step": 6630 }, { "epoch": 0.16, "learning_rate": 1.9722201699298118e-05, "loss": 0.0224, "step": 6640 }, { "epoch": 0.16, "learning_rate": 1.9721709149119567e-05, "loss": 0.024, "step": 6650 }, { "epoch": 0.16, "learning_rate": 1.972121659894102e-05, "loss": 0.0286, "step": 6660 }, { "epoch": 0.16, "learning_rate": 1.972072404876247e-05, "loss": 0.0246, "step": 6670 }, { "epoch": 0.16, "learning_rate": 1.972023149858392e-05, "loss": 0.0241, "step": 6680 }, { "epoch": 0.16, "learning_rate": 1.971973894840537e-05, "loss": 0.0275, "step": 6690 }, { "epoch": 0.16, "learning_rate": 1.9719246398226823e-05, "loss": 0.0222, "step": 6700 }, { "epoch": 0.16, "learning_rate": 1.9718753848048272e-05, "loss": 0.0194, "step": 6710 }, { "epoch": 0.17, "learning_rate": 1.9718261297869724e-05, "loss": 0.024, "step": 6720 }, { "epoch": 0.17, "learning_rate": 1.9717768747691174e-05, "loss": 0.0216, "step": 6730 }, { "epoch": 0.17, "learning_rate": 1.9717276197512623e-05, "loss": 0.0232, "step": 6740 }, { "epoch": 0.17, "learning_rate": 1.9716783647334075e-05, "loss": 0.0225, "step": 6750 }, { "epoch": 0.17, "learning_rate": 1.9716291097155524e-05, "loss": 0.0196, "step": 6760 }, { "epoch": 0.17, "learning_rate": 1.9715798546976974e-05, "loss": 0.0263, "step": 6770 }, { "epoch": 0.17, "learning_rate": 1.9715305996798426e-05, "loss": 0.0324, "step": 6780 }, { "epoch": 0.17, "learning_rate": 1.9714813446619875e-05, "loss": 0.0222, "step": 6790 }, { "epoch": 0.17, "learning_rate": 1.9714320896441324e-05, "loss": 0.0215, "step": 6800 }, { "epoch": 0.17, "learning_rate": 1.9713828346262777e-05, "loss": 0.0216, "step": 6810 }, { "epoch": 0.17, "learning_rate": 1.9713335796084226e-05, "loss": 0.0243, "step": 6820 }, { "epoch": 0.17, "learning_rate": 1.971284324590568e-05, "loss": 0.0182, "step": 6830 }, { "epoch": 0.17, "learning_rate": 1.9712350695727128e-05, "loss": 0.024, "step": 6840 }, { "epoch": 0.17, "learning_rate": 1.971185814554858e-05, "loss": 0.0233, "step": 6850 }, { "epoch": 0.17, "learning_rate": 1.971136559537003e-05, "loss": 0.0205, "step": 6860 }, { "epoch": 0.17, "learning_rate": 1.9710873045191482e-05, "loss": 0.0225, "step": 6870 }, { "epoch": 0.17, "learning_rate": 1.971038049501293e-05, "loss": 0.0193, "step": 6880 }, { "epoch": 0.17, "learning_rate": 1.970988794483438e-05, "loss": 0.0216, "step": 6890 }, { "epoch": 0.17, "learning_rate": 1.9709395394655833e-05, "loss": 0.0215, "step": 6900 }, { "epoch": 0.17, "learning_rate": 1.9708902844477282e-05, "loss": 0.0191, "step": 6910 }, { "epoch": 0.17, "learning_rate": 1.9708410294298734e-05, "loss": 0.0233, "step": 6920 }, { "epoch": 0.17, "learning_rate": 1.9707917744120184e-05, "loss": 0.0312, "step": 6930 }, { "epoch": 0.17, "learning_rate": 1.9707425193941633e-05, "loss": 0.0203, "step": 6940 }, { "epoch": 0.17, "learning_rate": 1.9706932643763085e-05, "loss": 0.0311, "step": 6950 }, { "epoch": 0.17, "learning_rate": 1.9706440093584534e-05, "loss": 0.023, "step": 6960 }, { "epoch": 0.17, "learning_rate": 1.9705947543405987e-05, "loss": 0.0216, "step": 6970 }, { "epoch": 0.17, "learning_rate": 1.9705454993227436e-05, "loss": 0.0333, "step": 6980 }, { "epoch": 0.17, "learning_rate": 1.970496244304889e-05, "loss": 0.0227, "step": 6990 }, { "epoch": 0.17, "learning_rate": 1.9704469892870338e-05, "loss": 0.0243, "step": 7000 }, { "epoch": 0.17, "learning_rate": 1.970397734269179e-05, "loss": 0.0279, "step": 7010 }, { "epoch": 0.17, "learning_rate": 1.970348479251324e-05, "loss": 0.0211, "step": 7020 }, { "epoch": 0.17, "learning_rate": 1.970299224233469e-05, "loss": 0.0322, "step": 7030 }, { "epoch": 0.17, "learning_rate": 1.970249969215614e-05, "loss": 0.0212, "step": 7040 }, { "epoch": 0.17, "learning_rate": 1.970200714197759e-05, "loss": 0.0224, "step": 7050 }, { "epoch": 0.17, "learning_rate": 1.970151459179904e-05, "loss": 0.0225, "step": 7060 }, { "epoch": 0.17, "learning_rate": 1.9701022041620492e-05, "loss": 0.0222, "step": 7070 }, { "epoch": 0.17, "learning_rate": 1.970052949144194e-05, "loss": 0.0232, "step": 7080 }, { "epoch": 0.17, "learning_rate": 1.970003694126339e-05, "loss": 0.0301, "step": 7090 }, { "epoch": 0.17, "learning_rate": 1.9699544391084843e-05, "loss": 0.0218, "step": 7100 }, { "epoch": 0.17, "learning_rate": 1.9699051840906292e-05, "loss": 0.0318, "step": 7110 }, { "epoch": 0.17, "learning_rate": 1.9698559290727745e-05, "loss": 0.0231, "step": 7120 }, { "epoch": 0.18, "learning_rate": 1.9698066740549197e-05, "loss": 0.0199, "step": 7130 }, { "epoch": 0.18, "learning_rate": 1.9697574190370646e-05, "loss": 0.0238, "step": 7140 }, { "epoch": 0.18, "learning_rate": 1.96970816401921e-05, "loss": 0.0189, "step": 7150 }, { "epoch": 0.18, "learning_rate": 1.9696589090013548e-05, "loss": 0.0336, "step": 7160 }, { "epoch": 0.18, "learning_rate": 1.9696096539834997e-05, "loss": 0.0211, "step": 7170 }, { "epoch": 0.18, "learning_rate": 1.969560398965645e-05, "loss": 0.021, "step": 7180 }, { "epoch": 0.18, "learning_rate": 1.96951114394779e-05, "loss": 0.0214, "step": 7190 }, { "epoch": 0.18, "learning_rate": 1.9694618889299348e-05, "loss": 0.0179, "step": 7200 }, { "epoch": 0.18, "learning_rate": 1.96941263391208e-05, "loss": 0.0264, "step": 7210 }, { "epoch": 0.18, "learning_rate": 1.969363378894225e-05, "loss": 0.0269, "step": 7220 }, { "epoch": 0.18, "learning_rate": 1.96931412387637e-05, "loss": 0.021, "step": 7230 }, { "epoch": 0.18, "learning_rate": 1.969264868858515e-05, "loss": 0.0181, "step": 7240 }, { "epoch": 0.18, "learning_rate": 1.96921561384066e-05, "loss": 0.0178, "step": 7250 }, { "epoch": 0.18, "learning_rate": 1.9691663588228053e-05, "loss": 0.0195, "step": 7260 }, { "epoch": 0.18, "learning_rate": 1.9691171038049502e-05, "loss": 0.0266, "step": 7270 }, { "epoch": 0.18, "learning_rate": 1.9690678487870955e-05, "loss": 0.02, "step": 7280 }, { "epoch": 0.18, "learning_rate": 1.9690185937692404e-05, "loss": 0.0234, "step": 7290 }, { "epoch": 0.18, "learning_rate": 1.9689693387513856e-05, "loss": 0.0215, "step": 7300 }, { "epoch": 0.18, "learning_rate": 1.9689200837335305e-05, "loss": 0.0226, "step": 7310 }, { "epoch": 0.18, "learning_rate": 1.9688708287156755e-05, "loss": 0.0431, "step": 7320 }, { "epoch": 0.18, "learning_rate": 1.9688215736978207e-05, "loss": 0.0236, "step": 7330 }, { "epoch": 0.18, "learning_rate": 1.9687723186799656e-05, "loss": 0.0228, "step": 7340 }, { "epoch": 0.18, "learning_rate": 1.9687230636621105e-05, "loss": 0.0231, "step": 7350 }, { "epoch": 0.18, "learning_rate": 1.9686738086442558e-05, "loss": 0.0375, "step": 7360 }, { "epoch": 0.18, "learning_rate": 1.9686245536264007e-05, "loss": 0.0194, "step": 7370 }, { "epoch": 0.18, "learning_rate": 1.968575298608546e-05, "loss": 0.0274, "step": 7380 }, { "epoch": 0.18, "learning_rate": 1.968526043590691e-05, "loss": 0.0229, "step": 7390 }, { "epoch": 0.18, "learning_rate": 1.968476788572836e-05, "loss": 0.0184, "step": 7400 }, { "epoch": 0.18, "learning_rate": 1.968427533554981e-05, "loss": 0.0211, "step": 7410 }, { "epoch": 0.18, "learning_rate": 1.9683782785371263e-05, "loss": 0.0181, "step": 7420 }, { "epoch": 0.18, "learning_rate": 1.9683290235192712e-05, "loss": 0.022, "step": 7430 }, { "epoch": 0.18, "learning_rate": 1.9682797685014165e-05, "loss": 0.019, "step": 7440 }, { "epoch": 0.18, "learning_rate": 1.9682305134835614e-05, "loss": 0.02, "step": 7450 }, { "epoch": 0.18, "learning_rate": 1.9681812584657063e-05, "loss": 0.0219, "step": 7460 }, { "epoch": 0.18, "learning_rate": 1.9681320034478516e-05, "loss": 0.0214, "step": 7470 }, { "epoch": 0.18, "learning_rate": 1.9680827484299965e-05, "loss": 0.0205, "step": 7480 }, { "epoch": 0.18, "learning_rate": 1.9680334934121414e-05, "loss": 0.0254, "step": 7490 }, { "epoch": 0.18, "learning_rate": 1.9679842383942866e-05, "loss": 0.0231, "step": 7500 }, { "epoch": 0.18, "learning_rate": 1.9679349833764316e-05, "loss": 0.0214, "step": 7510 }, { "epoch": 0.18, "learning_rate": 1.9678857283585765e-05, "loss": 0.0324, "step": 7520 }, { "epoch": 0.18, "learning_rate": 1.9678364733407217e-05, "loss": 0.0221, "step": 7530 }, { "epoch": 0.19, "learning_rate": 1.9677872183228666e-05, "loss": 0.0204, "step": 7540 }, { "epoch": 0.19, "learning_rate": 1.967737963305012e-05, "loss": 0.0189, "step": 7550 }, { "epoch": 0.19, "learning_rate": 1.9676887082871568e-05, "loss": 0.0202, "step": 7560 }, { "epoch": 0.19, "learning_rate": 1.967639453269302e-05, "loss": 0.0204, "step": 7570 }, { "epoch": 0.19, "learning_rate": 1.967590198251447e-05, "loss": 0.0338, "step": 7580 }, { "epoch": 0.19, "learning_rate": 1.9675409432335922e-05, "loss": 0.0234, "step": 7590 }, { "epoch": 0.19, "learning_rate": 1.967491688215737e-05, "loss": 0.0189, "step": 7600 }, { "epoch": 0.19, "learning_rate": 1.967442433197882e-05, "loss": 0.0286, "step": 7610 }, { "epoch": 0.19, "learning_rate": 1.9673931781800273e-05, "loss": 0.0217, "step": 7620 }, { "epoch": 0.19, "learning_rate": 1.9673439231621722e-05, "loss": 0.0203, "step": 7630 }, { "epoch": 0.19, "learning_rate": 1.9672946681443175e-05, "loss": 0.0211, "step": 7640 }, { "epoch": 0.19, "learning_rate": 1.9672454131264624e-05, "loss": 0.0211, "step": 7650 }, { "epoch": 0.19, "learning_rate": 1.9671961581086073e-05, "loss": 0.0224, "step": 7660 }, { "epoch": 0.19, "learning_rate": 1.9671469030907526e-05, "loss": 0.0234, "step": 7670 }, { "epoch": 0.19, "learning_rate": 1.9670976480728975e-05, "loss": 0.0187, "step": 7680 }, { "epoch": 0.19, "learning_rate": 1.9670483930550427e-05, "loss": 0.0209, "step": 7690 }, { "epoch": 0.19, "learning_rate": 1.9669991380371876e-05, "loss": 0.0234, "step": 7700 }, { "epoch": 0.19, "learning_rate": 1.966949883019333e-05, "loss": 0.0211, "step": 7710 }, { "epoch": 0.19, "learning_rate": 1.9669006280014778e-05, "loss": 0.0186, "step": 7720 }, { "epoch": 0.19, "learning_rate": 1.966851372983623e-05, "loss": 0.0199, "step": 7730 }, { "epoch": 0.19, "learning_rate": 1.966802117965768e-05, "loss": 0.031, "step": 7740 }, { "epoch": 0.19, "learning_rate": 1.966752862947913e-05, "loss": 0.0235, "step": 7750 }, { "epoch": 0.19, "learning_rate": 1.966703607930058e-05, "loss": 0.0294, "step": 7760 }, { "epoch": 0.19, "learning_rate": 1.966654352912203e-05, "loss": 0.0239, "step": 7770 }, { "epoch": 0.19, "learning_rate": 1.966605097894348e-05, "loss": 0.0179, "step": 7780 }, { "epoch": 0.19, "learning_rate": 1.9665558428764932e-05, "loss": 0.0328, "step": 7790 }, { "epoch": 0.19, "learning_rate": 1.966506587858638e-05, "loss": 0.0242, "step": 7800 }, { "epoch": 0.19, "learning_rate": 1.966457332840783e-05, "loss": 0.0282, "step": 7810 }, { "epoch": 0.19, "learning_rate": 1.9664080778229283e-05, "loss": 0.0238, "step": 7820 }, { "epoch": 0.19, "learning_rate": 1.9663588228050732e-05, "loss": 0.0218, "step": 7830 }, { "epoch": 0.19, "learning_rate": 1.9663095677872185e-05, "loss": 0.0197, "step": 7840 }, { "epoch": 0.19, "learning_rate": 1.9662603127693637e-05, "loss": 0.0299, "step": 7850 }, { "epoch": 0.19, "learning_rate": 1.9662110577515087e-05, "loss": 0.0195, "step": 7860 }, { "epoch": 0.19, "learning_rate": 1.966161802733654e-05, "loss": 0.0192, "step": 7870 }, { "epoch": 0.19, "learning_rate": 1.9661125477157988e-05, "loss": 0.0191, "step": 7880 }, { "epoch": 0.19, "learning_rate": 1.9660632926979437e-05, "loss": 0.0195, "step": 7890 }, { "epoch": 0.19, "learning_rate": 1.966014037680089e-05, "loss": 0.0208, "step": 7900 }, { "epoch": 0.19, "learning_rate": 1.965964782662234e-05, "loss": 0.0236, "step": 7910 }, { "epoch": 0.19, "learning_rate": 1.9659155276443788e-05, "loss": 0.0214, "step": 7920 }, { "epoch": 0.19, "learning_rate": 1.965866272626524e-05, "loss": 0.0367, "step": 7930 }, { "epoch": 0.2, "learning_rate": 1.965817017608669e-05, "loss": 0.0266, "step": 7940 }, { "epoch": 0.2, "learning_rate": 1.965767762590814e-05, "loss": 0.0281, "step": 7950 }, { "epoch": 0.2, "learning_rate": 1.965718507572959e-05, "loss": 0.0219, "step": 7960 }, { "epoch": 0.2, "learning_rate": 1.965669252555104e-05, "loss": 0.0208, "step": 7970 }, { "epoch": 0.2, "learning_rate": 1.9656199975372493e-05, "loss": 0.0181, "step": 7980 }, { "epoch": 0.2, "learning_rate": 1.9655707425193942e-05, "loss": 0.0259, "step": 7990 }, { "epoch": 0.2, "learning_rate": 1.9655214875015395e-05, "loss": 0.024, "step": 8000 }, { "epoch": 0.2, "learning_rate": 1.9654722324836844e-05, "loss": 0.0233, "step": 8010 }, { "epoch": 0.2, "learning_rate": 1.9654229774658297e-05, "loss": 0.0203, "step": 8020 }, { "epoch": 0.2, "learning_rate": 1.9653737224479746e-05, "loss": 0.018, "step": 8030 }, { "epoch": 0.2, "learning_rate": 1.9653244674301195e-05, "loss": 0.0208, "step": 8040 }, { "epoch": 0.2, "learning_rate": 1.9652752124122647e-05, "loss": 0.032, "step": 8050 }, { "epoch": 0.2, "learning_rate": 1.9652259573944097e-05, "loss": 0.0256, "step": 8060 }, { "epoch": 0.2, "learning_rate": 1.9651767023765546e-05, "loss": 0.032, "step": 8070 }, { "epoch": 0.2, "learning_rate": 1.9651274473587e-05, "loss": 0.0219, "step": 8080 }, { "epoch": 0.2, "learning_rate": 1.9650781923408447e-05, "loss": 0.0215, "step": 8090 }, { "epoch": 0.2, "learning_rate": 1.96502893732299e-05, "loss": 0.0211, "step": 8100 }, { "epoch": 0.2, "learning_rate": 1.964979682305135e-05, "loss": 0.019, "step": 8110 }, { "epoch": 0.2, "learning_rate": 1.9649304272872802e-05, "loss": 0.0209, "step": 8120 }, { "epoch": 0.2, "learning_rate": 1.964881172269425e-05, "loss": 0.0216, "step": 8130 }, { "epoch": 0.2, "learning_rate": 1.9648319172515703e-05, "loss": 0.0221, "step": 8140 }, { "epoch": 0.2, "learning_rate": 1.9647826622337153e-05, "loss": 0.0172, "step": 8150 }, { "epoch": 0.2, "learning_rate": 1.9647334072158605e-05, "loss": 0.0351, "step": 8160 }, { "epoch": 0.2, "learning_rate": 1.9646841521980054e-05, "loss": 0.031, "step": 8170 }, { "epoch": 0.2, "learning_rate": 1.9646348971801503e-05, "loss": 0.0224, "step": 8180 }, { "epoch": 0.2, "learning_rate": 1.9645856421622956e-05, "loss": 0.0214, "step": 8190 }, { "epoch": 0.2, "learning_rate": 1.9645363871444405e-05, "loss": 0.0211, "step": 8200 }, { "epoch": 0.2, "learning_rate": 1.9644871321265854e-05, "loss": 0.0204, "step": 8210 }, { "epoch": 0.2, "learning_rate": 1.9644378771087307e-05, "loss": 0.0209, "step": 8220 }, { "epoch": 0.2, "learning_rate": 1.9643886220908756e-05, "loss": 0.0311, "step": 8230 }, { "epoch": 0.2, "learning_rate": 1.9643393670730205e-05, "loss": 0.0221, "step": 8240 }, { "epoch": 0.2, "learning_rate": 1.9642901120551658e-05, "loss": 0.0192, "step": 8250 }, { "epoch": 0.2, "learning_rate": 1.9642408570373107e-05, "loss": 0.0284, "step": 8260 }, { "epoch": 0.2, "learning_rate": 1.964191602019456e-05, "loss": 0.0253, "step": 8270 }, { "epoch": 0.2, "learning_rate": 1.964142347001601e-05, "loss": 0.027, "step": 8280 }, { "epoch": 0.2, "learning_rate": 1.964093091983746e-05, "loss": 0.0214, "step": 8290 }, { "epoch": 0.2, "learning_rate": 1.964043836965891e-05, "loss": 0.0266, "step": 8300 }, { "epoch": 0.2, "learning_rate": 1.9639945819480363e-05, "loss": 0.0314, "step": 8310 }, { "epoch": 0.2, "learning_rate": 1.9639453269301812e-05, "loss": 0.0276, "step": 8320 }, { "epoch": 0.2, "learning_rate": 1.963896071912326e-05, "loss": 0.0212, "step": 8330 }, { "epoch": 0.2, "learning_rate": 1.9638468168944713e-05, "loss": 0.0318, "step": 8340 }, { "epoch": 0.21, "learning_rate": 1.9637975618766163e-05, "loss": 0.0389, "step": 8350 }, { "epoch": 0.21, "learning_rate": 1.9637483068587615e-05, "loss": 0.0206, "step": 8360 }, { "epoch": 0.21, "learning_rate": 1.9636990518409064e-05, "loss": 0.0217, "step": 8370 }, { "epoch": 0.21, "learning_rate": 1.9636497968230513e-05, "loss": 0.0202, "step": 8380 }, { "epoch": 0.21, "learning_rate": 1.9636005418051966e-05, "loss": 0.0181, "step": 8390 }, { "epoch": 0.21, "learning_rate": 1.9635512867873415e-05, "loss": 0.034, "step": 8400 }, { "epoch": 0.21, "learning_rate": 1.9635020317694868e-05, "loss": 0.0211, "step": 8410 }, { "epoch": 0.21, "learning_rate": 1.9634527767516317e-05, "loss": 0.0217, "step": 8420 }, { "epoch": 0.21, "learning_rate": 1.963403521733777e-05, "loss": 0.0196, "step": 8430 }, { "epoch": 0.21, "learning_rate": 1.963354266715922e-05, "loss": 0.0211, "step": 8440 }, { "epoch": 0.21, "learning_rate": 1.963305011698067e-05, "loss": 0.0394, "step": 8450 }, { "epoch": 0.21, "learning_rate": 1.963255756680212e-05, "loss": 0.0219, "step": 8460 }, { "epoch": 0.21, "learning_rate": 1.963206501662357e-05, "loss": 0.0212, "step": 8470 }, { "epoch": 0.21, "learning_rate": 1.9631572466445022e-05, "loss": 0.0208, "step": 8480 }, { "epoch": 0.21, "learning_rate": 1.963107991626647e-05, "loss": 0.0251, "step": 8490 }, { "epoch": 0.21, "learning_rate": 1.963058736608792e-05, "loss": 0.0207, "step": 8500 }, { "epoch": 0.21, "learning_rate": 1.9630094815909373e-05, "loss": 0.021, "step": 8510 }, { "epoch": 0.21, "learning_rate": 1.9629602265730822e-05, "loss": 0.0213, "step": 8520 }, { "epoch": 0.21, "learning_rate": 1.962910971555227e-05, "loss": 0.0187, "step": 8530 }, { "epoch": 0.21, "learning_rate": 1.9628617165373724e-05, "loss": 0.0233, "step": 8540 }, { "epoch": 0.21, "learning_rate": 1.9628124615195173e-05, "loss": 0.0304, "step": 8550 }, { "epoch": 0.21, "learning_rate": 1.9627632065016625e-05, "loss": 0.0222, "step": 8560 }, { "epoch": 0.21, "learning_rate": 1.9627139514838078e-05, "loss": 0.0235, "step": 8570 }, { "epoch": 0.21, "learning_rate": 1.9626646964659527e-05, "loss": 0.0193, "step": 8580 }, { "epoch": 0.21, "learning_rate": 1.962615441448098e-05, "loss": 0.0181, "step": 8590 }, { "epoch": 0.21, "learning_rate": 1.962566186430243e-05, "loss": 0.0468, "step": 8600 }, { "epoch": 0.21, "learning_rate": 1.9625169314123878e-05, "loss": 0.02, "step": 8610 }, { "epoch": 0.21, "learning_rate": 1.962467676394533e-05, "loss": 0.0209, "step": 8620 }, { "epoch": 0.21, "learning_rate": 1.962418421376678e-05, "loss": 0.0224, "step": 8630 }, { "epoch": 0.21, "learning_rate": 1.962369166358823e-05, "loss": 0.0233, "step": 8640 }, { "epoch": 0.21, "learning_rate": 1.962319911340968e-05, "loss": 0.0222, "step": 8650 }, { "epoch": 0.21, "learning_rate": 1.962270656323113e-05, "loss": 0.0202, "step": 8660 }, { "epoch": 0.21, "learning_rate": 1.962221401305258e-05, "loss": 0.0205, "step": 8670 }, { "epoch": 0.21, "learning_rate": 1.9621721462874032e-05, "loss": 0.0219, "step": 8680 }, { "epoch": 0.21, "learning_rate": 1.962122891269548e-05, "loss": 0.0191, "step": 8690 }, { "epoch": 0.21, "learning_rate": 1.9620736362516934e-05, "loss": 0.0192, "step": 8700 }, { "epoch": 0.21, "learning_rate": 1.9620243812338383e-05, "loss": 0.0218, "step": 8710 }, { "epoch": 0.21, "learning_rate": 1.9619751262159835e-05, "loss": 0.022, "step": 8720 }, { "epoch": 0.21, "learning_rate": 1.9619258711981284e-05, "loss": 0.0232, "step": 8730 }, { "epoch": 0.21, "learning_rate": 1.9618766161802737e-05, "loss": 0.0212, "step": 8740 }, { "epoch": 0.21, "learning_rate": 1.9618273611624186e-05, "loss": 0.0217, "step": 8750 }, { "epoch": 0.22, "learning_rate": 1.9617781061445635e-05, "loss": 0.0235, "step": 8760 }, { "epoch": 0.22, "learning_rate": 1.9617288511267088e-05, "loss": 0.0288, "step": 8770 }, { "epoch": 0.22, "learning_rate": 1.9616795961088537e-05, "loss": 0.0269, "step": 8780 }, { "epoch": 0.22, "learning_rate": 1.9616303410909986e-05, "loss": 0.0203, "step": 8790 }, { "epoch": 0.22, "learning_rate": 1.961581086073144e-05, "loss": 0.025, "step": 8800 }, { "epoch": 0.22, "learning_rate": 1.9615318310552888e-05, "loss": 0.0338, "step": 8810 }, { "epoch": 0.22, "learning_rate": 1.961482576037434e-05, "loss": 0.0239, "step": 8820 }, { "epoch": 0.22, "learning_rate": 1.961433321019579e-05, "loss": 0.0189, "step": 8830 }, { "epoch": 0.22, "learning_rate": 1.9613840660017242e-05, "loss": 0.0365, "step": 8840 }, { "epoch": 0.22, "learning_rate": 1.961334810983869e-05, "loss": 0.0204, "step": 8850 }, { "epoch": 0.22, "learning_rate": 1.9612855559660144e-05, "loss": 0.0207, "step": 8860 }, { "epoch": 0.22, "learning_rate": 1.9612363009481593e-05, "loss": 0.0191, "step": 8870 }, { "epoch": 0.22, "learning_rate": 1.9611870459303045e-05, "loss": 0.0204, "step": 8880 }, { "epoch": 0.22, "learning_rate": 1.9611377909124495e-05, "loss": 0.0208, "step": 8890 }, { "epoch": 0.22, "learning_rate": 1.9610885358945944e-05, "loss": 0.0262, "step": 8900 }, { "epoch": 0.22, "learning_rate": 1.9610392808767396e-05, "loss": 0.0189, "step": 8910 }, { "epoch": 0.22, "learning_rate": 1.9609900258588845e-05, "loss": 0.024, "step": 8920 }, { "epoch": 0.22, "learning_rate": 1.9609407708410295e-05, "loss": 0.0262, "step": 8930 }, { "epoch": 0.22, "learning_rate": 1.9608915158231747e-05, "loss": 0.03, "step": 8940 }, { "epoch": 0.22, "learning_rate": 1.9608422608053196e-05, "loss": 0.0198, "step": 8950 }, { "epoch": 0.22, "learning_rate": 1.9607930057874645e-05, "loss": 0.0189, "step": 8960 }, { "epoch": 0.22, "learning_rate": 1.9607437507696098e-05, "loss": 0.0215, "step": 8970 }, { "epoch": 0.22, "learning_rate": 1.9606944957517547e-05, "loss": 0.0192, "step": 8980 }, { "epoch": 0.22, "learning_rate": 1.9606452407339e-05, "loss": 0.0183, "step": 8990 }, { "epoch": 0.22, "learning_rate": 1.960595985716045e-05, "loss": 0.0233, "step": 9000 }, { "epoch": 0.22, "learning_rate": 1.96054673069819e-05, "loss": 0.02, "step": 9010 }, { "epoch": 0.22, "learning_rate": 1.960497475680335e-05, "loss": 0.0198, "step": 9020 }, { "epoch": 0.22, "learning_rate": 1.9604482206624803e-05, "loss": 0.0206, "step": 9030 }, { "epoch": 0.22, "learning_rate": 1.9603989656446252e-05, "loss": 0.0219, "step": 9040 }, { "epoch": 0.22, "learning_rate": 1.96034971062677e-05, "loss": 0.0613, "step": 9050 }, { "epoch": 0.22, "learning_rate": 1.9603004556089154e-05, "loss": 0.0191, "step": 9060 }, { "epoch": 0.22, "learning_rate": 1.9602512005910603e-05, "loss": 0.0231, "step": 9070 }, { "epoch": 0.22, "learning_rate": 1.9602019455732055e-05, "loss": 0.0192, "step": 9080 }, { "epoch": 0.22, "learning_rate": 1.9601526905553505e-05, "loss": 0.0189, "step": 9090 }, { "epoch": 0.22, "learning_rate": 1.9601034355374954e-05, "loss": 0.0375, "step": 9100 }, { "epoch": 0.22, "learning_rate": 1.9600541805196406e-05, "loss": 0.021, "step": 9110 }, { "epoch": 0.22, "learning_rate": 1.9600049255017855e-05, "loss": 0.0241, "step": 9120 }, { "epoch": 0.22, "learning_rate": 1.9599556704839308e-05, "loss": 0.019, "step": 9130 }, { "epoch": 0.22, "learning_rate": 1.9599064154660757e-05, "loss": 0.0188, "step": 9140 }, { "epoch": 0.22, "learning_rate": 1.959857160448221e-05, "loss": 0.0219, "step": 9150 }, { "epoch": 0.23, "learning_rate": 1.959807905430366e-05, "loss": 0.0294, "step": 9160 }, { "epoch": 0.23, "learning_rate": 1.959758650412511e-05, "loss": 0.0195, "step": 9170 }, { "epoch": 0.23, "learning_rate": 1.959709395394656e-05, "loss": 0.0195, "step": 9180 }, { "epoch": 0.23, "learning_rate": 1.959660140376801e-05, "loss": 0.018, "step": 9190 }, { "epoch": 0.23, "learning_rate": 1.9596108853589462e-05, "loss": 0.0177, "step": 9200 }, { "epoch": 0.23, "learning_rate": 1.959561630341091e-05, "loss": 0.0193, "step": 9210 }, { "epoch": 0.23, "learning_rate": 1.959512375323236e-05, "loss": 0.0206, "step": 9220 }, { "epoch": 0.23, "learning_rate": 1.9594631203053813e-05, "loss": 0.0207, "step": 9230 }, { "epoch": 0.23, "learning_rate": 1.9594138652875262e-05, "loss": 0.0187, "step": 9240 }, { "epoch": 0.23, "learning_rate": 1.959364610269671e-05, "loss": 0.0203, "step": 9250 }, { "epoch": 0.23, "learning_rate": 1.9593153552518164e-05, "loss": 0.0209, "step": 9260 }, { "epoch": 0.23, "learning_rate": 1.9592661002339613e-05, "loss": 0.0204, "step": 9270 }, { "epoch": 0.23, "learning_rate": 1.9592168452161066e-05, "loss": 0.0191, "step": 9280 }, { "epoch": 0.23, "learning_rate": 1.9591675901982518e-05, "loss": 0.0198, "step": 9290 }, { "epoch": 0.23, "learning_rate": 1.9591183351803967e-05, "loss": 0.0204, "step": 9300 }, { "epoch": 0.23, "learning_rate": 1.959069080162542e-05, "loss": 0.023, "step": 9310 }, { "epoch": 0.23, "learning_rate": 1.959019825144687e-05, "loss": 0.0313, "step": 9320 }, { "epoch": 0.23, "learning_rate": 1.9589705701268318e-05, "loss": 0.0222, "step": 9330 }, { "epoch": 0.23, "learning_rate": 1.958921315108977e-05, "loss": 0.0192, "step": 9340 }, { "epoch": 0.23, "learning_rate": 1.958872060091122e-05, "loss": 0.0225, "step": 9350 }, { "epoch": 0.23, "learning_rate": 1.958822805073267e-05, "loss": 0.0197, "step": 9360 }, { "epoch": 0.23, "learning_rate": 1.958773550055412e-05, "loss": 0.0239, "step": 9370 }, { "epoch": 0.23, "learning_rate": 1.958724295037557e-05, "loss": 0.0172, "step": 9380 }, { "epoch": 0.23, "learning_rate": 1.958675040019702e-05, "loss": 0.0212, "step": 9390 }, { "epoch": 0.23, "learning_rate": 1.9586257850018472e-05, "loss": 0.0174, "step": 9400 }, { "epoch": 0.23, "learning_rate": 1.958576529983992e-05, "loss": 0.0298, "step": 9410 }, { "epoch": 0.23, "learning_rate": 1.9585272749661374e-05, "loss": 0.0252, "step": 9420 }, { "epoch": 0.23, "learning_rate": 1.9584780199482823e-05, "loss": 0.0317, "step": 9430 }, { "epoch": 0.23, "learning_rate": 1.9584287649304276e-05, "loss": 0.0212, "step": 9440 }, { "epoch": 0.23, "learning_rate": 1.9583795099125725e-05, "loss": 0.0244, "step": 9450 }, { "epoch": 0.23, "learning_rate": 1.9583302548947177e-05, "loss": 0.0181, "step": 9460 }, { "epoch": 0.23, "learning_rate": 1.9582809998768626e-05, "loss": 0.03, "step": 9470 }, { "epoch": 0.23, "learning_rate": 1.9582317448590076e-05, "loss": 0.0204, "step": 9480 }, { "epoch": 0.23, "learning_rate": 1.9581824898411528e-05, "loss": 0.0166, "step": 9490 }, { "epoch": 0.23, "learning_rate": 1.9581332348232977e-05, "loss": 0.0202, "step": 9500 }, { "epoch": 0.23, "learning_rate": 1.9580839798054426e-05, "loss": 0.0209, "step": 9510 }, { "epoch": 0.23, "learning_rate": 1.958034724787588e-05, "loss": 0.0184, "step": 9520 }, { "epoch": 0.23, "learning_rate": 1.9579854697697328e-05, "loss": 0.0176, "step": 9530 }, { "epoch": 0.23, "learning_rate": 1.957936214751878e-05, "loss": 0.0201, "step": 9540 }, { "epoch": 0.23, "learning_rate": 1.957886959734023e-05, "loss": 0.0189, "step": 9550 }, { "epoch": 0.23, "learning_rate": 1.9578377047161682e-05, "loss": 0.0218, "step": 9560 }, { "epoch": 0.24, "learning_rate": 1.957788449698313e-05, "loss": 0.0219, "step": 9570 }, { "epoch": 0.24, "learning_rate": 1.9577391946804584e-05, "loss": 0.0197, "step": 9580 }, { "epoch": 0.24, "learning_rate": 1.9576899396626033e-05, "loss": 0.0219, "step": 9590 }, { "epoch": 0.24, "learning_rate": 1.9576406846447486e-05, "loss": 0.0199, "step": 9600 }, { "epoch": 0.24, "learning_rate": 1.9575914296268935e-05, "loss": 0.0176, "step": 9610 }, { "epoch": 0.24, "learning_rate": 1.9575421746090384e-05, "loss": 0.0188, "step": 9620 }, { "epoch": 0.24, "learning_rate": 1.9574929195911837e-05, "loss": 0.0294, "step": 9630 }, { "epoch": 0.24, "learning_rate": 1.9574436645733286e-05, "loss": 0.0207, "step": 9640 }, { "epoch": 0.24, "learning_rate": 1.9573944095554735e-05, "loss": 0.0199, "step": 9650 }, { "epoch": 0.24, "learning_rate": 1.9573451545376187e-05, "loss": 0.0205, "step": 9660 }, { "epoch": 0.24, "learning_rate": 1.9572958995197637e-05, "loss": 0.0166, "step": 9670 }, { "epoch": 0.24, "learning_rate": 1.9572466445019086e-05, "loss": 0.0229, "step": 9680 }, { "epoch": 0.24, "learning_rate": 1.9571973894840538e-05, "loss": 0.0197, "step": 9690 }, { "epoch": 0.24, "learning_rate": 1.9571481344661987e-05, "loss": 0.0192, "step": 9700 }, { "epoch": 0.24, "learning_rate": 1.957098879448344e-05, "loss": 0.0313, "step": 9710 }, { "epoch": 0.24, "learning_rate": 1.957049624430489e-05, "loss": 0.0207, "step": 9720 }, { "epoch": 0.24, "learning_rate": 1.957000369412634e-05, "loss": 0.0221, "step": 9730 }, { "epoch": 0.24, "learning_rate": 1.956951114394779e-05, "loss": 0.0197, "step": 9740 }, { "epoch": 0.24, "learning_rate": 1.9569018593769243e-05, "loss": 0.0201, "step": 9750 }, { "epoch": 0.24, "learning_rate": 1.9568526043590692e-05, "loss": 0.0253, "step": 9760 }, { "epoch": 0.24, "learning_rate": 1.956803349341214e-05, "loss": 0.0281, "step": 9770 }, { "epoch": 0.24, "learning_rate": 1.9567540943233594e-05, "loss": 0.0203, "step": 9780 }, { "epoch": 0.24, "learning_rate": 1.9567048393055043e-05, "loss": 0.0193, "step": 9790 }, { "epoch": 0.24, "learning_rate": 1.9566555842876496e-05, "loss": 0.0221, "step": 9800 }, { "epoch": 0.24, "learning_rate": 1.9566063292697945e-05, "loss": 0.0214, "step": 9810 }, { "epoch": 0.24, "learning_rate": 1.9565570742519394e-05, "loss": 0.032, "step": 9820 }, { "epoch": 0.24, "learning_rate": 1.9565078192340847e-05, "loss": 0.029, "step": 9830 }, { "epoch": 0.24, "learning_rate": 1.9564585642162296e-05, "loss": 0.0296, "step": 9840 }, { "epoch": 0.24, "learning_rate": 1.956409309198375e-05, "loss": 0.028, "step": 9850 }, { "epoch": 0.24, "learning_rate": 1.9563600541805197e-05, "loss": 0.0191, "step": 9860 }, { "epoch": 0.24, "learning_rate": 1.956310799162665e-05, "loss": 0.0197, "step": 9870 }, { "epoch": 0.24, "learning_rate": 1.95626154414481e-05, "loss": 0.0227, "step": 9880 }, { "epoch": 0.24, "learning_rate": 1.9562122891269552e-05, "loss": 0.02, "step": 9890 }, { "epoch": 0.24, "learning_rate": 1.9561630341091e-05, "loss": 0.0329, "step": 9900 }, { "epoch": 0.24, "learning_rate": 1.956113779091245e-05, "loss": 0.0277, "step": 9910 }, { "epoch": 0.24, "learning_rate": 1.9560645240733903e-05, "loss": 0.0251, "step": 9920 }, { "epoch": 0.24, "learning_rate": 1.9560152690555352e-05, "loss": 0.0205, "step": 9930 }, { "epoch": 0.24, "learning_rate": 1.95596601403768e-05, "loss": 0.0238, "step": 9940 }, { "epoch": 0.24, "learning_rate": 1.9559167590198253e-05, "loss": 0.0188, "step": 9950 }, { "epoch": 0.24, "learning_rate": 1.9558675040019703e-05, "loss": 0.0209, "step": 9960 }, { "epoch": 0.24, "learning_rate": 1.955818248984115e-05, "loss": 0.0192, "step": 9970 }, { "epoch": 0.25, "learning_rate": 1.9557689939662604e-05, "loss": 0.0194, "step": 9980 }, { "epoch": 0.25, "learning_rate": 1.9557197389484053e-05, "loss": 0.019, "step": 9990 }, { "epoch": 0.25, "learning_rate": 1.9556704839305506e-05, "loss": 0.0184, "step": 10000 }, { "epoch": 0.25, "eval_bleu": 96.8199, "eval_gen_len_ratio": 1.0001, "eval_loss": 0.012881733477115631, "eval_runtime": 254.3308, "eval_samples_per_second": 7.337, "eval_steps_per_second": 0.232, "step": 10000 }, { "epoch": 0.25, "learning_rate": 1.955621228912696e-05, "loss": 0.0211, "step": 10010 }, { "epoch": 0.25, "learning_rate": 1.9555719738948408e-05, "loss": 0.0197, "step": 10020 }, { "epoch": 0.25, "learning_rate": 1.955522718876986e-05, "loss": 0.0193, "step": 10030 }, { "epoch": 0.25, "learning_rate": 1.955473463859131e-05, "loss": 0.0196, "step": 10040 }, { "epoch": 0.25, "learning_rate": 1.955424208841276e-05, "loss": 0.0191, "step": 10050 }, { "epoch": 0.25, "learning_rate": 1.955374953823421e-05, "loss": 0.0196, "step": 10060 }, { "epoch": 0.25, "learning_rate": 1.955325698805566e-05, "loss": 0.0217, "step": 10070 }, { "epoch": 0.25, "learning_rate": 1.955276443787711e-05, "loss": 0.0199, "step": 10080 }, { "epoch": 0.25, "learning_rate": 1.9552271887698562e-05, "loss": 0.0187, "step": 10090 }, { "epoch": 0.25, "learning_rate": 1.955177933752001e-05, "loss": 0.0212, "step": 10100 }, { "epoch": 0.25, "learning_rate": 1.955128678734146e-05, "loss": 0.0187, "step": 10110 }, { "epoch": 0.25, "learning_rate": 1.9550794237162913e-05, "loss": 0.0214, "step": 10120 }, { "epoch": 0.25, "learning_rate": 1.9550301686984362e-05, "loss": 0.0204, "step": 10130 }, { "epoch": 0.25, "learning_rate": 1.9549809136805814e-05, "loss": 0.0204, "step": 10140 }, { "epoch": 0.25, "learning_rate": 1.9549316586627263e-05, "loss": 0.0172, "step": 10150 }, { "epoch": 0.25, "learning_rate": 1.9548824036448716e-05, "loss": 0.0208, "step": 10160 }, { "epoch": 0.25, "learning_rate": 1.9548331486270165e-05, "loss": 0.0223, "step": 10170 }, { "epoch": 0.25, "learning_rate": 1.9547838936091618e-05, "loss": 0.0179, "step": 10180 }, { "epoch": 0.25, "learning_rate": 1.9547346385913067e-05, "loss": 0.0201, "step": 10190 }, { "epoch": 0.25, "learning_rate": 1.9546853835734516e-05, "loss": 0.0167, "step": 10200 }, { "epoch": 0.25, "learning_rate": 1.954636128555597e-05, "loss": 0.019, "step": 10210 }, { "epoch": 0.25, "learning_rate": 1.9545868735377418e-05, "loss": 0.0195, "step": 10220 }, { "epoch": 0.25, "learning_rate": 1.9545376185198867e-05, "loss": 0.0189, "step": 10230 }, { "epoch": 0.25, "learning_rate": 1.954488363502032e-05, "loss": 0.0282, "step": 10240 }, { "epoch": 0.25, "learning_rate": 1.954439108484177e-05, "loss": 0.0202, "step": 10250 }, { "epoch": 0.25, "learning_rate": 1.954389853466322e-05, "loss": 0.0193, "step": 10260 }, { "epoch": 0.25, "learning_rate": 1.954340598448467e-05, "loss": 0.02, "step": 10270 }, { "epoch": 0.25, "learning_rate": 1.9542913434306123e-05, "loss": 0.0204, "step": 10280 }, { "epoch": 0.25, "learning_rate": 1.9542420884127572e-05, "loss": 0.0285, "step": 10290 }, { "epoch": 0.25, "learning_rate": 1.9541928333949024e-05, "loss": 0.0203, "step": 10300 }, { "epoch": 0.25, "learning_rate": 1.9541435783770474e-05, "loss": 0.0176, "step": 10310 }, { "epoch": 0.25, "learning_rate": 1.9540943233591926e-05, "loss": 0.0219, "step": 10320 }, { "epoch": 0.25, "learning_rate": 1.9540450683413375e-05, "loss": 0.0193, "step": 10330 }, { "epoch": 0.25, "learning_rate": 1.9539958133234824e-05, "loss": 0.0206, "step": 10340 }, { "epoch": 0.25, "learning_rate": 1.9539465583056277e-05, "loss": 0.0322, "step": 10350 }, { "epoch": 0.25, "learning_rate": 1.9538973032877726e-05, "loss": 0.0193, "step": 10360 }, { "epoch": 0.25, "learning_rate": 1.9538480482699175e-05, "loss": 0.0197, "step": 10370 }, { "epoch": 0.26, "learning_rate": 1.9537987932520628e-05, "loss": 0.0198, "step": 10380 }, { "epoch": 0.26, "learning_rate": 1.9537495382342077e-05, "loss": 0.0195, "step": 10390 }, { "epoch": 0.26, "learning_rate": 1.9537002832163526e-05, "loss": 0.0215, "step": 10400 }, { "epoch": 0.26, "learning_rate": 1.953651028198498e-05, "loss": 0.0195, "step": 10410 }, { "epoch": 0.26, "learning_rate": 1.9536017731806428e-05, "loss": 0.0175, "step": 10420 }, { "epoch": 0.26, "learning_rate": 1.953552518162788e-05, "loss": 0.0315, "step": 10430 }, { "epoch": 0.26, "learning_rate": 1.953503263144933e-05, "loss": 0.0222, "step": 10440 }, { "epoch": 0.26, "learning_rate": 1.9534540081270782e-05, "loss": 0.0189, "step": 10450 }, { "epoch": 0.26, "learning_rate": 1.953404753109223e-05, "loss": 0.0181, "step": 10460 }, { "epoch": 0.26, "learning_rate": 1.9533554980913684e-05, "loss": 0.0204, "step": 10470 }, { "epoch": 0.26, "learning_rate": 1.9533062430735133e-05, "loss": 0.0226, "step": 10480 }, { "epoch": 0.26, "learning_rate": 1.9532569880556582e-05, "loss": 0.0172, "step": 10490 }, { "epoch": 0.26, "learning_rate": 1.9532077330378034e-05, "loss": 0.0332, "step": 10500 }, { "epoch": 0.26, "learning_rate": 1.9531584780199484e-05, "loss": 0.0202, "step": 10510 }, { "epoch": 0.26, "learning_rate": 1.9531092230020936e-05, "loss": 0.0212, "step": 10520 }, { "epoch": 0.26, "learning_rate": 1.9530599679842385e-05, "loss": 0.0209, "step": 10530 }, { "epoch": 0.26, "learning_rate": 1.9530107129663834e-05, "loss": 0.0178, "step": 10540 }, { "epoch": 0.26, "learning_rate": 1.9529614579485287e-05, "loss": 0.0276, "step": 10550 }, { "epoch": 0.26, "learning_rate": 1.9529122029306736e-05, "loss": 0.0189, "step": 10560 }, { "epoch": 0.26, "learning_rate": 1.952862947912819e-05, "loss": 0.0187, "step": 10570 }, { "epoch": 0.26, "learning_rate": 1.9528136928949638e-05, "loss": 0.0176, "step": 10580 }, { "epoch": 0.26, "learning_rate": 1.952764437877109e-05, "loss": 0.0194, "step": 10590 }, { "epoch": 0.26, "learning_rate": 1.952715182859254e-05, "loss": 0.0222, "step": 10600 }, { "epoch": 0.26, "learning_rate": 1.9526659278413992e-05, "loss": 0.0206, "step": 10610 }, { "epoch": 0.26, "learning_rate": 1.952616672823544e-05, "loss": 0.02, "step": 10620 }, { "epoch": 0.26, "learning_rate": 1.952567417805689e-05, "loss": 0.0205, "step": 10630 }, { "epoch": 0.26, "learning_rate": 1.9525181627878343e-05, "loss": 0.0185, "step": 10640 }, { "epoch": 0.26, "learning_rate": 1.9524689077699792e-05, "loss": 0.0188, "step": 10650 }, { "epoch": 0.26, "learning_rate": 1.952419652752124e-05, "loss": 0.0201, "step": 10660 }, { "epoch": 0.26, "learning_rate": 1.9523703977342694e-05, "loss": 0.0246, "step": 10670 }, { "epoch": 0.26, "learning_rate": 1.9523211427164143e-05, "loss": 0.0235, "step": 10680 }, { "epoch": 0.26, "learning_rate": 1.9522718876985592e-05, "loss": 0.0353, "step": 10690 }, { "epoch": 0.26, "learning_rate": 1.9522226326807045e-05, "loss": 0.0201, "step": 10700 }, { "epoch": 0.26, "learning_rate": 1.9521733776628494e-05, "loss": 0.0267, "step": 10710 }, { "epoch": 0.26, "learning_rate": 1.9521241226449946e-05, "loss": 0.0197, "step": 10720 }, { "epoch": 0.26, "learning_rate": 1.95207486762714e-05, "loss": 0.0447, "step": 10730 }, { "epoch": 0.26, "learning_rate": 1.9520256126092848e-05, "loss": 0.0205, "step": 10740 }, { "epoch": 0.26, "learning_rate": 1.95197635759143e-05, "loss": 0.0268, "step": 10750 }, { "epoch": 0.26, "learning_rate": 1.951927102573575e-05, "loss": 0.0201, "step": 10760 }, { "epoch": 0.26, "learning_rate": 1.95187784755572e-05, "loss": 0.0199, "step": 10770 }, { "epoch": 0.26, "learning_rate": 1.951828592537865e-05, "loss": 0.0185, "step": 10780 }, { "epoch": 0.27, "learning_rate": 1.95177933752001e-05, "loss": 0.0188, "step": 10790 }, { "epoch": 0.27, "learning_rate": 1.951730082502155e-05, "loss": 0.0205, "step": 10800 }, { "epoch": 0.27, "learning_rate": 1.9516808274843002e-05, "loss": 0.0226, "step": 10810 }, { "epoch": 0.27, "learning_rate": 1.951631572466445e-05, "loss": 0.0179, "step": 10820 }, { "epoch": 0.27, "learning_rate": 1.95158231744859e-05, "loss": 0.0179, "step": 10830 }, { "epoch": 0.27, "learning_rate": 1.9515330624307353e-05, "loss": 0.0172, "step": 10840 }, { "epoch": 0.27, "learning_rate": 1.9514838074128802e-05, "loss": 0.0195, "step": 10850 }, { "epoch": 0.27, "learning_rate": 1.9514345523950255e-05, "loss": 0.0249, "step": 10860 }, { "epoch": 0.27, "learning_rate": 1.9513852973771704e-05, "loss": 0.0208, "step": 10870 }, { "epoch": 0.27, "learning_rate": 1.9513360423593156e-05, "loss": 0.0188, "step": 10880 }, { "epoch": 0.27, "learning_rate": 1.9512867873414605e-05, "loss": 0.025, "step": 10890 }, { "epoch": 0.27, "learning_rate": 1.9512375323236058e-05, "loss": 0.0201, "step": 10900 }, { "epoch": 0.27, "learning_rate": 1.9511882773057507e-05, "loss": 0.0271, "step": 10910 }, { "epoch": 0.27, "learning_rate": 1.9511390222878956e-05, "loss": 0.0179, "step": 10920 }, { "epoch": 0.27, "learning_rate": 1.951089767270041e-05, "loss": 0.0271, "step": 10930 }, { "epoch": 0.27, "learning_rate": 1.9510405122521858e-05, "loss": 0.0202, "step": 10940 }, { "epoch": 0.27, "learning_rate": 1.9509912572343307e-05, "loss": 0.0249, "step": 10950 }, { "epoch": 0.27, "learning_rate": 1.950942002216476e-05, "loss": 0.0196, "step": 10960 }, { "epoch": 0.27, "learning_rate": 1.950892747198621e-05, "loss": 0.0199, "step": 10970 }, { "epoch": 0.27, "learning_rate": 1.950843492180766e-05, "loss": 0.022, "step": 10980 }, { "epoch": 0.27, "learning_rate": 1.950794237162911e-05, "loss": 0.0261, "step": 10990 }, { "epoch": 0.27, "learning_rate": 1.9507449821450563e-05, "loss": 0.0228, "step": 11000 }, { "epoch": 0.27, "learning_rate": 1.9506957271272012e-05, "loss": 0.0199, "step": 11010 }, { "epoch": 0.27, "learning_rate": 1.9506464721093465e-05, "loss": 0.0179, "step": 11020 }, { "epoch": 0.27, "learning_rate": 1.9505972170914914e-05, "loss": 0.0182, "step": 11030 }, { "epoch": 0.27, "learning_rate": 1.9505479620736366e-05, "loss": 0.0182, "step": 11040 }, { "epoch": 0.27, "learning_rate": 1.9504987070557816e-05, "loss": 0.0267, "step": 11050 }, { "epoch": 0.27, "learning_rate": 1.9504494520379265e-05, "loss": 0.0356, "step": 11060 }, { "epoch": 0.27, "learning_rate": 1.9504001970200717e-05, "loss": 0.019, "step": 11070 }, { "epoch": 0.27, "learning_rate": 1.9503509420022166e-05, "loss": 0.0268, "step": 11080 }, { "epoch": 0.27, "learning_rate": 1.9503016869843616e-05, "loss": 0.0203, "step": 11090 }, { "epoch": 0.27, "learning_rate": 1.9502524319665068e-05, "loss": 0.0284, "step": 11100 }, { "epoch": 0.27, "learning_rate": 1.9502031769486517e-05, "loss": 0.0198, "step": 11110 }, { "epoch": 0.27, "learning_rate": 1.9501539219307966e-05, "loss": 0.0175, "step": 11120 }, { "epoch": 0.27, "learning_rate": 1.950104666912942e-05, "loss": 0.0196, "step": 11130 }, { "epoch": 0.27, "learning_rate": 1.9500554118950868e-05, "loss": 0.0159, "step": 11140 }, { "epoch": 0.27, "learning_rate": 1.950006156877232e-05, "loss": 0.0188, "step": 11150 }, { "epoch": 0.27, "learning_rate": 1.949956901859377e-05, "loss": 0.0191, "step": 11160 }, { "epoch": 0.27, "learning_rate": 1.9499076468415222e-05, "loss": 0.02, "step": 11170 }, { "epoch": 0.27, "learning_rate": 1.949858391823667e-05, "loss": 0.018, "step": 11180 }, { "epoch": 0.27, "learning_rate": 1.9498091368058124e-05, "loss": 0.0212, "step": 11190 }, { "epoch": 0.28, "learning_rate": 1.9497598817879573e-05, "loss": 0.0247, "step": 11200 }, { "epoch": 0.28, "learning_rate": 1.9497106267701022e-05, "loss": 0.0288, "step": 11210 }, { "epoch": 0.28, "learning_rate": 1.9496613717522475e-05, "loss": 0.0222, "step": 11220 }, { "epoch": 0.28, "learning_rate": 1.9496121167343924e-05, "loss": 0.0171, "step": 11230 }, { "epoch": 0.28, "learning_rate": 1.9495628617165377e-05, "loss": 0.0203, "step": 11240 }, { "epoch": 0.28, "learning_rate": 1.9495136066986826e-05, "loss": 0.0191, "step": 11250 }, { "epoch": 0.28, "learning_rate": 1.9494643516808275e-05, "loss": 0.0184, "step": 11260 }, { "epoch": 0.28, "learning_rate": 1.9494150966629727e-05, "loss": 0.0257, "step": 11270 }, { "epoch": 0.28, "learning_rate": 1.9493658416451176e-05, "loss": 0.0189, "step": 11280 }, { "epoch": 0.28, "learning_rate": 1.949316586627263e-05, "loss": 0.0185, "step": 11290 }, { "epoch": 0.28, "learning_rate": 1.9492673316094078e-05, "loss": 0.0188, "step": 11300 }, { "epoch": 0.28, "learning_rate": 1.949218076591553e-05, "loss": 0.0196, "step": 11310 }, { "epoch": 0.28, "learning_rate": 1.949168821573698e-05, "loss": 0.0158, "step": 11320 }, { "epoch": 0.28, "learning_rate": 1.9491195665558432e-05, "loss": 0.0185, "step": 11330 }, { "epoch": 0.28, "learning_rate": 1.949070311537988e-05, "loss": 0.0182, "step": 11340 }, { "epoch": 0.28, "learning_rate": 1.949021056520133e-05, "loss": 0.0201, "step": 11350 }, { "epoch": 0.28, "learning_rate": 1.9489718015022783e-05, "loss": 0.0195, "step": 11360 }, { "epoch": 0.28, "learning_rate": 1.9489225464844232e-05, "loss": 0.0187, "step": 11370 }, { "epoch": 0.28, "learning_rate": 1.948873291466568e-05, "loss": 0.0191, "step": 11380 }, { "epoch": 0.28, "learning_rate": 1.9488240364487134e-05, "loss": 0.0219, "step": 11390 }, { "epoch": 0.28, "learning_rate": 1.9487747814308583e-05, "loss": 0.03, "step": 11400 }, { "epoch": 0.28, "learning_rate": 1.9487255264130032e-05, "loss": 0.0355, "step": 11410 }, { "epoch": 0.28, "learning_rate": 1.9486762713951485e-05, "loss": 0.0235, "step": 11420 }, { "epoch": 0.28, "learning_rate": 1.9486270163772934e-05, "loss": 0.028, "step": 11430 }, { "epoch": 0.28, "learning_rate": 1.9485777613594387e-05, "loss": 0.0252, "step": 11440 }, { "epoch": 0.28, "learning_rate": 1.948528506341584e-05, "loss": 0.0174, "step": 11450 }, { "epoch": 0.28, "learning_rate": 1.9484792513237288e-05, "loss": 0.0203, "step": 11460 }, { "epoch": 0.28, "learning_rate": 1.948429996305874e-05, "loss": 0.0196, "step": 11470 }, { "epoch": 0.28, "learning_rate": 1.948380741288019e-05, "loss": 0.0252, "step": 11480 }, { "epoch": 0.28, "learning_rate": 1.948331486270164e-05, "loss": 0.0186, "step": 11490 }, { "epoch": 0.28, "learning_rate": 1.948282231252309e-05, "loss": 0.0165, "step": 11500 }, { "epoch": 0.28, "learning_rate": 1.948232976234454e-05, "loss": 0.0283, "step": 11510 }, { "epoch": 0.28, "learning_rate": 1.948183721216599e-05, "loss": 0.0323, "step": 11520 }, { "epoch": 0.28, "learning_rate": 1.9481344661987442e-05, "loss": 0.0293, "step": 11530 }, { "epoch": 0.28, "learning_rate": 1.948085211180889e-05, "loss": 0.017, "step": 11540 }, { "epoch": 0.28, "learning_rate": 1.948035956163034e-05, "loss": 0.0158, "step": 11550 }, { "epoch": 0.28, "learning_rate": 1.9479867011451793e-05, "loss": 0.034, "step": 11560 }, { "epoch": 0.28, "learning_rate": 1.9479374461273242e-05, "loss": 0.0206, "step": 11570 }, { "epoch": 0.28, "learning_rate": 1.9478881911094695e-05, "loss": 0.0171, "step": 11580 }, { "epoch": 0.28, "learning_rate": 1.9478389360916144e-05, "loss": 0.0202, "step": 11590 }, { "epoch": 0.28, "learning_rate": 1.9477896810737597e-05, "loss": 0.0172, "step": 11600 }, { "epoch": 0.29, "learning_rate": 1.9477404260559046e-05, "loss": 0.03, "step": 11610 }, { "epoch": 0.29, "learning_rate": 1.94769117103805e-05, "loss": 0.0186, "step": 11620 }, { "epoch": 0.29, "learning_rate": 1.9476419160201948e-05, "loss": 0.0302, "step": 11630 }, { "epoch": 0.29, "learning_rate": 1.9475926610023397e-05, "loss": 0.017, "step": 11640 }, { "epoch": 0.29, "learning_rate": 1.947543405984485e-05, "loss": 0.0194, "step": 11650 }, { "epoch": 0.29, "learning_rate": 1.94749415096663e-05, "loss": 0.0358, "step": 11660 }, { "epoch": 0.29, "learning_rate": 1.9474448959487747e-05, "loss": 0.0164, "step": 11670 }, { "epoch": 0.29, "learning_rate": 1.94739564093092e-05, "loss": 0.0178, "step": 11680 }, { "epoch": 0.29, "learning_rate": 1.947346385913065e-05, "loss": 0.0331, "step": 11690 }, { "epoch": 0.29, "learning_rate": 1.9472971308952102e-05, "loss": 0.0172, "step": 11700 }, { "epoch": 0.29, "learning_rate": 1.947247875877355e-05, "loss": 0.0182, "step": 11710 }, { "epoch": 0.29, "learning_rate": 1.9471986208595003e-05, "loss": 0.0179, "step": 11720 }, { "epoch": 0.29, "learning_rate": 1.9471493658416453e-05, "loss": 0.0179, "step": 11730 }, { "epoch": 0.29, "learning_rate": 1.9471001108237905e-05, "loss": 0.0205, "step": 11740 }, { "epoch": 0.29, "learning_rate": 1.9470508558059354e-05, "loss": 0.018, "step": 11750 }, { "epoch": 0.29, "learning_rate": 1.9470016007880807e-05, "loss": 0.0184, "step": 11760 }, { "epoch": 0.29, "learning_rate": 1.9469523457702256e-05, "loss": 0.0222, "step": 11770 }, { "epoch": 0.29, "learning_rate": 1.9469030907523705e-05, "loss": 0.0178, "step": 11780 }, { "epoch": 0.29, "learning_rate": 1.9468538357345158e-05, "loss": 0.0492, "step": 11790 }, { "epoch": 0.29, "learning_rate": 1.9468045807166607e-05, "loss": 0.0229, "step": 11800 }, { "epoch": 0.29, "learning_rate": 1.9467553256988056e-05, "loss": 0.0202, "step": 11810 }, { "epoch": 0.29, "learning_rate": 1.946706070680951e-05, "loss": 0.0195, "step": 11820 }, { "epoch": 0.29, "learning_rate": 1.9466568156630958e-05, "loss": 0.0236, "step": 11830 }, { "epoch": 0.29, "learning_rate": 1.9466075606452407e-05, "loss": 0.0262, "step": 11840 }, { "epoch": 0.29, "learning_rate": 1.946558305627386e-05, "loss": 0.0257, "step": 11850 }, { "epoch": 0.29, "learning_rate": 1.946509050609531e-05, "loss": 0.0174, "step": 11860 }, { "epoch": 0.29, "learning_rate": 1.946459795591676e-05, "loss": 0.0196, "step": 11870 }, { "epoch": 0.29, "learning_rate": 1.946410540573821e-05, "loss": 0.0192, "step": 11880 }, { "epoch": 0.29, "learning_rate": 1.9463612855559663e-05, "loss": 0.0369, "step": 11890 }, { "epoch": 0.29, "learning_rate": 1.9463120305381112e-05, "loss": 0.0187, "step": 11900 }, { "epoch": 0.29, "learning_rate": 1.9462627755202564e-05, "loss": 0.0154, "step": 11910 }, { "epoch": 0.29, "learning_rate": 1.9462135205024013e-05, "loss": 0.0212, "step": 11920 }, { "epoch": 0.29, "learning_rate": 1.9461642654845463e-05, "loss": 0.0188, "step": 11930 }, { "epoch": 0.29, "learning_rate": 1.9461150104666915e-05, "loss": 0.0277, "step": 11940 }, { "epoch": 0.29, "learning_rate": 1.9460657554488364e-05, "loss": 0.0182, "step": 11950 }, { "epoch": 0.29, "learning_rate": 1.9460165004309817e-05, "loss": 0.0204, "step": 11960 }, { "epoch": 0.29, "learning_rate": 1.9459672454131266e-05, "loss": 0.0188, "step": 11970 }, { "epoch": 0.29, "learning_rate": 1.9459179903952715e-05, "loss": 0.0196, "step": 11980 }, { "epoch": 0.29, "learning_rate": 1.9458687353774168e-05, "loss": 0.0205, "step": 11990 }, { "epoch": 0.29, "learning_rate": 1.9458194803595617e-05, "loss": 0.019, "step": 12000 }, { "epoch": 0.3, "learning_rate": 1.945770225341707e-05, "loss": 0.0189, "step": 12010 }, { "epoch": 0.3, "learning_rate": 1.945720970323852e-05, "loss": 0.0185, "step": 12020 }, { "epoch": 0.3, "learning_rate": 1.945671715305997e-05, "loss": 0.021, "step": 12030 }, { "epoch": 0.3, "learning_rate": 1.945622460288142e-05, "loss": 0.0185, "step": 12040 }, { "epoch": 0.3, "learning_rate": 1.9455732052702873e-05, "loss": 0.0198, "step": 12050 }, { "epoch": 0.3, "learning_rate": 1.9455239502524322e-05, "loss": 0.019, "step": 12060 }, { "epoch": 0.3, "learning_rate": 1.945474695234577e-05, "loss": 0.0212, "step": 12070 }, { "epoch": 0.3, "learning_rate": 1.9454254402167224e-05, "loss": 0.026, "step": 12080 }, { "epoch": 0.3, "learning_rate": 1.9453761851988673e-05, "loss": 0.0171, "step": 12090 }, { "epoch": 0.3, "learning_rate": 1.9453269301810122e-05, "loss": 0.017, "step": 12100 }, { "epoch": 0.3, "learning_rate": 1.9452776751631574e-05, "loss": 0.025, "step": 12110 }, { "epoch": 0.3, "learning_rate": 1.9452284201453024e-05, "loss": 0.025, "step": 12120 }, { "epoch": 0.3, "learning_rate": 1.9451791651274473e-05, "loss": 0.0168, "step": 12130 }, { "epoch": 0.3, "learning_rate": 1.9451299101095925e-05, "loss": 0.0219, "step": 12140 }, { "epoch": 0.3, "learning_rate": 1.9450806550917374e-05, "loss": 0.0292, "step": 12150 }, { "epoch": 0.3, "learning_rate": 1.9450314000738827e-05, "loss": 0.0254, "step": 12160 }, { "epoch": 0.3, "learning_rate": 1.944982145056028e-05, "loss": 0.0269, "step": 12170 }, { "epoch": 0.3, "learning_rate": 1.944932890038173e-05, "loss": 0.0224, "step": 12180 }, { "epoch": 0.3, "learning_rate": 1.944883635020318e-05, "loss": 0.0186, "step": 12190 }, { "epoch": 0.3, "learning_rate": 1.944834380002463e-05, "loss": 0.0189, "step": 12200 }, { "epoch": 0.3, "learning_rate": 1.944785124984608e-05, "loss": 0.0178, "step": 12210 }, { "epoch": 0.3, "learning_rate": 1.9447358699667532e-05, "loss": 0.0172, "step": 12220 }, { "epoch": 0.3, "learning_rate": 1.944686614948898e-05, "loss": 0.0187, "step": 12230 }, { "epoch": 0.3, "learning_rate": 1.944637359931043e-05, "loss": 0.0199, "step": 12240 }, { "epoch": 0.3, "learning_rate": 1.9445881049131883e-05, "loss": 0.0185, "step": 12250 }, { "epoch": 0.3, "learning_rate": 1.9445388498953332e-05, "loss": 0.0231, "step": 12260 }, { "epoch": 0.3, "learning_rate": 1.944489594877478e-05, "loss": 0.0215, "step": 12270 }, { "epoch": 0.3, "learning_rate": 1.9444403398596234e-05, "loss": 0.0191, "step": 12280 }, { "epoch": 0.3, "learning_rate": 1.9443910848417683e-05, "loss": 0.0186, "step": 12290 }, { "epoch": 0.3, "learning_rate": 1.9443418298239135e-05, "loss": 0.0334, "step": 12300 }, { "epoch": 0.3, "learning_rate": 1.9442925748060584e-05, "loss": 0.0161, "step": 12310 }, { "epoch": 0.3, "learning_rate": 1.9442433197882037e-05, "loss": 0.017, "step": 12320 }, { "epoch": 0.3, "learning_rate": 1.9441940647703486e-05, "loss": 0.0228, "step": 12330 }, { "epoch": 0.3, "learning_rate": 1.944144809752494e-05, "loss": 0.0173, "step": 12340 }, { "epoch": 0.3, "learning_rate": 1.9440955547346388e-05, "loss": 0.027, "step": 12350 }, { "epoch": 0.3, "learning_rate": 1.9440462997167837e-05, "loss": 0.0273, "step": 12360 }, { "epoch": 0.3, "learning_rate": 1.943997044698929e-05, "loss": 0.0242, "step": 12370 }, { "epoch": 0.3, "learning_rate": 1.943947789681074e-05, "loss": 0.0191, "step": 12380 }, { "epoch": 0.3, "learning_rate": 1.9438985346632188e-05, "loss": 0.0233, "step": 12390 }, { "epoch": 0.3, "learning_rate": 1.943849279645364e-05, "loss": 0.0186, "step": 12400 }, { "epoch": 0.3, "learning_rate": 1.943800024627509e-05, "loss": 0.0196, "step": 12410 }, { "epoch": 0.31, "learning_rate": 1.9437507696096542e-05, "loss": 0.0171, "step": 12420 }, { "epoch": 0.31, "learning_rate": 1.943701514591799e-05, "loss": 0.0158, "step": 12430 }, { "epoch": 0.31, "learning_rate": 1.9436522595739444e-05, "loss": 0.0172, "step": 12440 }, { "epoch": 0.31, "learning_rate": 1.9436030045560893e-05, "loss": 0.0252, "step": 12450 }, { "epoch": 0.31, "learning_rate": 1.9435537495382345e-05, "loss": 0.0266, "step": 12460 }, { "epoch": 0.31, "learning_rate": 1.9435044945203795e-05, "loss": 0.0199, "step": 12470 }, { "epoch": 0.31, "learning_rate": 1.9434552395025247e-05, "loss": 0.0533, "step": 12480 }, { "epoch": 0.31, "learning_rate": 1.9434059844846696e-05, "loss": 0.0177, "step": 12490 }, { "epoch": 0.31, "learning_rate": 1.9433567294668145e-05, "loss": 0.0173, "step": 12500 }, { "epoch": 0.31, "learning_rate": 1.9433074744489598e-05, "loss": 0.0212, "step": 12510 }, { "epoch": 0.31, "learning_rate": 1.9432582194311047e-05, "loss": 0.0175, "step": 12520 }, { "epoch": 0.31, "learning_rate": 1.9432089644132496e-05, "loss": 0.0181, "step": 12530 }, { "epoch": 0.31, "learning_rate": 1.943159709395395e-05, "loss": 0.0205, "step": 12540 }, { "epoch": 0.31, "learning_rate": 1.9431104543775398e-05, "loss": 0.0173, "step": 12550 }, { "epoch": 0.31, "learning_rate": 1.9430611993596847e-05, "loss": 0.0171, "step": 12560 }, { "epoch": 0.31, "learning_rate": 1.94301194434183e-05, "loss": 0.017, "step": 12570 }, { "epoch": 0.31, "learning_rate": 1.942962689323975e-05, "loss": 0.0174, "step": 12580 }, { "epoch": 0.31, "learning_rate": 1.94291343430612e-05, "loss": 0.016, "step": 12590 }, { "epoch": 0.31, "learning_rate": 1.942864179288265e-05, "loss": 0.0161, "step": 12600 }, { "epoch": 0.31, "learning_rate": 1.9428149242704103e-05, "loss": 0.0257, "step": 12610 }, { "epoch": 0.31, "learning_rate": 1.9427656692525552e-05, "loss": 0.014, "step": 12620 }, { "epoch": 0.31, "learning_rate": 1.9427164142347005e-05, "loss": 0.0188, "step": 12630 }, { "epoch": 0.31, "learning_rate": 1.9426671592168454e-05, "loss": 0.0172, "step": 12640 }, { "epoch": 0.31, "learning_rate": 1.9426179041989903e-05, "loss": 0.0233, "step": 12650 }, { "epoch": 0.31, "learning_rate": 1.9425686491811356e-05, "loss": 0.0226, "step": 12660 }, { "epoch": 0.31, "learning_rate": 1.9425193941632805e-05, "loss": 0.0179, "step": 12670 }, { "epoch": 0.31, "learning_rate": 1.9424701391454257e-05, "loss": 0.0296, "step": 12680 }, { "epoch": 0.31, "learning_rate": 1.9424208841275706e-05, "loss": 0.0172, "step": 12690 }, { "epoch": 0.31, "learning_rate": 1.9423716291097155e-05, "loss": 0.0216, "step": 12700 }, { "epoch": 0.31, "learning_rate": 1.9423223740918608e-05, "loss": 0.0279, "step": 12710 }, { "epoch": 0.31, "learning_rate": 1.9422731190740057e-05, "loss": 0.0183, "step": 12720 }, { "epoch": 0.31, "learning_rate": 1.942223864056151e-05, "loss": 0.0174, "step": 12730 }, { "epoch": 0.31, "learning_rate": 1.942174609038296e-05, "loss": 0.0193, "step": 12740 }, { "epoch": 0.31, "learning_rate": 1.942125354020441e-05, "loss": 0.0247, "step": 12750 }, { "epoch": 0.31, "learning_rate": 1.942076099002586e-05, "loss": 0.0195, "step": 12760 }, { "epoch": 0.31, "learning_rate": 1.9420268439847313e-05, "loss": 0.0176, "step": 12770 }, { "epoch": 0.31, "learning_rate": 1.9419775889668762e-05, "loss": 0.0177, "step": 12780 }, { "epoch": 0.31, "learning_rate": 1.941928333949021e-05, "loss": 0.0173, "step": 12790 }, { "epoch": 0.31, "learning_rate": 1.9418790789311664e-05, "loss": 0.0249, "step": 12800 }, { "epoch": 0.31, "learning_rate": 1.9418298239133113e-05, "loss": 0.0154, "step": 12810 }, { "epoch": 0.31, "learning_rate": 1.9417805688954562e-05, "loss": 0.0187, "step": 12820 }, { "epoch": 0.32, "learning_rate": 1.9417313138776015e-05, "loss": 0.0194, "step": 12830 }, { "epoch": 0.32, "learning_rate": 1.9416820588597464e-05, "loss": 0.0153, "step": 12840 }, { "epoch": 0.32, "learning_rate": 1.9416328038418913e-05, "loss": 0.0161, "step": 12850 }, { "epoch": 0.32, "learning_rate": 1.9415835488240366e-05, "loss": 0.0167, "step": 12860 }, { "epoch": 0.32, "learning_rate": 1.9415342938061815e-05, "loss": 0.017, "step": 12870 }, { "epoch": 0.32, "learning_rate": 1.9414850387883267e-05, "loss": 0.0158, "step": 12880 }, { "epoch": 0.32, "learning_rate": 1.941435783770472e-05, "loss": 0.0169, "step": 12890 }, { "epoch": 0.32, "learning_rate": 1.941386528752617e-05, "loss": 0.0201, "step": 12900 }, { "epoch": 0.32, "learning_rate": 1.941337273734762e-05, "loss": 0.0245, "step": 12910 }, { "epoch": 0.32, "learning_rate": 1.941288018716907e-05, "loss": 0.0217, "step": 12920 }, { "epoch": 0.32, "learning_rate": 1.941238763699052e-05, "loss": 0.0172, "step": 12930 }, { "epoch": 0.32, "learning_rate": 1.9411895086811972e-05, "loss": 0.0197, "step": 12940 }, { "epoch": 0.32, "learning_rate": 1.941140253663342e-05, "loss": 0.017, "step": 12950 }, { "epoch": 0.32, "learning_rate": 1.941090998645487e-05, "loss": 0.0188, "step": 12960 }, { "epoch": 0.32, "learning_rate": 1.9410417436276323e-05, "loss": 0.028, "step": 12970 }, { "epoch": 0.32, "learning_rate": 1.9409924886097772e-05, "loss": 0.0214, "step": 12980 }, { "epoch": 0.32, "learning_rate": 1.940943233591922e-05, "loss": 0.0161, "step": 12990 }, { "epoch": 0.32, "learning_rate": 1.9408939785740674e-05, "loss": 0.0195, "step": 13000 }, { "epoch": 0.32, "learning_rate": 1.9408447235562123e-05, "loss": 0.021, "step": 13010 }, { "epoch": 0.32, "learning_rate": 1.9407954685383576e-05, "loss": 0.0209, "step": 13020 }, { "epoch": 0.32, "learning_rate": 1.9407462135205025e-05, "loss": 0.0179, "step": 13030 }, { "epoch": 0.32, "learning_rate": 1.9406969585026477e-05, "loss": 0.0331, "step": 13040 }, { "epoch": 0.32, "learning_rate": 1.9406477034847927e-05, "loss": 0.0171, "step": 13050 }, { "epoch": 0.32, "learning_rate": 1.940598448466938e-05, "loss": 0.0184, "step": 13060 }, { "epoch": 0.32, "learning_rate": 1.9405491934490828e-05, "loss": 0.0211, "step": 13070 }, { "epoch": 0.32, "learning_rate": 1.9404999384312277e-05, "loss": 0.0179, "step": 13080 }, { "epoch": 0.32, "learning_rate": 1.940450683413373e-05, "loss": 0.0285, "step": 13090 }, { "epoch": 0.32, "learning_rate": 1.940401428395518e-05, "loss": 0.0181, "step": 13100 }, { "epoch": 0.32, "learning_rate": 1.9403521733776628e-05, "loss": 0.0166, "step": 13110 }, { "epoch": 0.32, "learning_rate": 1.940302918359808e-05, "loss": 0.034, "step": 13120 }, { "epoch": 0.32, "learning_rate": 1.940253663341953e-05, "loss": 0.0175, "step": 13130 }, { "epoch": 0.32, "learning_rate": 1.940204408324098e-05, "loss": 0.0165, "step": 13140 }, { "epoch": 0.32, "learning_rate": 1.940155153306243e-05, "loss": 0.0194, "step": 13150 }, { "epoch": 0.32, "learning_rate": 1.9401058982883884e-05, "loss": 0.0164, "step": 13160 }, { "epoch": 0.32, "learning_rate": 1.9400566432705333e-05, "loss": 0.0211, "step": 13170 }, { "epoch": 0.32, "learning_rate": 1.9400073882526786e-05, "loss": 0.0146, "step": 13180 }, { "epoch": 0.32, "learning_rate": 1.9399581332348235e-05, "loss": 0.0199, "step": 13190 }, { "epoch": 0.32, "learning_rate": 1.9399088782169687e-05, "loss": 0.03, "step": 13200 }, { "epoch": 0.32, "learning_rate": 1.9398596231991137e-05, "loss": 0.0175, "step": 13210 }, { "epoch": 0.32, "learning_rate": 1.9398103681812586e-05, "loss": 0.0181, "step": 13220 }, { "epoch": 0.33, "learning_rate": 1.9397611131634038e-05, "loss": 0.0269, "step": 13230 }, { "epoch": 0.33, "learning_rate": 1.9397118581455487e-05, "loss": 0.0153, "step": 13240 }, { "epoch": 0.33, "learning_rate": 1.9396626031276937e-05, "loss": 0.0189, "step": 13250 }, { "epoch": 0.33, "learning_rate": 1.939613348109839e-05, "loss": 0.0354, "step": 13260 }, { "epoch": 0.33, "learning_rate": 1.9395640930919838e-05, "loss": 0.0176, "step": 13270 }, { "epoch": 0.33, "learning_rate": 1.9395148380741287e-05, "loss": 0.0196, "step": 13280 }, { "epoch": 0.33, "learning_rate": 1.939465583056274e-05, "loss": 0.0162, "step": 13290 }, { "epoch": 0.33, "learning_rate": 1.939416328038419e-05, "loss": 0.0283, "step": 13300 }, { "epoch": 0.33, "learning_rate": 1.939367073020564e-05, "loss": 0.0174, "step": 13310 }, { "epoch": 0.33, "learning_rate": 1.939317818002709e-05, "loss": 0.0181, "step": 13320 }, { "epoch": 0.33, "learning_rate": 1.9392685629848543e-05, "loss": 0.0183, "step": 13330 }, { "epoch": 0.33, "learning_rate": 1.9392193079669992e-05, "loss": 0.0158, "step": 13340 }, { "epoch": 0.33, "learning_rate": 1.9391700529491445e-05, "loss": 0.0191, "step": 13350 }, { "epoch": 0.33, "learning_rate": 1.9391207979312894e-05, "loss": 0.0171, "step": 13360 }, { "epoch": 0.33, "learning_rate": 1.9390715429134343e-05, "loss": 0.0207, "step": 13370 }, { "epoch": 0.33, "learning_rate": 1.9390222878955796e-05, "loss": 0.0187, "step": 13380 }, { "epoch": 0.33, "learning_rate": 1.9389730328777245e-05, "loss": 0.0429, "step": 13390 }, { "epoch": 0.33, "learning_rate": 1.9389237778598698e-05, "loss": 0.0178, "step": 13400 }, { "epoch": 0.33, "learning_rate": 1.9388745228420147e-05, "loss": 0.0308, "step": 13410 }, { "epoch": 0.33, "learning_rate": 1.9388252678241596e-05, "loss": 0.0171, "step": 13420 }, { "epoch": 0.33, "learning_rate": 1.938776012806305e-05, "loss": 0.0172, "step": 13430 }, { "epoch": 0.33, "learning_rate": 1.9387267577884498e-05, "loss": 0.0194, "step": 13440 }, { "epoch": 0.33, "learning_rate": 1.938677502770595e-05, "loss": 0.0237, "step": 13450 }, { "epoch": 0.33, "learning_rate": 1.93862824775274e-05, "loss": 0.0153, "step": 13460 }, { "epoch": 0.33, "learning_rate": 1.9385789927348852e-05, "loss": 0.0258, "step": 13470 }, { "epoch": 0.33, "learning_rate": 1.93852973771703e-05, "loss": 0.0239, "step": 13480 }, { "epoch": 0.33, "learning_rate": 1.9384804826991753e-05, "loss": 0.0176, "step": 13490 }, { "epoch": 0.33, "learning_rate": 1.9384312276813203e-05, "loss": 0.024, "step": 13500 }, { "epoch": 0.33, "learning_rate": 1.9383819726634652e-05, "loss": 0.0276, "step": 13510 }, { "epoch": 0.33, "learning_rate": 1.9383327176456104e-05, "loss": 0.0185, "step": 13520 }, { "epoch": 0.33, "learning_rate": 1.9382834626277553e-05, "loss": 0.0181, "step": 13530 }, { "epoch": 0.33, "learning_rate": 1.9382342076099003e-05, "loss": 0.0253, "step": 13540 }, { "epoch": 0.33, "learning_rate": 1.9381849525920455e-05, "loss": 0.0321, "step": 13550 }, { "epoch": 0.33, "learning_rate": 1.9381356975741904e-05, "loss": 0.0189, "step": 13560 }, { "epoch": 0.33, "learning_rate": 1.9380864425563353e-05, "loss": 0.0306, "step": 13570 }, { "epoch": 0.33, "learning_rate": 1.9380371875384806e-05, "loss": 0.0184, "step": 13580 }, { "epoch": 0.33, "learning_rate": 1.9379879325206255e-05, "loss": 0.0182, "step": 13590 }, { "epoch": 0.33, "learning_rate": 1.9379386775027708e-05, "loss": 0.0235, "step": 13600 }, { "epoch": 0.33, "learning_rate": 1.937889422484916e-05, "loss": 0.0228, "step": 13610 }, { "epoch": 0.33, "learning_rate": 1.937840167467061e-05, "loss": 0.0201, "step": 13620 }, { "epoch": 0.33, "learning_rate": 1.9377909124492062e-05, "loss": 0.0187, "step": 13630 }, { "epoch": 0.34, "learning_rate": 1.937741657431351e-05, "loss": 0.0184, "step": 13640 }, { "epoch": 0.34, "learning_rate": 1.937692402413496e-05, "loss": 0.0184, "step": 13650 }, { "epoch": 0.34, "learning_rate": 1.9376431473956413e-05, "loss": 0.0268, "step": 13660 }, { "epoch": 0.34, "learning_rate": 1.9375938923777862e-05, "loss": 0.0196, "step": 13670 }, { "epoch": 0.34, "learning_rate": 1.937544637359931e-05, "loss": 0.0171, "step": 13680 }, { "epoch": 0.34, "learning_rate": 1.9374953823420764e-05, "loss": 0.0202, "step": 13690 }, { "epoch": 0.34, "learning_rate": 1.9374461273242213e-05, "loss": 0.0164, "step": 13700 }, { "epoch": 0.34, "learning_rate": 1.9373968723063662e-05, "loss": 0.0175, "step": 13710 }, { "epoch": 0.34, "learning_rate": 1.9373476172885114e-05, "loss": 0.0188, "step": 13720 }, { "epoch": 0.34, "learning_rate": 1.9372983622706563e-05, "loss": 0.0169, "step": 13730 }, { "epoch": 0.34, "learning_rate": 1.9372491072528016e-05, "loss": 0.0184, "step": 13740 }, { "epoch": 0.34, "learning_rate": 1.9371998522349465e-05, "loss": 0.0207, "step": 13750 }, { "epoch": 0.34, "learning_rate": 1.9371505972170918e-05, "loss": 0.0173, "step": 13760 }, { "epoch": 0.34, "learning_rate": 1.9371013421992367e-05, "loss": 0.0178, "step": 13770 }, { "epoch": 0.34, "learning_rate": 1.937052087181382e-05, "loss": 0.0228, "step": 13780 }, { "epoch": 0.34, "learning_rate": 1.937002832163527e-05, "loss": 0.018, "step": 13790 }, { "epoch": 0.34, "learning_rate": 1.9369535771456718e-05, "loss": 0.0167, "step": 13800 }, { "epoch": 0.34, "learning_rate": 1.936904322127817e-05, "loss": 0.0294, "step": 13810 }, { "epoch": 0.34, "learning_rate": 1.936855067109962e-05, "loss": 0.0195, "step": 13820 }, { "epoch": 0.34, "learning_rate": 1.936805812092107e-05, "loss": 0.0217, "step": 13830 }, { "epoch": 0.34, "learning_rate": 1.936756557074252e-05, "loss": 0.0273, "step": 13840 }, { "epoch": 0.34, "learning_rate": 1.936707302056397e-05, "loss": 0.0165, "step": 13850 }, { "epoch": 0.34, "learning_rate": 1.936658047038542e-05, "loss": 0.019, "step": 13860 }, { "epoch": 0.34, "learning_rate": 1.9366087920206872e-05, "loss": 0.0186, "step": 13870 }, { "epoch": 0.34, "learning_rate": 1.9365595370028324e-05, "loss": 0.0174, "step": 13880 }, { "epoch": 0.34, "learning_rate": 1.9365102819849774e-05, "loss": 0.0177, "step": 13890 }, { "epoch": 0.34, "learning_rate": 1.9364610269671226e-05, "loss": 0.0268, "step": 13900 }, { "epoch": 0.34, "learning_rate": 1.9364117719492675e-05, "loss": 0.0167, "step": 13910 }, { "epoch": 0.34, "learning_rate": 1.9363625169314128e-05, "loss": 0.0174, "step": 13920 }, { "epoch": 0.34, "learning_rate": 1.9363132619135577e-05, "loss": 0.0171, "step": 13930 }, { "epoch": 0.34, "learning_rate": 1.9362640068957026e-05, "loss": 0.0185, "step": 13940 }, { "epoch": 0.34, "learning_rate": 1.936214751877848e-05, "loss": 0.0155, "step": 13950 }, { "epoch": 0.34, "learning_rate": 1.9361654968599928e-05, "loss": 0.0165, "step": 13960 }, { "epoch": 0.34, "learning_rate": 1.9361162418421377e-05, "loss": 0.0298, "step": 13970 }, { "epoch": 0.34, "learning_rate": 1.936066986824283e-05, "loss": 0.0185, "step": 13980 }, { "epoch": 0.34, "learning_rate": 1.936017731806428e-05, "loss": 0.0196, "step": 13990 }, { "epoch": 0.34, "learning_rate": 1.9359684767885728e-05, "loss": 0.0168, "step": 14000 }, { "epoch": 0.34, "learning_rate": 1.935919221770718e-05, "loss": 0.0178, "step": 14010 }, { "epoch": 0.34, "learning_rate": 1.935869966752863e-05, "loss": 0.0177, "step": 14020 }, { "epoch": 0.34, "learning_rate": 1.9358207117350082e-05, "loss": 0.0175, "step": 14030 }, { "epoch": 0.34, "learning_rate": 1.935771456717153e-05, "loss": 0.018, "step": 14040 }, { "epoch": 0.35, "learning_rate": 1.9357222016992984e-05, "loss": 0.0161, "step": 14050 }, { "epoch": 0.35, "learning_rate": 1.9356729466814433e-05, "loss": 0.0179, "step": 14060 }, { "epoch": 0.35, "learning_rate": 1.9356236916635885e-05, "loss": 0.0316, "step": 14070 }, { "epoch": 0.35, "learning_rate": 1.9355744366457335e-05, "loss": 0.0168, "step": 14080 }, { "epoch": 0.35, "learning_rate": 1.9355251816278784e-05, "loss": 0.0208, "step": 14090 }, { "epoch": 0.35, "learning_rate": 1.9354759266100236e-05, "loss": 0.0151, "step": 14100 }, { "epoch": 0.35, "learning_rate": 1.9354266715921685e-05, "loss": 0.0138, "step": 14110 }, { "epoch": 0.35, "learning_rate": 1.9353774165743138e-05, "loss": 0.0184, "step": 14120 }, { "epoch": 0.35, "learning_rate": 1.9353281615564587e-05, "loss": 0.0162, "step": 14130 }, { "epoch": 0.35, "learning_rate": 1.9352789065386036e-05, "loss": 0.0203, "step": 14140 }, { "epoch": 0.35, "learning_rate": 1.935229651520749e-05, "loss": 0.018, "step": 14150 }, { "epoch": 0.35, "learning_rate": 1.9351803965028938e-05, "loss": 0.0263, "step": 14160 }, { "epoch": 0.35, "learning_rate": 1.935131141485039e-05, "loss": 0.0186, "step": 14170 }, { "epoch": 0.35, "learning_rate": 1.935081886467184e-05, "loss": 0.0214, "step": 14180 }, { "epoch": 0.35, "learning_rate": 1.9350326314493292e-05, "loss": 0.0197, "step": 14190 }, { "epoch": 0.35, "learning_rate": 1.934983376431474e-05, "loss": 0.0228, "step": 14200 }, { "epoch": 0.35, "learning_rate": 1.9349341214136194e-05, "loss": 0.0179, "step": 14210 }, { "epoch": 0.35, "learning_rate": 1.9348848663957643e-05, "loss": 0.0186, "step": 14220 }, { "epoch": 0.35, "learning_rate": 1.9348356113779092e-05, "loss": 0.0177, "step": 14230 }, { "epoch": 0.35, "learning_rate": 1.9347863563600545e-05, "loss": 0.0181, "step": 14240 }, { "epoch": 0.35, "learning_rate": 1.9347371013421994e-05, "loss": 0.0184, "step": 14250 }, { "epoch": 0.35, "learning_rate": 1.9346878463243443e-05, "loss": 0.0167, "step": 14260 }, { "epoch": 0.35, "learning_rate": 1.9346385913064895e-05, "loss": 0.0265, "step": 14270 }, { "epoch": 0.35, "learning_rate": 1.9345893362886345e-05, "loss": 0.0181, "step": 14280 }, { "epoch": 0.35, "learning_rate": 1.9345400812707794e-05, "loss": 0.0229, "step": 14290 }, { "epoch": 0.35, "learning_rate": 1.9344908262529246e-05, "loss": 0.0153, "step": 14300 }, { "epoch": 0.35, "learning_rate": 1.9344415712350695e-05, "loss": 0.017, "step": 14310 }, { "epoch": 0.35, "learning_rate": 1.9343923162172148e-05, "loss": 0.0162, "step": 14320 }, { "epoch": 0.35, "learning_rate": 1.93434306119936e-05, "loss": 0.0165, "step": 14330 }, { "epoch": 0.35, "learning_rate": 1.934293806181505e-05, "loss": 0.0189, "step": 14340 }, { "epoch": 0.35, "learning_rate": 1.9342445511636502e-05, "loss": 0.0181, "step": 14350 }, { "epoch": 0.35, "learning_rate": 1.934195296145795e-05, "loss": 0.0186, "step": 14360 }, { "epoch": 0.35, "learning_rate": 1.93414604112794e-05, "loss": 0.0216, "step": 14370 }, { "epoch": 0.35, "learning_rate": 1.9340967861100853e-05, "loss": 0.0169, "step": 14380 }, { "epoch": 0.35, "learning_rate": 1.9340475310922302e-05, "loss": 0.0198, "step": 14390 }, { "epoch": 0.35, "learning_rate": 1.933998276074375e-05, "loss": 0.0194, "step": 14400 }, { "epoch": 0.35, "learning_rate": 1.9339490210565204e-05, "loss": 0.0176, "step": 14410 }, { "epoch": 0.35, "learning_rate": 1.9338997660386653e-05, "loss": 0.0147, "step": 14420 }, { "epoch": 0.35, "learning_rate": 1.9338505110208102e-05, "loss": 0.0159, "step": 14430 }, { "epoch": 0.35, "learning_rate": 1.9338012560029555e-05, "loss": 0.0165, "step": 14440 }, { "epoch": 0.35, "learning_rate": 1.9337520009851004e-05, "loss": 0.0181, "step": 14450 }, { "epoch": 0.36, "learning_rate": 1.9337027459672456e-05, "loss": 0.0168, "step": 14460 }, { "epoch": 0.36, "learning_rate": 1.9336534909493906e-05, "loss": 0.0152, "step": 14470 }, { "epoch": 0.36, "learning_rate": 1.9336042359315358e-05, "loss": 0.0156, "step": 14480 }, { "epoch": 0.36, "learning_rate": 1.9335549809136807e-05, "loss": 0.0163, "step": 14490 }, { "epoch": 0.36, "learning_rate": 1.933505725895826e-05, "loss": 0.0388, "step": 14500 }, { "epoch": 0.36, "learning_rate": 1.933456470877971e-05, "loss": 0.0179, "step": 14510 }, { "epoch": 0.36, "learning_rate": 1.9334072158601158e-05, "loss": 0.0221, "step": 14520 }, { "epoch": 0.36, "learning_rate": 1.933357960842261e-05, "loss": 0.0202, "step": 14530 }, { "epoch": 0.36, "learning_rate": 1.933308705824406e-05, "loss": 0.0181, "step": 14540 }, { "epoch": 0.36, "learning_rate": 1.933259450806551e-05, "loss": 0.0183, "step": 14550 }, { "epoch": 0.36, "learning_rate": 1.933210195788696e-05, "loss": 0.02, "step": 14560 }, { "epoch": 0.36, "learning_rate": 1.933160940770841e-05, "loss": 0.0182, "step": 14570 }, { "epoch": 0.36, "learning_rate": 1.933111685752986e-05, "loss": 0.0339, "step": 14580 }, { "epoch": 0.36, "learning_rate": 1.9330624307351312e-05, "loss": 0.0173, "step": 14590 }, { "epoch": 0.36, "learning_rate": 1.9330131757172765e-05, "loss": 0.024, "step": 14600 }, { "epoch": 0.36, "learning_rate": 1.9329639206994214e-05, "loss": 0.0173, "step": 14610 }, { "epoch": 0.36, "learning_rate": 1.9329146656815666e-05, "loss": 0.0222, "step": 14620 }, { "epoch": 0.36, "learning_rate": 1.9328654106637116e-05, "loss": 0.0168, "step": 14630 }, { "epoch": 0.36, "learning_rate": 1.9328161556458568e-05, "loss": 0.0165, "step": 14640 }, { "epoch": 0.36, "learning_rate": 1.9327669006280017e-05, "loss": 0.0164, "step": 14650 }, { "epoch": 0.36, "learning_rate": 1.9327176456101466e-05, "loss": 0.016, "step": 14660 }, { "epoch": 0.36, "learning_rate": 1.932668390592292e-05, "loss": 0.0257, "step": 14670 }, { "epoch": 0.36, "learning_rate": 1.9326191355744368e-05, "loss": 0.0169, "step": 14680 }, { "epoch": 0.36, "learning_rate": 1.9325698805565817e-05, "loss": 0.0163, "step": 14690 }, { "epoch": 0.36, "learning_rate": 1.932520625538727e-05, "loss": 0.0196, "step": 14700 }, { "epoch": 0.36, "learning_rate": 1.932471370520872e-05, "loss": 0.0185, "step": 14710 }, { "epoch": 0.36, "learning_rate": 1.9324221155030168e-05, "loss": 0.0183, "step": 14720 }, { "epoch": 0.36, "learning_rate": 1.932372860485162e-05, "loss": 0.0177, "step": 14730 }, { "epoch": 0.36, "learning_rate": 1.932323605467307e-05, "loss": 0.0166, "step": 14740 }, { "epoch": 0.36, "learning_rate": 1.9322743504494522e-05, "loss": 0.0167, "step": 14750 }, { "epoch": 0.36, "learning_rate": 1.932225095431597e-05, "loss": 0.0257, "step": 14760 }, { "epoch": 0.36, "learning_rate": 1.9321758404137424e-05, "loss": 0.0162, "step": 14770 }, { "epoch": 0.36, "learning_rate": 1.9321265853958873e-05, "loss": 0.0159, "step": 14780 }, { "epoch": 0.36, "learning_rate": 1.9320773303780326e-05, "loss": 0.0207, "step": 14790 }, { "epoch": 0.36, "learning_rate": 1.9320280753601775e-05, "loss": 0.0259, "step": 14800 }, { "epoch": 0.36, "learning_rate": 1.9319788203423224e-05, "loss": 0.0194, "step": 14810 }, { "epoch": 0.36, "learning_rate": 1.9319295653244677e-05, "loss": 0.0174, "step": 14820 }, { "epoch": 0.36, "learning_rate": 1.9318803103066126e-05, "loss": 0.0201, "step": 14830 }, { "epoch": 0.36, "learning_rate": 1.9318310552887578e-05, "loss": 0.0247, "step": 14840 }, { "epoch": 0.36, "learning_rate": 1.9317818002709027e-05, "loss": 0.0204, "step": 14850 }, { "epoch": 0.37, "learning_rate": 1.9317325452530477e-05, "loss": 0.0187, "step": 14860 }, { "epoch": 0.37, "learning_rate": 1.931683290235193e-05, "loss": 0.0153, "step": 14870 }, { "epoch": 0.37, "learning_rate": 1.9316340352173378e-05, "loss": 0.0234, "step": 14880 }, { "epoch": 0.37, "learning_rate": 1.931584780199483e-05, "loss": 0.019, "step": 14890 }, { "epoch": 0.37, "learning_rate": 1.931535525181628e-05, "loss": 0.0179, "step": 14900 }, { "epoch": 0.37, "learning_rate": 1.9314862701637732e-05, "loss": 0.0158, "step": 14910 }, { "epoch": 0.37, "learning_rate": 1.931437015145918e-05, "loss": 0.0164, "step": 14920 }, { "epoch": 0.37, "learning_rate": 1.9313877601280634e-05, "loss": 0.0164, "step": 14930 }, { "epoch": 0.37, "learning_rate": 1.9313385051102083e-05, "loss": 0.0164, "step": 14940 }, { "epoch": 0.37, "learning_rate": 1.9312892500923532e-05, "loss": 0.0159, "step": 14950 }, { "epoch": 0.37, "learning_rate": 1.9312399950744985e-05, "loss": 0.0196, "step": 14960 }, { "epoch": 0.37, "learning_rate": 1.9311907400566434e-05, "loss": 0.0185, "step": 14970 }, { "epoch": 0.37, "learning_rate": 1.9311414850387883e-05, "loss": 0.0226, "step": 14980 }, { "epoch": 0.37, "learning_rate": 1.9310922300209336e-05, "loss": 0.0187, "step": 14990 }, { "epoch": 0.37, "learning_rate": 1.9310429750030785e-05, "loss": 0.0185, "step": 15000 }, { "epoch": 0.37, "eval_bleu": 97.0976, "eval_gen_len_ratio": 1.0, "eval_loss": 0.012027919292449951, "eval_runtime": 254.6626, "eval_samples_per_second": 7.327, "eval_steps_per_second": 0.232, "step": 15000 }, { "epoch": 0.37, "learning_rate": 1.9309937199852234e-05, "loss": 0.0242, "step": 15010 }, { "epoch": 0.37, "learning_rate": 1.9309444649673687e-05, "loss": 0.0165, "step": 15020 }, { "epoch": 0.37, "learning_rate": 1.9308952099495136e-05, "loss": 0.0182, "step": 15030 }, { "epoch": 0.37, "learning_rate": 1.9308459549316588e-05, "loss": 0.0234, "step": 15040 }, { "epoch": 0.37, "learning_rate": 1.930796699913804e-05, "loss": 0.0174, "step": 15050 }, { "epoch": 0.37, "learning_rate": 1.930747444895949e-05, "loss": 0.0244, "step": 15060 }, { "epoch": 0.37, "learning_rate": 1.9306981898780943e-05, "loss": 0.018, "step": 15070 }, { "epoch": 0.37, "learning_rate": 1.930648934860239e-05, "loss": 0.0164, "step": 15080 }, { "epoch": 0.37, "learning_rate": 1.930599679842384e-05, "loss": 0.0267, "step": 15090 }, { "epoch": 0.37, "learning_rate": 1.9305504248245293e-05, "loss": 0.0171, "step": 15100 }, { "epoch": 0.37, "learning_rate": 1.9305011698066742e-05, "loss": 0.0156, "step": 15110 }, { "epoch": 0.37, "learning_rate": 1.930451914788819e-05, "loss": 0.0157, "step": 15120 }, { "epoch": 0.37, "learning_rate": 1.9304026597709644e-05, "loss": 0.0233, "step": 15130 }, { "epoch": 0.37, "learning_rate": 1.9303534047531093e-05, "loss": 0.0167, "step": 15140 }, { "epoch": 0.37, "learning_rate": 1.9303041497352542e-05, "loss": 0.0164, "step": 15150 }, { "epoch": 0.37, "learning_rate": 1.9302548947173995e-05, "loss": 0.0214, "step": 15160 }, { "epoch": 0.37, "learning_rate": 1.9302056396995444e-05, "loss": 0.0188, "step": 15170 }, { "epoch": 0.37, "learning_rate": 1.9301563846816897e-05, "loss": 0.0245, "step": 15180 }, { "epoch": 0.37, "learning_rate": 1.9301071296638346e-05, "loss": 0.0148, "step": 15190 }, { "epoch": 0.37, "learning_rate": 1.93005787464598e-05, "loss": 0.0168, "step": 15200 }, { "epoch": 0.37, "learning_rate": 1.9300086196281248e-05, "loss": 0.0251, "step": 15210 }, { "epoch": 0.37, "learning_rate": 1.92995936461027e-05, "loss": 0.0155, "step": 15220 }, { "epoch": 0.37, "learning_rate": 1.929910109592415e-05, "loss": 0.0196, "step": 15230 }, { "epoch": 0.37, "learning_rate": 1.92986085457456e-05, "loss": 0.0152, "step": 15240 }, { "epoch": 0.37, "learning_rate": 1.929811599556705e-05, "loss": 0.0287, "step": 15250 }, { "epoch": 0.37, "learning_rate": 1.92976234453885e-05, "loss": 0.0151, "step": 15260 }, { "epoch": 0.38, "learning_rate": 1.929713089520995e-05, "loss": 0.0165, "step": 15270 }, { "epoch": 0.38, "learning_rate": 1.9296638345031402e-05, "loss": 0.0179, "step": 15280 }, { "epoch": 0.38, "learning_rate": 1.929614579485285e-05, "loss": 0.0185, "step": 15290 }, { "epoch": 0.38, "learning_rate": 1.92956532446743e-05, "loss": 0.0182, "step": 15300 }, { "epoch": 0.38, "learning_rate": 1.9295160694495753e-05, "loss": 0.0249, "step": 15310 }, { "epoch": 0.38, "learning_rate": 1.9294668144317205e-05, "loss": 0.016, "step": 15320 }, { "epoch": 0.38, "learning_rate": 1.9294175594138654e-05, "loss": 0.0263, "step": 15330 }, { "epoch": 0.38, "learning_rate": 1.9293683043960107e-05, "loss": 0.0204, "step": 15340 }, { "epoch": 0.38, "learning_rate": 1.9293190493781556e-05, "loss": 0.0143, "step": 15350 }, { "epoch": 0.38, "learning_rate": 1.929269794360301e-05, "loss": 0.017, "step": 15360 }, { "epoch": 0.38, "learning_rate": 1.9292205393424458e-05, "loss": 0.0237, "step": 15370 }, { "epoch": 0.38, "learning_rate": 1.9291712843245907e-05, "loss": 0.0162, "step": 15380 }, { "epoch": 0.38, "learning_rate": 1.929122029306736e-05, "loss": 0.0223, "step": 15390 }, { "epoch": 0.38, "learning_rate": 1.929072774288881e-05, "loss": 0.0194, "step": 15400 }, { "epoch": 0.38, "learning_rate": 1.9290235192710258e-05, "loss": 0.03, "step": 15410 }, { "epoch": 0.38, "learning_rate": 1.928974264253171e-05, "loss": 0.019, "step": 15420 }, { "epoch": 0.38, "learning_rate": 1.928925009235316e-05, "loss": 0.018, "step": 15430 }, { "epoch": 0.38, "learning_rate": 1.928875754217461e-05, "loss": 0.0166, "step": 15440 }, { "epoch": 0.38, "learning_rate": 1.928826499199606e-05, "loss": 0.0261, "step": 15450 }, { "epoch": 0.38, "learning_rate": 1.928777244181751e-05, "loss": 0.0237, "step": 15460 }, { "epoch": 0.38, "learning_rate": 1.9287279891638963e-05, "loss": 0.019, "step": 15470 }, { "epoch": 0.38, "learning_rate": 1.9286787341460412e-05, "loss": 0.0274, "step": 15480 }, { "epoch": 0.38, "learning_rate": 1.9286294791281864e-05, "loss": 0.0158, "step": 15490 }, { "epoch": 0.38, "learning_rate": 1.9285802241103314e-05, "loss": 0.0185, "step": 15500 }, { "epoch": 0.38, "learning_rate": 1.9285309690924766e-05, "loss": 0.0143, "step": 15510 }, { "epoch": 0.38, "learning_rate": 1.9284817140746215e-05, "loss": 0.0176, "step": 15520 }, { "epoch": 0.38, "learning_rate": 1.9284324590567664e-05, "loss": 0.0203, "step": 15530 }, { "epoch": 0.38, "learning_rate": 1.9283832040389117e-05, "loss": 0.0311, "step": 15540 }, { "epoch": 0.38, "learning_rate": 1.9283339490210566e-05, "loss": 0.0164, "step": 15550 }, { "epoch": 0.38, "learning_rate": 1.928284694003202e-05, "loss": 0.0209, "step": 15560 }, { "epoch": 0.38, "learning_rate": 1.9282354389853468e-05, "loss": 0.0169, "step": 15570 }, { "epoch": 0.38, "learning_rate": 1.9281861839674917e-05, "loss": 0.0184, "step": 15580 }, { "epoch": 0.38, "learning_rate": 1.928136928949637e-05, "loss": 0.0158, "step": 15590 }, { "epoch": 0.38, "learning_rate": 1.928087673931782e-05, "loss": 0.0194, "step": 15600 }, { "epoch": 0.38, "learning_rate": 1.928038418913927e-05, "loss": 0.0186, "step": 15610 }, { "epoch": 0.38, "learning_rate": 1.927989163896072e-05, "loss": 0.0154, "step": 15620 }, { "epoch": 0.38, "learning_rate": 1.9279399088782173e-05, "loss": 0.0187, "step": 15630 }, { "epoch": 0.38, "learning_rate": 1.9278906538603622e-05, "loss": 0.0167, "step": 15640 }, { "epoch": 0.38, "learning_rate": 1.9278413988425074e-05, "loss": 0.0185, "step": 15650 }, { "epoch": 0.38, "learning_rate": 1.9277921438246524e-05, "loss": 0.0191, "step": 15660 }, { "epoch": 0.38, "learning_rate": 1.9277428888067973e-05, "loss": 0.0283, "step": 15670 }, { "epoch": 0.39, "learning_rate": 1.9276936337889425e-05, "loss": 0.0167, "step": 15680 }, { "epoch": 0.39, "learning_rate": 1.9276443787710874e-05, "loss": 0.0174, "step": 15690 }, { "epoch": 0.39, "learning_rate": 1.9275951237532324e-05, "loss": 0.015, "step": 15700 }, { "epoch": 0.39, "learning_rate": 1.9275458687353776e-05, "loss": 0.0216, "step": 15710 }, { "epoch": 0.39, "learning_rate": 1.9274966137175225e-05, "loss": 0.0176, "step": 15720 }, { "epoch": 0.39, "learning_rate": 1.9274473586996674e-05, "loss": 0.0156, "step": 15730 }, { "epoch": 0.39, "learning_rate": 1.9273981036818127e-05, "loss": 0.0235, "step": 15740 }, { "epoch": 0.39, "learning_rate": 1.9273488486639576e-05, "loss": 0.0181, "step": 15750 }, { "epoch": 0.39, "learning_rate": 1.927299593646103e-05, "loss": 0.0157, "step": 15760 }, { "epoch": 0.39, "learning_rate": 1.927250338628248e-05, "loss": 0.0176, "step": 15770 }, { "epoch": 0.39, "learning_rate": 1.927201083610393e-05, "loss": 0.0158, "step": 15780 }, { "epoch": 0.39, "learning_rate": 1.9271518285925383e-05, "loss": 0.0207, "step": 15790 }, { "epoch": 0.39, "learning_rate": 1.9271025735746832e-05, "loss": 0.0205, "step": 15800 }, { "epoch": 0.39, "learning_rate": 1.927053318556828e-05, "loss": 0.033, "step": 15810 }, { "epoch": 0.39, "learning_rate": 1.9270040635389734e-05, "loss": 0.0164, "step": 15820 }, { "epoch": 0.39, "learning_rate": 1.9269548085211183e-05, "loss": 0.019, "step": 15830 }, { "epoch": 0.39, "learning_rate": 1.9269055535032632e-05, "loss": 0.0176, "step": 15840 }, { "epoch": 0.39, "learning_rate": 1.9268562984854085e-05, "loss": 0.0152, "step": 15850 }, { "epoch": 0.39, "learning_rate": 1.9268070434675534e-05, "loss": 0.0161, "step": 15860 }, { "epoch": 0.39, "learning_rate": 1.9267577884496983e-05, "loss": 0.0264, "step": 15870 }, { "epoch": 0.39, "learning_rate": 1.9267085334318435e-05, "loss": 0.0248, "step": 15880 }, { "epoch": 0.39, "learning_rate": 1.9266592784139885e-05, "loss": 0.0229, "step": 15890 }, { "epoch": 0.39, "learning_rate": 1.9266100233961337e-05, "loss": 0.0174, "step": 15900 }, { "epoch": 0.39, "learning_rate": 1.9265607683782786e-05, "loss": 0.0158, "step": 15910 }, { "epoch": 0.39, "learning_rate": 1.926511513360424e-05, "loss": 0.0166, "step": 15920 }, { "epoch": 0.39, "learning_rate": 1.9264622583425688e-05, "loss": 0.0327, "step": 15930 }, { "epoch": 0.39, "learning_rate": 1.926413003324714e-05, "loss": 0.0174, "step": 15940 }, { "epoch": 0.39, "learning_rate": 1.926363748306859e-05, "loss": 0.0151, "step": 15950 }, { "epoch": 0.39, "learning_rate": 1.926314493289004e-05, "loss": 0.0149, "step": 15960 }, { "epoch": 0.39, "learning_rate": 1.926265238271149e-05, "loss": 0.0176, "step": 15970 }, { "epoch": 0.39, "learning_rate": 1.926215983253294e-05, "loss": 0.0192, "step": 15980 }, { "epoch": 0.39, "learning_rate": 1.926166728235439e-05, "loss": 0.0177, "step": 15990 }, { "epoch": 0.39, "learning_rate": 1.9261174732175842e-05, "loss": 0.0213, "step": 16000 }, { "epoch": 0.39, "learning_rate": 1.926068218199729e-05, "loss": 0.0178, "step": 16010 }, { "epoch": 0.39, "learning_rate": 1.926018963181874e-05, "loss": 0.0149, "step": 16020 }, { "epoch": 0.39, "learning_rate": 1.9259697081640193e-05, "loss": 0.0174, "step": 16030 }, { "epoch": 0.39, "learning_rate": 1.9259204531461645e-05, "loss": 0.0175, "step": 16040 }, { "epoch": 0.39, "learning_rate": 1.9258711981283095e-05, "loss": 0.0173, "step": 16050 }, { "epoch": 0.39, "learning_rate": 1.9258219431104547e-05, "loss": 0.0159, "step": 16060 }, { "epoch": 0.39, "learning_rate": 1.9257726880925996e-05, "loss": 0.0258, "step": 16070 }, { "epoch": 0.4, "learning_rate": 1.925723433074745e-05, "loss": 0.0168, "step": 16080 }, { "epoch": 0.4, "learning_rate": 1.9256741780568898e-05, "loss": 0.0163, "step": 16090 }, { "epoch": 0.4, "learning_rate": 1.9256249230390347e-05, "loss": 0.0165, "step": 16100 }, { "epoch": 0.4, "learning_rate": 1.92557566802118e-05, "loss": 0.0163, "step": 16110 }, { "epoch": 0.4, "learning_rate": 1.925526413003325e-05, "loss": 0.0158, "step": 16120 }, { "epoch": 0.4, "learning_rate": 1.9254771579854698e-05, "loss": 0.0274, "step": 16130 }, { "epoch": 0.4, "learning_rate": 1.925427902967615e-05, "loss": 0.0177, "step": 16140 }, { "epoch": 0.4, "learning_rate": 1.92537864794976e-05, "loss": 0.0171, "step": 16150 }, { "epoch": 0.4, "learning_rate": 1.925329392931905e-05, "loss": 0.0176, "step": 16160 }, { "epoch": 0.4, "learning_rate": 1.92528013791405e-05, "loss": 0.018, "step": 16170 }, { "epoch": 0.4, "learning_rate": 1.925230882896195e-05, "loss": 0.0226, "step": 16180 }, { "epoch": 0.4, "learning_rate": 1.9251816278783403e-05, "loss": 0.0151, "step": 16190 }, { "epoch": 0.4, "learning_rate": 1.9251323728604852e-05, "loss": 0.0151, "step": 16200 }, { "epoch": 0.4, "learning_rate": 1.9250831178426305e-05, "loss": 0.0172, "step": 16210 }, { "epoch": 0.4, "learning_rate": 1.9250338628247754e-05, "loss": 0.0168, "step": 16220 }, { "epoch": 0.4, "learning_rate": 1.9249846078069206e-05, "loss": 0.0185, "step": 16230 }, { "epoch": 0.4, "learning_rate": 1.9249353527890656e-05, "loss": 0.033, "step": 16240 }, { "epoch": 0.4, "learning_rate": 1.9248860977712105e-05, "loss": 0.0236, "step": 16250 }, { "epoch": 0.4, "learning_rate": 1.9248368427533557e-05, "loss": 0.0188, "step": 16260 }, { "epoch": 0.4, "learning_rate": 1.9247875877355006e-05, "loss": 0.0183, "step": 16270 }, { "epoch": 0.4, "learning_rate": 1.924738332717646e-05, "loss": 0.0178, "step": 16280 }, { "epoch": 0.4, "learning_rate": 1.9246890776997908e-05, "loss": 0.0135, "step": 16290 }, { "epoch": 0.4, "learning_rate": 1.9246398226819357e-05, "loss": 0.0166, "step": 16300 }, { "epoch": 0.4, "learning_rate": 1.924590567664081e-05, "loss": 0.0171, "step": 16310 }, { "epoch": 0.4, "learning_rate": 1.924541312646226e-05, "loss": 0.0157, "step": 16320 }, { "epoch": 0.4, "learning_rate": 1.924492057628371e-05, "loss": 0.0154, "step": 16330 }, { "epoch": 0.4, "learning_rate": 1.924442802610516e-05, "loss": 0.0186, "step": 16340 }, { "epoch": 0.4, "learning_rate": 1.9243935475926613e-05, "loss": 0.0153, "step": 16350 }, { "epoch": 0.4, "learning_rate": 1.9243442925748062e-05, "loss": 0.0195, "step": 16360 }, { "epoch": 0.4, "learning_rate": 1.9242950375569515e-05, "loss": 0.0207, "step": 16370 }, { "epoch": 0.4, "learning_rate": 1.9242457825390964e-05, "loss": 0.0216, "step": 16380 }, { "epoch": 0.4, "learning_rate": 1.9241965275212413e-05, "loss": 0.0233, "step": 16390 }, { "epoch": 0.4, "learning_rate": 1.9241472725033866e-05, "loss": 0.017, "step": 16400 }, { "epoch": 0.4, "learning_rate": 1.9240980174855315e-05, "loss": 0.0175, "step": 16410 }, { "epoch": 0.4, "learning_rate": 1.9240487624676764e-05, "loss": 0.0244, "step": 16420 }, { "epoch": 0.4, "learning_rate": 1.9239995074498216e-05, "loss": 0.019, "step": 16430 }, { "epoch": 0.4, "learning_rate": 1.9239502524319666e-05, "loss": 0.0179, "step": 16440 }, { "epoch": 0.4, "learning_rate": 1.9239009974141115e-05, "loss": 0.0297, "step": 16450 }, { "epoch": 0.4, "learning_rate": 1.9238517423962567e-05, "loss": 0.0249, "step": 16460 }, { "epoch": 0.4, "learning_rate": 1.9238024873784016e-05, "loss": 0.0156, "step": 16470 }, { "epoch": 0.4, "learning_rate": 1.923753232360547e-05, "loss": 0.0217, "step": 16480 }, { "epoch": 0.41, "learning_rate": 1.923703977342692e-05, "loss": 0.0199, "step": 16490 }, { "epoch": 0.41, "learning_rate": 1.923654722324837e-05, "loss": 0.0209, "step": 16500 }, { "epoch": 0.41, "learning_rate": 1.9236054673069823e-05, "loss": 0.018, "step": 16510 }, { "epoch": 0.41, "learning_rate": 1.9235562122891272e-05, "loss": 0.0165, "step": 16520 }, { "epoch": 0.41, "learning_rate": 1.923506957271272e-05, "loss": 0.0157, "step": 16530 }, { "epoch": 0.41, "learning_rate": 1.9234577022534174e-05, "loss": 0.0167, "step": 16540 }, { "epoch": 0.41, "learning_rate": 1.9234084472355623e-05, "loss": 0.0188, "step": 16550 }, { "epoch": 0.41, "learning_rate": 1.9233591922177072e-05, "loss": 0.0182, "step": 16560 }, { "epoch": 0.41, "learning_rate": 1.9233099371998525e-05, "loss": 0.017, "step": 16570 }, { "epoch": 0.41, "learning_rate": 1.9232606821819974e-05, "loss": 0.0237, "step": 16580 }, { "epoch": 0.41, "learning_rate": 1.9232114271641423e-05, "loss": 0.019, "step": 16590 }, { "epoch": 0.41, "learning_rate": 1.9231621721462876e-05, "loss": 0.0152, "step": 16600 }, { "epoch": 0.41, "learning_rate": 1.9231129171284325e-05, "loss": 0.0217, "step": 16610 }, { "epoch": 0.41, "learning_rate": 1.9230636621105777e-05, "loss": 0.0172, "step": 16620 }, { "epoch": 0.41, "learning_rate": 1.9230144070927227e-05, "loss": 0.0237, "step": 16630 }, { "epoch": 0.41, "learning_rate": 1.922965152074868e-05, "loss": 0.0196, "step": 16640 }, { "epoch": 0.41, "learning_rate": 1.9229158970570128e-05, "loss": 0.014, "step": 16650 }, { "epoch": 0.41, "learning_rate": 1.922866642039158e-05, "loss": 0.0196, "step": 16660 }, { "epoch": 0.41, "learning_rate": 1.922817387021303e-05, "loss": 0.0173, "step": 16670 }, { "epoch": 0.41, "learning_rate": 1.922768132003448e-05, "loss": 0.0161, "step": 16680 }, { "epoch": 0.41, "learning_rate": 1.922718876985593e-05, "loss": 0.0164, "step": 16690 }, { "epoch": 0.41, "learning_rate": 1.922669621967738e-05, "loss": 0.0311, "step": 16700 }, { "epoch": 0.41, "learning_rate": 1.922620366949883e-05, "loss": 0.0159, "step": 16710 }, { "epoch": 0.41, "learning_rate": 1.9225711119320282e-05, "loss": 0.017, "step": 16720 }, { "epoch": 0.41, "learning_rate": 1.922521856914173e-05, "loss": 0.0204, "step": 16730 }, { "epoch": 0.41, "learning_rate": 1.922472601896318e-05, "loss": 0.0196, "step": 16740 }, { "epoch": 0.41, "learning_rate": 1.9224233468784633e-05, "loss": 0.017, "step": 16750 }, { "epoch": 0.41, "learning_rate": 1.9223740918606086e-05, "loss": 0.0192, "step": 16760 }, { "epoch": 0.41, "learning_rate": 1.9223248368427535e-05, "loss": 0.0204, "step": 16770 }, { "epoch": 0.41, "learning_rate": 1.9222755818248987e-05, "loss": 0.0177, "step": 16780 }, { "epoch": 0.41, "learning_rate": 1.9222263268070437e-05, "loss": 0.0157, "step": 16790 }, { "epoch": 0.41, "learning_rate": 1.922177071789189e-05, "loss": 0.0265, "step": 16800 }, { "epoch": 0.41, "learning_rate": 1.922127816771334e-05, "loss": 0.0176, "step": 16810 }, { "epoch": 0.41, "learning_rate": 1.9220785617534787e-05, "loss": 0.0145, "step": 16820 }, { "epoch": 0.41, "learning_rate": 1.922029306735624e-05, "loss": 0.0197, "step": 16830 }, { "epoch": 0.41, "learning_rate": 1.921980051717769e-05, "loss": 0.0186, "step": 16840 }, { "epoch": 0.41, "learning_rate": 1.9219307966999138e-05, "loss": 0.0151, "step": 16850 }, { "epoch": 0.41, "learning_rate": 1.921881541682059e-05, "loss": 0.0281, "step": 16860 }, { "epoch": 0.41, "learning_rate": 1.921832286664204e-05, "loss": 0.0158, "step": 16870 }, { "epoch": 0.41, "learning_rate": 1.921783031646349e-05, "loss": 0.0167, "step": 16880 }, { "epoch": 0.41, "learning_rate": 1.921733776628494e-05, "loss": 0.0178, "step": 16890 }, { "epoch": 0.42, "learning_rate": 1.921684521610639e-05, "loss": 0.0197, "step": 16900 }, { "epoch": 0.42, "learning_rate": 1.9216352665927843e-05, "loss": 0.019, "step": 16910 }, { "epoch": 0.42, "learning_rate": 1.9215860115749293e-05, "loss": 0.0276, "step": 16920 }, { "epoch": 0.42, "learning_rate": 1.9215367565570745e-05, "loss": 0.019, "step": 16930 }, { "epoch": 0.42, "learning_rate": 1.9214875015392194e-05, "loss": 0.0261, "step": 16940 }, { "epoch": 0.42, "learning_rate": 1.9214382465213647e-05, "loss": 0.0161, "step": 16950 }, { "epoch": 0.42, "learning_rate": 1.9213889915035096e-05, "loss": 0.0161, "step": 16960 }, { "epoch": 0.42, "learning_rate": 1.9213397364856545e-05, "loss": 0.0238, "step": 16970 }, { "epoch": 0.42, "learning_rate": 1.9212904814677998e-05, "loss": 0.0167, "step": 16980 }, { "epoch": 0.42, "learning_rate": 1.9212412264499447e-05, "loss": 0.0242, "step": 16990 }, { "epoch": 0.42, "learning_rate": 1.92119197143209e-05, "loss": 0.0185, "step": 17000 }, { "epoch": 0.42, "learning_rate": 1.921142716414235e-05, "loss": 0.0182, "step": 17010 }, { "epoch": 0.42, "learning_rate": 1.9210934613963798e-05, "loss": 0.0277, "step": 17020 }, { "epoch": 0.42, "learning_rate": 1.921044206378525e-05, "loss": 0.0296, "step": 17030 }, { "epoch": 0.42, "learning_rate": 1.92099495136067e-05, "loss": 0.0372, "step": 17040 }, { "epoch": 0.42, "learning_rate": 1.9209456963428152e-05, "loss": 0.0159, "step": 17050 }, { "epoch": 0.42, "learning_rate": 1.92089644132496e-05, "loss": 0.0214, "step": 17060 }, { "epoch": 0.42, "learning_rate": 1.9208471863071053e-05, "loss": 0.0241, "step": 17070 }, { "epoch": 0.42, "learning_rate": 1.9207979312892503e-05, "loss": 0.0166, "step": 17080 }, { "epoch": 0.42, "learning_rate": 1.9207486762713955e-05, "loss": 0.0172, "step": 17090 }, { "epoch": 0.42, "learning_rate": 1.9206994212535404e-05, "loss": 0.0154, "step": 17100 }, { "epoch": 0.42, "learning_rate": 1.9206501662356853e-05, "loss": 0.017, "step": 17110 }, { "epoch": 0.42, "learning_rate": 1.9206009112178306e-05, "loss": 0.02, "step": 17120 }, { "epoch": 0.42, "learning_rate": 1.9205516561999755e-05, "loss": 0.0146, "step": 17130 }, { "epoch": 0.42, "learning_rate": 1.9205024011821204e-05, "loss": 0.0193, "step": 17140 }, { "epoch": 0.42, "learning_rate": 1.9204531461642657e-05, "loss": 0.0168, "step": 17150 }, { "epoch": 0.42, "learning_rate": 1.9204038911464106e-05, "loss": 0.0234, "step": 17160 }, { "epoch": 0.42, "learning_rate": 1.9203546361285555e-05, "loss": 0.0159, "step": 17170 }, { "epoch": 0.42, "learning_rate": 1.9203053811107008e-05, "loss": 0.0203, "step": 17180 }, { "epoch": 0.42, "learning_rate": 1.9202561260928457e-05, "loss": 0.0201, "step": 17190 }, { "epoch": 0.42, "learning_rate": 1.920206871074991e-05, "loss": 0.0159, "step": 17200 }, { "epoch": 0.42, "learning_rate": 1.9201576160571362e-05, "loss": 0.0177, "step": 17210 }, { "epoch": 0.42, "learning_rate": 1.920108361039281e-05, "loss": 0.0174, "step": 17220 }, { "epoch": 0.42, "learning_rate": 1.9200591060214264e-05, "loss": 0.021, "step": 17230 }, { "epoch": 0.42, "learning_rate": 1.9200098510035713e-05, "loss": 0.0296, "step": 17240 }, { "epoch": 0.42, "learning_rate": 1.9199605959857162e-05, "loss": 0.0172, "step": 17250 }, { "epoch": 0.42, "learning_rate": 1.9199113409678614e-05, "loss": 0.0158, "step": 17260 }, { "epoch": 0.42, "learning_rate": 1.9198620859500064e-05, "loss": 0.0163, "step": 17270 }, { "epoch": 0.42, "learning_rate": 1.9198128309321513e-05, "loss": 0.0158, "step": 17280 }, { "epoch": 0.42, "learning_rate": 1.9197635759142965e-05, "loss": 0.0208, "step": 17290 }, { "epoch": 0.43, "learning_rate": 1.9197143208964414e-05, "loss": 0.0153, "step": 17300 }, { "epoch": 0.43, "learning_rate": 1.9196650658785864e-05, "loss": 0.0222, "step": 17310 }, { "epoch": 0.43, "learning_rate": 1.9196158108607316e-05, "loss": 0.019, "step": 17320 }, { "epoch": 0.43, "learning_rate": 1.9195665558428765e-05, "loss": 0.021, "step": 17330 }, { "epoch": 0.43, "learning_rate": 1.9195173008250218e-05, "loss": 0.0173, "step": 17340 }, { "epoch": 0.43, "learning_rate": 1.9194680458071667e-05, "loss": 0.0236, "step": 17350 }, { "epoch": 0.43, "learning_rate": 1.919418790789312e-05, "loss": 0.0175, "step": 17360 }, { "epoch": 0.43, "learning_rate": 1.919369535771457e-05, "loss": 0.0181, "step": 17370 }, { "epoch": 0.43, "learning_rate": 1.919320280753602e-05, "loss": 0.0176, "step": 17380 }, { "epoch": 0.43, "learning_rate": 1.919271025735747e-05, "loss": 0.014, "step": 17390 }, { "epoch": 0.43, "learning_rate": 1.919221770717892e-05, "loss": 0.0154, "step": 17400 }, { "epoch": 0.43, "learning_rate": 1.9191725157000372e-05, "loss": 0.0231, "step": 17410 }, { "epoch": 0.43, "learning_rate": 1.919123260682182e-05, "loss": 0.0197, "step": 17420 }, { "epoch": 0.43, "learning_rate": 1.919074005664327e-05, "loss": 0.0147, "step": 17430 }, { "epoch": 0.43, "learning_rate": 1.9190247506464723e-05, "loss": 0.0179, "step": 17440 }, { "epoch": 0.43, "learning_rate": 1.9189754956286172e-05, "loss": 0.0171, "step": 17450 }, { "epoch": 0.43, "learning_rate": 1.918926240610762e-05, "loss": 0.0162, "step": 17460 }, { "epoch": 0.43, "learning_rate": 1.9188769855929074e-05, "loss": 0.0167, "step": 17470 }, { "epoch": 0.43, "learning_rate": 1.9188277305750526e-05, "loss": 0.0176, "step": 17480 }, { "epoch": 0.43, "learning_rate": 1.9187784755571975e-05, "loss": 0.0213, "step": 17490 }, { "epoch": 0.43, "learning_rate": 1.9187292205393428e-05, "loss": 0.0172, "step": 17500 }, { "epoch": 0.43, "learning_rate": 1.9186799655214877e-05, "loss": 0.0255, "step": 17510 }, { "epoch": 0.43, "learning_rate": 1.918630710503633e-05, "loss": 0.0258, "step": 17520 }, { "epoch": 0.43, "learning_rate": 1.918581455485778e-05, "loss": 0.0267, "step": 17530 }, { "epoch": 0.43, "learning_rate": 1.9185322004679228e-05, "loss": 0.0142, "step": 17540 }, { "epoch": 0.43, "learning_rate": 1.918482945450068e-05, "loss": 0.0174, "step": 17550 }, { "epoch": 0.43, "learning_rate": 1.918433690432213e-05, "loss": 0.0157, "step": 17560 }, { "epoch": 0.43, "learning_rate": 1.918384435414358e-05, "loss": 0.0175, "step": 17570 }, { "epoch": 0.43, "learning_rate": 1.918335180396503e-05, "loss": 0.0229, "step": 17580 }, { "epoch": 0.43, "learning_rate": 1.918285925378648e-05, "loss": 0.0162, "step": 17590 }, { "epoch": 0.43, "learning_rate": 1.918236670360793e-05, "loss": 0.0156, "step": 17600 }, { "epoch": 0.43, "learning_rate": 1.9181874153429382e-05, "loss": 0.0231, "step": 17610 }, { "epoch": 0.43, "learning_rate": 1.918138160325083e-05, "loss": 0.0227, "step": 17620 }, { "epoch": 0.43, "learning_rate": 1.9180889053072284e-05, "loss": 0.015, "step": 17630 }, { "epoch": 0.43, "learning_rate": 1.9180396502893733e-05, "loss": 0.0166, "step": 17640 }, { "epoch": 0.43, "learning_rate": 1.9179903952715185e-05, "loss": 0.018, "step": 17650 }, { "epoch": 0.43, "learning_rate": 1.9179411402536635e-05, "loss": 0.0233, "step": 17660 }, { "epoch": 0.43, "learning_rate": 1.9178918852358087e-05, "loss": 0.0178, "step": 17670 }, { "epoch": 0.43, "learning_rate": 1.9178426302179536e-05, "loss": 0.0157, "step": 17680 }, { "epoch": 0.43, "learning_rate": 1.9177933752000985e-05, "loss": 0.0195, "step": 17690 }, { "epoch": 0.43, "learning_rate": 1.9177441201822438e-05, "loss": 0.0192, "step": 17700 }, { "epoch": 0.44, "learning_rate": 1.9176948651643887e-05, "loss": 0.0245, "step": 17710 }, { "epoch": 0.44, "learning_rate": 1.917645610146534e-05, "loss": 0.0153, "step": 17720 }, { "epoch": 0.44, "learning_rate": 1.917596355128679e-05, "loss": 0.022, "step": 17730 }, { "epoch": 0.44, "learning_rate": 1.9175471001108238e-05, "loss": 0.0164, "step": 17740 }, { "epoch": 0.44, "learning_rate": 1.917497845092969e-05, "loss": 0.0229, "step": 17750 }, { "epoch": 0.44, "learning_rate": 1.917448590075114e-05, "loss": 0.0157, "step": 17760 }, { "epoch": 0.44, "learning_rate": 1.9173993350572592e-05, "loss": 0.0204, "step": 17770 }, { "epoch": 0.44, "learning_rate": 1.917350080039404e-05, "loss": 0.0242, "step": 17780 }, { "epoch": 0.44, "learning_rate": 1.9173008250215494e-05, "loss": 0.0232, "step": 17790 }, { "epoch": 0.44, "learning_rate": 1.9172515700036943e-05, "loss": 0.0217, "step": 17800 }, { "epoch": 0.44, "learning_rate": 1.9172023149858395e-05, "loss": 0.0174, "step": 17810 }, { "epoch": 0.44, "learning_rate": 1.9171530599679845e-05, "loss": 0.0198, "step": 17820 }, { "epoch": 0.44, "learning_rate": 1.9171038049501294e-05, "loss": 0.0157, "step": 17830 }, { "epoch": 0.44, "learning_rate": 1.9170545499322746e-05, "loss": 0.0244, "step": 17840 }, { "epoch": 0.44, "learning_rate": 1.9170052949144195e-05, "loss": 0.015, "step": 17850 }, { "epoch": 0.44, "learning_rate": 1.9169560398965645e-05, "loss": 0.0135, "step": 17860 }, { "epoch": 0.44, "learning_rate": 1.9169067848787097e-05, "loss": 0.0162, "step": 17870 }, { "epoch": 0.44, "learning_rate": 1.9168575298608546e-05, "loss": 0.0237, "step": 17880 }, { "epoch": 0.44, "learning_rate": 1.9168082748429995e-05, "loss": 0.0147, "step": 17890 }, { "epoch": 0.44, "learning_rate": 1.9167590198251448e-05, "loss": 0.0167, "step": 17900 }, { "epoch": 0.44, "learning_rate": 1.9167097648072897e-05, "loss": 0.0171, "step": 17910 }, { "epoch": 0.44, "learning_rate": 1.916660509789435e-05, "loss": 0.0146, "step": 17920 }, { "epoch": 0.44, "learning_rate": 1.9166112547715802e-05, "loss": 0.0182, "step": 17930 }, { "epoch": 0.44, "learning_rate": 1.916561999753725e-05, "loss": 0.0248, "step": 17940 }, { "epoch": 0.44, "learning_rate": 1.9165127447358704e-05, "loss": 0.0197, "step": 17950 }, { "epoch": 0.44, "learning_rate": 1.9164634897180153e-05, "loss": 0.0188, "step": 17960 }, { "epoch": 0.44, "learning_rate": 1.9164142347001602e-05, "loss": 0.0178, "step": 17970 }, { "epoch": 0.44, "learning_rate": 1.9163649796823055e-05, "loss": 0.0173, "step": 17980 }, { "epoch": 0.44, "learning_rate": 1.9163157246644504e-05, "loss": 0.0136, "step": 17990 }, { "epoch": 0.44, "learning_rate": 1.9162664696465953e-05, "loss": 0.0182, "step": 18000 }, { "epoch": 0.44, "learning_rate": 1.9162172146287406e-05, "loss": 0.0195, "step": 18010 }, { "epoch": 0.44, "learning_rate": 1.9161679596108855e-05, "loss": 0.0302, "step": 18020 }, { "epoch": 0.44, "learning_rate": 1.9161187045930304e-05, "loss": 0.0177, "step": 18030 }, { "epoch": 0.44, "learning_rate": 1.9160694495751756e-05, "loss": 0.0215, "step": 18040 }, { "epoch": 0.44, "learning_rate": 1.9160201945573206e-05, "loss": 0.0217, "step": 18050 }, { "epoch": 0.44, "learning_rate": 1.9159709395394658e-05, "loss": 0.0159, "step": 18060 }, { "epoch": 0.44, "learning_rate": 1.9159216845216107e-05, "loss": 0.0171, "step": 18070 }, { "epoch": 0.44, "learning_rate": 1.915872429503756e-05, "loss": 0.0187, "step": 18080 }, { "epoch": 0.44, "learning_rate": 1.915823174485901e-05, "loss": 0.0152, "step": 18090 }, { "epoch": 0.44, "learning_rate": 1.915773919468046e-05, "loss": 0.0244, "step": 18100 }, { "epoch": 0.44, "learning_rate": 1.915724664450191e-05, "loss": 0.0157, "step": 18110 }, { "epoch": 0.45, "learning_rate": 1.915675409432336e-05, "loss": 0.0172, "step": 18120 }, { "epoch": 0.45, "learning_rate": 1.9156261544144812e-05, "loss": 0.0172, "step": 18130 }, { "epoch": 0.45, "learning_rate": 1.915576899396626e-05, "loss": 0.0179, "step": 18140 }, { "epoch": 0.45, "learning_rate": 1.915527644378771e-05, "loss": 0.0149, "step": 18150 }, { "epoch": 0.45, "learning_rate": 1.9154783893609163e-05, "loss": 0.0173, "step": 18160 }, { "epoch": 0.45, "learning_rate": 1.9154291343430612e-05, "loss": 0.024, "step": 18170 }, { "epoch": 0.45, "learning_rate": 1.915379879325206e-05, "loss": 0.0149, "step": 18180 }, { "epoch": 0.45, "learning_rate": 1.9153306243073514e-05, "loss": 0.0186, "step": 18190 }, { "epoch": 0.45, "learning_rate": 1.9152813692894966e-05, "loss": 0.0191, "step": 18200 }, { "epoch": 0.45, "learning_rate": 1.9152321142716416e-05, "loss": 0.0181, "step": 18210 }, { "epoch": 0.45, "learning_rate": 1.9151828592537868e-05, "loss": 0.0153, "step": 18220 }, { "epoch": 0.45, "learning_rate": 1.9151336042359317e-05, "loss": 0.0161, "step": 18230 }, { "epoch": 0.45, "learning_rate": 1.915084349218077e-05, "loss": 0.017, "step": 18240 }, { "epoch": 0.45, "learning_rate": 1.915035094200222e-05, "loss": 0.0148, "step": 18250 }, { "epoch": 0.45, "learning_rate": 1.9149858391823668e-05, "loss": 0.0162, "step": 18260 }, { "epoch": 0.45, "learning_rate": 1.914936584164512e-05, "loss": 0.018, "step": 18270 }, { "epoch": 0.45, "learning_rate": 1.914887329146657e-05, "loss": 0.0178, "step": 18280 }, { "epoch": 0.45, "learning_rate": 1.914838074128802e-05, "loss": 0.0246, "step": 18290 }, { "epoch": 0.45, "learning_rate": 1.914788819110947e-05, "loss": 0.0178, "step": 18300 }, { "epoch": 0.45, "learning_rate": 1.914739564093092e-05, "loss": 0.0138, "step": 18310 }, { "epoch": 0.45, "learning_rate": 1.914690309075237e-05, "loss": 0.0227, "step": 18320 }, { "epoch": 0.45, "learning_rate": 1.9146410540573822e-05, "loss": 0.0281, "step": 18330 }, { "epoch": 0.45, "learning_rate": 1.914591799039527e-05, "loss": 0.0164, "step": 18340 }, { "epoch": 0.45, "learning_rate": 1.9145425440216724e-05, "loss": 0.017, "step": 18350 }, { "epoch": 0.45, "learning_rate": 1.9144932890038173e-05, "loss": 0.0213, "step": 18360 }, { "epoch": 0.45, "learning_rate": 1.9144440339859626e-05, "loss": 0.0187, "step": 18370 }, { "epoch": 0.45, "learning_rate": 1.9143947789681075e-05, "loss": 0.0225, "step": 18380 }, { "epoch": 0.45, "learning_rate": 1.9143455239502527e-05, "loss": 0.0149, "step": 18390 }, { "epoch": 0.45, "learning_rate": 1.9142962689323977e-05, "loss": 0.0141, "step": 18400 }, { "epoch": 0.45, "learning_rate": 1.9142470139145426e-05, "loss": 0.0263, "step": 18410 }, { "epoch": 0.45, "learning_rate": 1.9141977588966878e-05, "loss": 0.0147, "step": 18420 }, { "epoch": 0.45, "learning_rate": 1.9141485038788327e-05, "loss": 0.0218, "step": 18430 }, { "epoch": 0.45, "learning_rate": 1.914099248860978e-05, "loss": 0.0187, "step": 18440 }, { "epoch": 0.45, "learning_rate": 1.914049993843123e-05, "loss": 0.0145, "step": 18450 }, { "epoch": 0.45, "learning_rate": 1.9140007388252678e-05, "loss": 0.017, "step": 18460 }, { "epoch": 0.45, "learning_rate": 1.913951483807413e-05, "loss": 0.0156, "step": 18470 }, { "epoch": 0.45, "learning_rate": 1.913902228789558e-05, "loss": 0.0189, "step": 18480 }, { "epoch": 0.45, "learning_rate": 1.9138529737717032e-05, "loss": 0.0188, "step": 18490 }, { "epoch": 0.45, "learning_rate": 1.913803718753848e-05, "loss": 0.018, "step": 18500 }, { "epoch": 0.45, "learning_rate": 1.9137544637359934e-05, "loss": 0.0167, "step": 18510 }, { "epoch": 0.45, "learning_rate": 1.9137052087181383e-05, "loss": 0.0174, "step": 18520 }, { "epoch": 0.46, "learning_rate": 1.9136559537002836e-05, "loss": 0.016, "step": 18530 }, { "epoch": 0.46, "learning_rate": 1.9136066986824285e-05, "loss": 0.0148, "step": 18540 }, { "epoch": 0.46, "learning_rate": 1.9135574436645734e-05, "loss": 0.0184, "step": 18550 }, { "epoch": 0.46, "learning_rate": 1.9135081886467187e-05, "loss": 0.0182, "step": 18560 }, { "epoch": 0.46, "learning_rate": 1.9134589336288636e-05, "loss": 0.0187, "step": 18570 }, { "epoch": 0.46, "learning_rate": 1.9134096786110085e-05, "loss": 0.023, "step": 18580 }, { "epoch": 0.46, "learning_rate": 1.9133604235931537e-05, "loss": 0.0167, "step": 18590 }, { "epoch": 0.46, "learning_rate": 1.9133111685752987e-05, "loss": 0.0155, "step": 18600 }, { "epoch": 0.46, "learning_rate": 1.9132619135574436e-05, "loss": 0.017, "step": 18610 }, { "epoch": 0.46, "learning_rate": 1.913212658539589e-05, "loss": 0.0144, "step": 18620 }, { "epoch": 0.46, "learning_rate": 1.9131634035217337e-05, "loss": 0.0186, "step": 18630 }, { "epoch": 0.46, "learning_rate": 1.913114148503879e-05, "loss": 0.0313, "step": 18640 }, { "epoch": 0.46, "learning_rate": 1.9130648934860243e-05, "loss": 0.0162, "step": 18650 }, { "epoch": 0.46, "learning_rate": 1.913015638468169e-05, "loss": 0.0216, "step": 18660 }, { "epoch": 0.46, "learning_rate": 1.912966383450314e-05, "loss": 0.0138, "step": 18670 }, { "epoch": 0.46, "learning_rate": 1.9129171284324593e-05, "loss": 0.0147, "step": 18680 }, { "epoch": 0.46, "learning_rate": 1.9128678734146043e-05, "loss": 0.0358, "step": 18690 }, { "epoch": 0.46, "learning_rate": 1.9128186183967495e-05, "loss": 0.0167, "step": 18700 }, { "epoch": 0.46, "learning_rate": 1.9127693633788944e-05, "loss": 0.0157, "step": 18710 }, { "epoch": 0.46, "learning_rate": 1.9127201083610393e-05, "loss": 0.0154, "step": 18720 }, { "epoch": 0.46, "learning_rate": 1.9126708533431846e-05, "loss": 0.0345, "step": 18730 }, { "epoch": 0.46, "learning_rate": 1.9126215983253295e-05, "loss": 0.0176, "step": 18740 }, { "epoch": 0.46, "learning_rate": 1.9125723433074744e-05, "loss": 0.0168, "step": 18750 }, { "epoch": 0.46, "learning_rate": 1.9125230882896197e-05, "loss": 0.0169, "step": 18760 }, { "epoch": 0.46, "learning_rate": 1.9124738332717646e-05, "loss": 0.0175, "step": 18770 }, { "epoch": 0.46, "learning_rate": 1.91242457825391e-05, "loss": 0.0178, "step": 18780 }, { "epoch": 0.46, "learning_rate": 1.9123753232360548e-05, "loss": 0.0155, "step": 18790 }, { "epoch": 0.46, "learning_rate": 1.9123260682182e-05, "loss": 0.0206, "step": 18800 }, { "epoch": 0.46, "learning_rate": 1.912276813200345e-05, "loss": 0.0168, "step": 18810 }, { "epoch": 0.46, "learning_rate": 1.9122275581824902e-05, "loss": 0.0188, "step": 18820 }, { "epoch": 0.46, "learning_rate": 1.912178303164635e-05, "loss": 0.0152, "step": 18830 }, { "epoch": 0.46, "learning_rate": 1.91212904814678e-05, "loss": 0.0158, "step": 18840 }, { "epoch": 0.46, "learning_rate": 1.9120797931289253e-05, "loss": 0.0171, "step": 18850 }, { "epoch": 0.46, "learning_rate": 1.9120305381110702e-05, "loss": 0.0255, "step": 18860 }, { "epoch": 0.46, "learning_rate": 1.911981283093215e-05, "loss": 0.0194, "step": 18870 }, { "epoch": 0.46, "learning_rate": 1.9119320280753603e-05, "loss": 0.0179, "step": 18880 }, { "epoch": 0.46, "learning_rate": 1.9118827730575053e-05, "loss": 0.0283, "step": 18890 }, { "epoch": 0.46, "learning_rate": 1.9118335180396502e-05, "loss": 0.0167, "step": 18900 }, { "epoch": 0.46, "learning_rate": 1.9117842630217954e-05, "loss": 0.0148, "step": 18910 }, { "epoch": 0.46, "learning_rate": 1.9117350080039407e-05, "loss": 0.0188, "step": 18920 }, { "epoch": 0.47, "learning_rate": 1.9116857529860856e-05, "loss": 0.0163, "step": 18930 }, { "epoch": 0.47, "learning_rate": 1.911636497968231e-05, "loss": 0.0221, "step": 18940 }, { "epoch": 0.47, "learning_rate": 1.9115872429503758e-05, "loss": 0.0171, "step": 18950 }, { "epoch": 0.47, "learning_rate": 1.911537987932521e-05, "loss": 0.0171, "step": 18960 }, { "epoch": 0.47, "learning_rate": 1.911488732914666e-05, "loss": 0.0188, "step": 18970 }, { "epoch": 0.47, "learning_rate": 1.911439477896811e-05, "loss": 0.0168, "step": 18980 }, { "epoch": 0.47, "learning_rate": 1.911390222878956e-05, "loss": 0.0171, "step": 18990 }, { "epoch": 0.47, "learning_rate": 1.911340967861101e-05, "loss": 0.025, "step": 19000 }, { "epoch": 0.47, "learning_rate": 1.911291712843246e-05, "loss": 0.0161, "step": 19010 }, { "epoch": 0.47, "learning_rate": 1.9112424578253912e-05, "loss": 0.02, "step": 19020 }, { "epoch": 0.47, "learning_rate": 1.911193202807536e-05, "loss": 0.0162, "step": 19030 }, { "epoch": 0.47, "learning_rate": 1.911143947789681e-05, "loss": 0.0203, "step": 19040 }, { "epoch": 0.47, "learning_rate": 1.9110946927718263e-05, "loss": 0.0197, "step": 19050 }, { "epoch": 0.47, "learning_rate": 1.9110454377539712e-05, "loss": 0.0158, "step": 19060 }, { "epoch": 0.47, "learning_rate": 1.9109961827361164e-05, "loss": 0.0285, "step": 19070 }, { "epoch": 0.47, "learning_rate": 1.9109469277182614e-05, "loss": 0.0223, "step": 19080 }, { "epoch": 0.47, "learning_rate": 1.9108976727004066e-05, "loss": 0.0144, "step": 19090 }, { "epoch": 0.47, "learning_rate": 1.9108484176825515e-05, "loss": 0.02, "step": 19100 }, { "epoch": 0.47, "learning_rate": 1.9107991626646968e-05, "loss": 0.0152, "step": 19110 }, { "epoch": 0.47, "learning_rate": 1.9107499076468417e-05, "loss": 0.0143, "step": 19120 }, { "epoch": 0.47, "learning_rate": 1.9107006526289866e-05, "loss": 0.0135, "step": 19130 }, { "epoch": 0.47, "learning_rate": 1.910651397611132e-05, "loss": 0.0182, "step": 19140 }, { "epoch": 0.47, "learning_rate": 1.9106021425932768e-05, "loss": 0.0282, "step": 19150 }, { "epoch": 0.47, "learning_rate": 1.910552887575422e-05, "loss": 0.029, "step": 19160 }, { "epoch": 0.47, "learning_rate": 1.910503632557567e-05, "loss": 0.0134, "step": 19170 }, { "epoch": 0.47, "learning_rate": 1.910454377539712e-05, "loss": 0.0248, "step": 19180 }, { "epoch": 0.47, "learning_rate": 1.910405122521857e-05, "loss": 0.0164, "step": 19190 }, { "epoch": 0.47, "learning_rate": 1.910355867504002e-05, "loss": 0.0217, "step": 19200 }, { "epoch": 0.47, "learning_rate": 1.9103066124861473e-05, "loss": 0.0231, "step": 19210 }, { "epoch": 0.47, "learning_rate": 1.9102573574682922e-05, "loss": 0.0178, "step": 19220 }, { "epoch": 0.47, "learning_rate": 1.9102081024504374e-05, "loss": 0.0177, "step": 19230 }, { "epoch": 0.47, "learning_rate": 1.9101588474325824e-05, "loss": 0.0165, "step": 19240 }, { "epoch": 0.47, "learning_rate": 1.9101095924147276e-05, "loss": 0.0153, "step": 19250 }, { "epoch": 0.47, "learning_rate": 1.9100603373968725e-05, "loss": 0.0159, "step": 19260 }, { "epoch": 0.47, "learning_rate": 1.9100110823790174e-05, "loss": 0.0147, "step": 19270 }, { "epoch": 0.47, "learning_rate": 1.9099618273611627e-05, "loss": 0.0167, "step": 19280 }, { "epoch": 0.47, "learning_rate": 1.9099125723433076e-05, "loss": 0.0138, "step": 19290 }, { "epoch": 0.47, "learning_rate": 1.9098633173254525e-05, "loss": 0.0215, "step": 19300 }, { "epoch": 0.47, "learning_rate": 1.9098140623075978e-05, "loss": 0.0151, "step": 19310 }, { "epoch": 0.47, "learning_rate": 1.9097648072897427e-05, "loss": 0.0165, "step": 19320 }, { "epoch": 0.47, "learning_rate": 1.9097155522718876e-05, "loss": 0.0175, "step": 19330 }, { "epoch": 0.48, "learning_rate": 1.909666297254033e-05, "loss": 0.0232, "step": 19340 }, { "epoch": 0.48, "learning_rate": 1.9096170422361778e-05, "loss": 0.0155, "step": 19350 }, { "epoch": 0.48, "learning_rate": 1.909567787218323e-05, "loss": 0.0252, "step": 19360 }, { "epoch": 0.48, "learning_rate": 1.9095185322004683e-05, "loss": 0.0139, "step": 19370 }, { "epoch": 0.48, "learning_rate": 1.9094692771826132e-05, "loss": 0.0155, "step": 19380 }, { "epoch": 0.48, "learning_rate": 1.909420022164758e-05, "loss": 0.0261, "step": 19390 }, { "epoch": 0.48, "learning_rate": 1.9093707671469034e-05, "loss": 0.0325, "step": 19400 }, { "epoch": 0.48, "learning_rate": 1.9093215121290483e-05, "loss": 0.0132, "step": 19410 }, { "epoch": 0.48, "learning_rate": 1.9092722571111935e-05, "loss": 0.0161, "step": 19420 }, { "epoch": 0.48, "learning_rate": 1.9092230020933385e-05, "loss": 0.0305, "step": 19430 }, { "epoch": 0.48, "learning_rate": 1.9091737470754834e-05, "loss": 0.0272, "step": 19440 }, { "epoch": 0.48, "learning_rate": 1.9091244920576286e-05, "loss": 0.0357, "step": 19450 }, { "epoch": 0.48, "learning_rate": 1.9090752370397735e-05, "loss": 0.0253, "step": 19460 }, { "epoch": 0.48, "learning_rate": 1.9090259820219185e-05, "loss": 0.0166, "step": 19470 }, { "epoch": 0.48, "learning_rate": 1.9089767270040637e-05, "loss": 0.015, "step": 19480 }, { "epoch": 0.48, "learning_rate": 1.9089274719862086e-05, "loss": 0.0175, "step": 19490 }, { "epoch": 0.48, "learning_rate": 1.908878216968354e-05, "loss": 0.013, "step": 19500 }, { "epoch": 0.48, "learning_rate": 1.9088289619504988e-05, "loss": 0.0147, "step": 19510 }, { "epoch": 0.48, "learning_rate": 1.908779706932644e-05, "loss": 0.0151, "step": 19520 }, { "epoch": 0.48, "learning_rate": 1.908730451914789e-05, "loss": 0.0159, "step": 19530 }, { "epoch": 0.48, "learning_rate": 1.9086811968969342e-05, "loss": 0.0218, "step": 19540 }, { "epoch": 0.48, "learning_rate": 1.908631941879079e-05, "loss": 0.018, "step": 19550 }, { "epoch": 0.48, "learning_rate": 1.908582686861224e-05, "loss": 0.0144, "step": 19560 }, { "epoch": 0.48, "learning_rate": 1.9085334318433693e-05, "loss": 0.0156, "step": 19570 }, { "epoch": 0.48, "learning_rate": 1.9084841768255142e-05, "loss": 0.023, "step": 19580 }, { "epoch": 0.48, "learning_rate": 1.908434921807659e-05, "loss": 0.023, "step": 19590 }, { "epoch": 0.48, "learning_rate": 1.9083856667898044e-05, "loss": 0.0254, "step": 19600 }, { "epoch": 0.48, "learning_rate": 1.9083364117719493e-05, "loss": 0.0147, "step": 19610 }, { "epoch": 0.48, "learning_rate": 1.9082871567540942e-05, "loss": 0.0191, "step": 19620 }, { "epoch": 0.48, "learning_rate": 1.9082379017362395e-05, "loss": 0.0167, "step": 19630 }, { "epoch": 0.48, "learning_rate": 1.9081886467183847e-05, "loss": 0.0134, "step": 19640 }, { "epoch": 0.48, "learning_rate": 1.9081393917005296e-05, "loss": 0.0156, "step": 19650 }, { "epoch": 0.48, "learning_rate": 1.908090136682675e-05, "loss": 0.0161, "step": 19660 }, { "epoch": 0.48, "learning_rate": 1.9080408816648198e-05, "loss": 0.0155, "step": 19670 }, { "epoch": 0.48, "learning_rate": 1.907991626646965e-05, "loss": 0.0168, "step": 19680 }, { "epoch": 0.48, "learning_rate": 1.90794237162911e-05, "loss": 0.0149, "step": 19690 }, { "epoch": 0.48, "learning_rate": 1.907893116611255e-05, "loss": 0.0158, "step": 19700 }, { "epoch": 0.48, "learning_rate": 1.9078438615934e-05, "loss": 0.0166, "step": 19710 }, { "epoch": 0.48, "learning_rate": 1.907794606575545e-05, "loss": 0.0172, "step": 19720 }, { "epoch": 0.48, "learning_rate": 1.90774535155769e-05, "loss": 0.0181, "step": 19730 }, { "epoch": 0.48, "learning_rate": 1.9076960965398352e-05, "loss": 0.0145, "step": 19740 }, { "epoch": 0.49, "learning_rate": 1.90764684152198e-05, "loss": 0.0142, "step": 19750 }, { "epoch": 0.49, "learning_rate": 1.907597586504125e-05, "loss": 0.0302, "step": 19760 }, { "epoch": 0.49, "learning_rate": 1.9075483314862703e-05, "loss": 0.0311, "step": 19770 }, { "epoch": 0.49, "learning_rate": 1.9074990764684152e-05, "loss": 0.0136, "step": 19780 }, { "epoch": 0.49, "learning_rate": 1.9074498214505605e-05, "loss": 0.0159, "step": 19790 }, { "epoch": 0.49, "learning_rate": 1.9074005664327054e-05, "loss": 0.0174, "step": 19800 }, { "epoch": 0.49, "learning_rate": 1.9073513114148506e-05, "loss": 0.0181, "step": 19810 }, { "epoch": 0.49, "learning_rate": 1.9073020563969956e-05, "loss": 0.0158, "step": 19820 }, { "epoch": 0.49, "learning_rate": 1.9072528013791408e-05, "loss": 0.0176, "step": 19830 }, { "epoch": 0.49, "learning_rate": 1.9072035463612857e-05, "loss": 0.0154, "step": 19840 }, { "epoch": 0.49, "learning_rate": 1.9071542913434306e-05, "loss": 0.0145, "step": 19850 }, { "epoch": 0.49, "learning_rate": 1.907105036325576e-05, "loss": 0.0182, "step": 19860 }, { "epoch": 0.49, "learning_rate": 1.9070557813077208e-05, "loss": 0.0158, "step": 19870 }, { "epoch": 0.49, "learning_rate": 1.907006526289866e-05, "loss": 0.0222, "step": 19880 }, { "epoch": 0.49, "learning_rate": 1.906957271272011e-05, "loss": 0.0144, "step": 19890 }, { "epoch": 0.49, "learning_rate": 1.906908016254156e-05, "loss": 0.0195, "step": 19900 }, { "epoch": 0.49, "learning_rate": 1.906858761236301e-05, "loss": 0.0179, "step": 19910 }, { "epoch": 0.49, "learning_rate": 1.906809506218446e-05, "loss": 0.0167, "step": 19920 }, { "epoch": 0.49, "learning_rate": 1.9067602512005913e-05, "loss": 0.0166, "step": 19930 }, { "epoch": 0.49, "learning_rate": 1.9067109961827362e-05, "loss": 0.0165, "step": 19940 }, { "epoch": 0.49, "learning_rate": 1.9066617411648815e-05, "loss": 0.0157, "step": 19950 }, { "epoch": 0.49, "learning_rate": 1.9066124861470264e-05, "loss": 0.0225, "step": 19960 }, { "epoch": 0.49, "learning_rate": 1.9065632311291716e-05, "loss": 0.018, "step": 19970 }, { "epoch": 0.49, "learning_rate": 1.9065139761113166e-05, "loss": 0.0195, "step": 19980 }, { "epoch": 0.49, "learning_rate": 1.9064647210934615e-05, "loss": 0.0222, "step": 19990 }, { "epoch": 0.49, "learning_rate": 1.9064154660756067e-05, "loss": 0.014, "step": 20000 }, { "epoch": 0.49, "eval_bleu": 97.4618, "eval_gen_len_ratio": 0.9994, "eval_loss": 0.011187599040567875, "eval_runtime": 254.7279, "eval_samples_per_second": 7.325, "eval_steps_per_second": 0.232, "step": 20000 }, { "epoch": 0.49, "learning_rate": 1.9063662110577516e-05, "loss": 0.0164, "step": 20010 }, { "epoch": 0.49, "learning_rate": 1.9063169560398966e-05, "loss": 0.0157, "step": 20020 }, { "epoch": 0.49, "learning_rate": 1.9062677010220418e-05, "loss": 0.0264, "step": 20030 }, { "epoch": 0.49, "learning_rate": 1.9062184460041867e-05, "loss": 0.0158, "step": 20040 }, { "epoch": 0.49, "learning_rate": 1.9061691909863316e-05, "loss": 0.0141, "step": 20050 }, { "epoch": 0.49, "learning_rate": 1.906119935968477e-05, "loss": 0.0179, "step": 20060 }, { "epoch": 0.49, "learning_rate": 1.9060706809506218e-05, "loss": 0.0163, "step": 20070 }, { "epoch": 0.49, "learning_rate": 1.906021425932767e-05, "loss": 0.0168, "step": 20080 }, { "epoch": 0.49, "learning_rate": 1.9059721709149123e-05, "loss": 0.0173, "step": 20090 }, { "epoch": 0.49, "learning_rate": 1.9059229158970572e-05, "loss": 0.0162, "step": 20100 }, { "epoch": 0.49, "learning_rate": 1.905873660879202e-05, "loss": 0.0187, "step": 20110 }, { "epoch": 0.49, "learning_rate": 1.9058244058613474e-05, "loss": 0.0281, "step": 20120 }, { "epoch": 0.49, "learning_rate": 1.9057751508434923e-05, "loss": 0.0187, "step": 20130 }, { "epoch": 0.49, "learning_rate": 1.9057258958256376e-05, "loss": 0.0153, "step": 20140 }, { "epoch": 0.5, "learning_rate": 1.9056766408077825e-05, "loss": 0.0162, "step": 20150 }, { "epoch": 0.5, "learning_rate": 1.9056273857899274e-05, "loss": 0.017, "step": 20160 }, { "epoch": 0.5, "learning_rate": 1.9055781307720727e-05, "loss": 0.0163, "step": 20170 }, { "epoch": 0.5, "learning_rate": 1.9055288757542176e-05, "loss": 0.0166, "step": 20180 }, { "epoch": 0.5, "learning_rate": 1.9054796207363625e-05, "loss": 0.0171, "step": 20190 }, { "epoch": 0.5, "learning_rate": 1.9054303657185077e-05, "loss": 0.0151, "step": 20200 }, { "epoch": 0.5, "learning_rate": 1.9053811107006527e-05, "loss": 0.0211, "step": 20210 }, { "epoch": 0.5, "learning_rate": 1.905331855682798e-05, "loss": 0.017, "step": 20220 }, { "epoch": 0.5, "learning_rate": 1.9052826006649428e-05, "loss": 0.0249, "step": 20230 }, { "epoch": 0.5, "learning_rate": 1.905233345647088e-05, "loss": 0.0164, "step": 20240 }, { "epoch": 0.5, "learning_rate": 1.905184090629233e-05, "loss": 0.0153, "step": 20250 }, { "epoch": 0.5, "learning_rate": 1.9051348356113782e-05, "loss": 0.0189, "step": 20260 }, { "epoch": 0.5, "learning_rate": 1.905085580593523e-05, "loss": 0.0223, "step": 20270 }, { "epoch": 0.5, "learning_rate": 1.905036325575668e-05, "loss": 0.0145, "step": 20280 }, { "epoch": 0.5, "learning_rate": 1.9049870705578133e-05, "loss": 0.0157, "step": 20290 }, { "epoch": 0.5, "learning_rate": 1.9049378155399582e-05, "loss": 0.0149, "step": 20300 }, { "epoch": 0.5, "learning_rate": 1.904888560522103e-05, "loss": 0.0154, "step": 20310 }, { "epoch": 0.5, "learning_rate": 1.9048393055042484e-05, "loss": 0.0222, "step": 20320 }, { "epoch": 0.5, "learning_rate": 1.9047900504863933e-05, "loss": 0.0149, "step": 20330 }, { "epoch": 0.5, "learning_rate": 1.9047407954685382e-05, "loss": 0.0152, "step": 20340 }, { "epoch": 0.5, "learning_rate": 1.9046915404506835e-05, "loss": 0.0166, "step": 20350 }, { "epoch": 0.5, "learning_rate": 1.9046422854328288e-05, "loss": 0.0164, "step": 20360 }, { "epoch": 0.5, "learning_rate": 1.9045930304149737e-05, "loss": 0.0173, "step": 20370 }, { "epoch": 0.5, "learning_rate": 1.904543775397119e-05, "loss": 0.0155, "step": 20380 }, { "epoch": 0.5, "learning_rate": 1.904494520379264e-05, "loss": 0.0267, "step": 20390 }, { "epoch": 0.5, "learning_rate": 1.904445265361409e-05, "loss": 0.017, "step": 20400 }, { "epoch": 0.5, "learning_rate": 1.904396010343554e-05, "loss": 0.0252, "step": 20410 }, { "epoch": 0.5, "learning_rate": 1.904346755325699e-05, "loss": 0.0153, "step": 20420 }, { "epoch": 0.5, "learning_rate": 1.9042975003078442e-05, "loss": 0.0212, "step": 20430 }, { "epoch": 0.5, "learning_rate": 1.904248245289989e-05, "loss": 0.0272, "step": 20440 }, { "epoch": 0.5, "learning_rate": 1.904198990272134e-05, "loss": 0.0236, "step": 20450 }, { "epoch": 0.5, "learning_rate": 1.9041497352542793e-05, "loss": 0.0167, "step": 20460 }, { "epoch": 0.5, "learning_rate": 1.904100480236424e-05, "loss": 0.0286, "step": 20470 }, { "epoch": 0.5, "learning_rate": 1.904051225218569e-05, "loss": 0.0184, "step": 20480 }, { "epoch": 0.5, "learning_rate": 1.9040019702007143e-05, "loss": 0.0158, "step": 20490 }, { "epoch": 0.5, "learning_rate": 1.9039527151828593e-05, "loss": 0.0166, "step": 20500 }, { "epoch": 0.5, "learning_rate": 1.9039034601650045e-05, "loss": 0.0153, "step": 20510 }, { "epoch": 0.5, "learning_rate": 1.9038542051471494e-05, "loss": 0.017, "step": 20520 }, { "epoch": 0.5, "learning_rate": 1.9038049501292947e-05, "loss": 0.0242, "step": 20530 }, { "epoch": 0.5, "learning_rate": 1.9037556951114396e-05, "loss": 0.019, "step": 20540 }, { "epoch": 0.5, "learning_rate": 1.903706440093585e-05, "loss": 0.0155, "step": 20550 }, { "epoch": 0.51, "learning_rate": 1.9036571850757298e-05, "loss": 0.0175, "step": 20560 }, { "epoch": 0.51, "learning_rate": 1.9036079300578747e-05, "loss": 0.0206, "step": 20570 }, { "epoch": 0.51, "learning_rate": 1.90355867504002e-05, "loss": 0.017, "step": 20580 }, { "epoch": 0.51, "learning_rate": 1.903509420022165e-05, "loss": 0.0145, "step": 20590 }, { "epoch": 0.51, "learning_rate": 1.90346016500431e-05, "loss": 0.026, "step": 20600 }, { "epoch": 0.51, "learning_rate": 1.903410909986455e-05, "loss": 0.014, "step": 20610 }, { "epoch": 0.51, "learning_rate": 1.9033616549686e-05, "loss": 0.0134, "step": 20620 }, { "epoch": 0.51, "learning_rate": 1.9033123999507452e-05, "loss": 0.0167, "step": 20630 }, { "epoch": 0.51, "learning_rate": 1.90326314493289e-05, "loss": 0.0173, "step": 20640 }, { "epoch": 0.51, "learning_rate": 1.9032138899150353e-05, "loss": 0.016, "step": 20650 }, { "epoch": 0.51, "learning_rate": 1.9031646348971803e-05, "loss": 0.0188, "step": 20660 }, { "epoch": 0.51, "learning_rate": 1.9031153798793255e-05, "loss": 0.0243, "step": 20670 }, { "epoch": 0.51, "learning_rate": 1.9030661248614704e-05, "loss": 0.0146, "step": 20680 }, { "epoch": 0.51, "learning_rate": 1.9030168698436157e-05, "loss": 0.0248, "step": 20690 }, { "epoch": 0.51, "learning_rate": 1.9029676148257606e-05, "loss": 0.0298, "step": 20700 }, { "epoch": 0.51, "learning_rate": 1.9029183598079055e-05, "loss": 0.0153, "step": 20710 }, { "epoch": 0.51, "learning_rate": 1.9028691047900508e-05, "loss": 0.0265, "step": 20720 }, { "epoch": 0.51, "learning_rate": 1.9028198497721957e-05, "loss": 0.0187, "step": 20730 }, { "epoch": 0.51, "learning_rate": 1.9027705947543406e-05, "loss": 0.0159, "step": 20740 }, { "epoch": 0.51, "learning_rate": 1.902721339736486e-05, "loss": 0.015, "step": 20750 }, { "epoch": 0.51, "learning_rate": 1.9026720847186308e-05, "loss": 0.0167, "step": 20760 }, { "epoch": 0.51, "learning_rate": 1.9026228297007757e-05, "loss": 0.0152, "step": 20770 }, { "epoch": 0.51, "learning_rate": 1.902573574682921e-05, "loss": 0.016, "step": 20780 }, { "epoch": 0.51, "learning_rate": 1.902524319665066e-05, "loss": 0.0144, "step": 20790 }, { "epoch": 0.51, "learning_rate": 1.902475064647211e-05, "loss": 0.0148, "step": 20800 }, { "epoch": 0.51, "learning_rate": 1.9024258096293564e-05, "loss": 0.0159, "step": 20810 }, { "epoch": 0.51, "learning_rate": 1.9023765546115013e-05, "loss": 0.015, "step": 20820 }, { "epoch": 0.51, "learning_rate": 1.9023272995936462e-05, "loss": 0.018, "step": 20830 }, { "epoch": 0.51, "learning_rate": 1.9022780445757914e-05, "loss": 0.0185, "step": 20840 }, { "epoch": 0.51, "learning_rate": 1.9022287895579364e-05, "loss": 0.0201, "step": 20850 }, { "epoch": 0.51, "learning_rate": 1.9021795345400816e-05, "loss": 0.0167, "step": 20860 }, { "epoch": 0.51, "learning_rate": 1.9021302795222265e-05, "loss": 0.0145, "step": 20870 }, { "epoch": 0.51, "learning_rate": 1.9020810245043714e-05, "loss": 0.0174, "step": 20880 }, { "epoch": 0.51, "learning_rate": 1.9020317694865167e-05, "loss": 0.0163, "step": 20890 }, { "epoch": 0.51, "learning_rate": 1.9019825144686616e-05, "loss": 0.0157, "step": 20900 }, { "epoch": 0.51, "learning_rate": 1.9019332594508065e-05, "loss": 0.0231, "step": 20910 }, { "epoch": 0.51, "learning_rate": 1.9018840044329518e-05, "loss": 0.0151, "step": 20920 }, { "epoch": 0.51, "learning_rate": 1.9018347494150967e-05, "loss": 0.018, "step": 20930 }, { "epoch": 0.51, "learning_rate": 1.901785494397242e-05, "loss": 0.0172, "step": 20940 }, { "epoch": 0.51, "learning_rate": 1.901736239379387e-05, "loss": 0.0246, "step": 20950 }, { "epoch": 0.51, "learning_rate": 1.901686984361532e-05, "loss": 0.0161, "step": 20960 }, { "epoch": 0.52, "learning_rate": 1.901637729343677e-05, "loss": 0.0132, "step": 20970 }, { "epoch": 0.52, "learning_rate": 1.9015884743258223e-05, "loss": 0.0155, "step": 20980 }, { "epoch": 0.52, "learning_rate": 1.9015392193079672e-05, "loss": 0.0159, "step": 20990 }, { "epoch": 0.52, "learning_rate": 1.901489964290112e-05, "loss": 0.0167, "step": 21000 }, { "epoch": 0.52, "learning_rate": 1.9014407092722574e-05, "loss": 0.0141, "step": 21010 }, { "epoch": 0.52, "learning_rate": 1.9013914542544023e-05, "loss": 0.022, "step": 21020 }, { "epoch": 0.52, "learning_rate": 1.9013421992365472e-05, "loss": 0.0159, "step": 21030 }, { "epoch": 0.52, "learning_rate": 1.9012929442186924e-05, "loss": 0.0182, "step": 21040 }, { "epoch": 0.52, "learning_rate": 1.9012436892008374e-05, "loss": 0.0148, "step": 21050 }, { "epoch": 0.52, "learning_rate": 1.9011944341829823e-05, "loss": 0.0181, "step": 21060 }, { "epoch": 0.52, "learning_rate": 1.9011451791651275e-05, "loss": 0.0175, "step": 21070 }, { "epoch": 0.52, "learning_rate": 1.9010959241472728e-05, "loss": 0.014, "step": 21080 }, { "epoch": 0.52, "learning_rate": 1.9010466691294177e-05, "loss": 0.0254, "step": 21090 }, { "epoch": 0.52, "learning_rate": 1.900997414111563e-05, "loss": 0.0156, "step": 21100 }, { "epoch": 0.52, "learning_rate": 1.900948159093708e-05, "loss": 0.015, "step": 21110 }, { "epoch": 0.52, "learning_rate": 1.900898904075853e-05, "loss": 0.0165, "step": 21120 }, { "epoch": 0.52, "learning_rate": 1.900849649057998e-05, "loss": 0.0244, "step": 21130 }, { "epoch": 0.52, "learning_rate": 1.900800394040143e-05, "loss": 0.0227, "step": 21140 }, { "epoch": 0.52, "learning_rate": 1.9007511390222882e-05, "loss": 0.0172, "step": 21150 }, { "epoch": 0.52, "learning_rate": 1.900701884004433e-05, "loss": 0.0131, "step": 21160 }, { "epoch": 0.52, "learning_rate": 1.900652628986578e-05, "loss": 0.0176, "step": 21170 }, { "epoch": 0.52, "learning_rate": 1.9006033739687233e-05, "loss": 0.0133, "step": 21180 }, { "epoch": 0.52, "learning_rate": 1.9005541189508682e-05, "loss": 0.0152, "step": 21190 }, { "epoch": 0.52, "learning_rate": 1.900504863933013e-05, "loss": 0.0157, "step": 21200 }, { "epoch": 0.52, "learning_rate": 1.9004556089151584e-05, "loss": 0.0164, "step": 21210 }, { "epoch": 0.52, "learning_rate": 1.9004063538973033e-05, "loss": 0.0178, "step": 21220 }, { "epoch": 0.52, "learning_rate": 1.9003570988794485e-05, "loss": 0.0175, "step": 21230 }, { "epoch": 0.52, "learning_rate": 1.9003078438615935e-05, "loss": 0.013, "step": 21240 }, { "epoch": 0.52, "learning_rate": 1.9002585888437387e-05, "loss": 0.0176, "step": 21250 }, { "epoch": 0.52, "learning_rate": 1.9002093338258836e-05, "loss": 0.0188, "step": 21260 }, { "epoch": 0.52, "learning_rate": 1.900160078808029e-05, "loss": 0.0194, "step": 21270 }, { "epoch": 0.52, "learning_rate": 1.9001108237901738e-05, "loss": 0.0153, "step": 21280 }, { "epoch": 0.52, "learning_rate": 1.9000615687723187e-05, "loss": 0.0232, "step": 21290 }, { "epoch": 0.52, "learning_rate": 1.900012313754464e-05, "loss": 0.0171, "step": 21300 }, { "epoch": 0.52, "learning_rate": 1.899963058736609e-05, "loss": 0.0173, "step": 21310 }, { "epoch": 0.52, "learning_rate": 1.899913803718754e-05, "loss": 0.0167, "step": 21320 }, { "epoch": 0.52, "learning_rate": 1.899864548700899e-05, "loss": 0.0141, "step": 21330 }, { "epoch": 0.52, "learning_rate": 1.899815293683044e-05, "loss": 0.0154, "step": 21340 }, { "epoch": 0.52, "learning_rate": 1.8997660386651892e-05, "loss": 0.0261, "step": 21350 }, { "epoch": 0.52, "learning_rate": 1.899716783647334e-05, "loss": 0.0143, "step": 21360 }, { "epoch": 0.52, "learning_rate": 1.8996675286294794e-05, "loss": 0.0181, "step": 21370 }, { "epoch": 0.53, "learning_rate": 1.8996182736116243e-05, "loss": 0.0157, "step": 21380 }, { "epoch": 0.53, "learning_rate": 1.8995690185937695e-05, "loss": 0.0162, "step": 21390 }, { "epoch": 0.53, "learning_rate": 1.8995197635759145e-05, "loss": 0.0188, "step": 21400 }, { "epoch": 0.53, "learning_rate": 1.8994705085580597e-05, "loss": 0.0138, "step": 21410 }, { "epoch": 0.53, "learning_rate": 1.8994212535402046e-05, "loss": 0.015, "step": 21420 }, { "epoch": 0.53, "learning_rate": 1.8993719985223495e-05, "loss": 0.0143, "step": 21430 }, { "epoch": 0.53, "learning_rate": 1.8993227435044948e-05, "loss": 0.017, "step": 21440 }, { "epoch": 0.53, "learning_rate": 1.8992734884866397e-05, "loss": 0.0166, "step": 21450 }, { "epoch": 0.53, "learning_rate": 1.8992242334687846e-05, "loss": 0.0152, "step": 21460 }, { "epoch": 0.53, "learning_rate": 1.89917497845093e-05, "loss": 0.0328, "step": 21470 }, { "epoch": 0.53, "learning_rate": 1.8991257234330748e-05, "loss": 0.0148, "step": 21480 }, { "epoch": 0.53, "learning_rate": 1.8990764684152197e-05, "loss": 0.0144, "step": 21490 }, { "epoch": 0.53, "learning_rate": 1.899027213397365e-05, "loss": 0.0157, "step": 21500 }, { "epoch": 0.53, "learning_rate": 1.89897795837951e-05, "loss": 0.0138, "step": 21510 }, { "epoch": 0.53, "learning_rate": 1.898928703361655e-05, "loss": 0.0151, "step": 21520 }, { "epoch": 0.53, "learning_rate": 1.8988794483438004e-05, "loss": 0.02, "step": 21530 }, { "epoch": 0.53, "learning_rate": 1.8988301933259453e-05, "loss": 0.0171, "step": 21540 }, { "epoch": 0.53, "learning_rate": 1.8987809383080902e-05, "loss": 0.0157, "step": 21550 }, { "epoch": 0.53, "learning_rate": 1.8987316832902355e-05, "loss": 0.0152, "step": 21560 }, { "epoch": 0.53, "learning_rate": 1.8986824282723804e-05, "loss": 0.0136, "step": 21570 }, { "epoch": 0.53, "learning_rate": 1.8986331732545256e-05, "loss": 0.0174, "step": 21580 }, { "epoch": 0.53, "learning_rate": 1.8985839182366706e-05, "loss": 0.0193, "step": 21590 }, { "epoch": 0.53, "learning_rate": 1.8985346632188155e-05, "loss": 0.0147, "step": 21600 }, { "epoch": 0.53, "learning_rate": 1.8984854082009607e-05, "loss": 0.0242, "step": 21610 }, { "epoch": 0.53, "learning_rate": 1.8984361531831056e-05, "loss": 0.0163, "step": 21620 }, { "epoch": 0.53, "learning_rate": 1.8983868981652506e-05, "loss": 0.0178, "step": 21630 }, { "epoch": 0.53, "learning_rate": 1.8983376431473958e-05, "loss": 0.0142, "step": 21640 }, { "epoch": 0.53, "learning_rate": 1.8982883881295407e-05, "loss": 0.0224, "step": 21650 }, { "epoch": 0.53, "learning_rate": 1.898239133111686e-05, "loss": 0.016, "step": 21660 }, { "epoch": 0.53, "learning_rate": 1.898189878093831e-05, "loss": 0.0256, "step": 21670 }, { "epoch": 0.53, "learning_rate": 1.898140623075976e-05, "loss": 0.023, "step": 21680 }, { "epoch": 0.53, "learning_rate": 1.898091368058121e-05, "loss": 0.0147, "step": 21690 }, { "epoch": 0.53, "learning_rate": 1.8980421130402663e-05, "loss": 0.0264, "step": 21700 }, { "epoch": 0.53, "learning_rate": 1.8979928580224112e-05, "loss": 0.0264, "step": 21710 }, { "epoch": 0.53, "learning_rate": 1.897943603004556e-05, "loss": 0.0143, "step": 21720 }, { "epoch": 0.53, "learning_rate": 1.8978943479867014e-05, "loss": 0.0162, "step": 21730 }, { "epoch": 0.53, "learning_rate": 1.8978450929688463e-05, "loss": 0.016, "step": 21740 }, { "epoch": 0.53, "learning_rate": 1.8977958379509912e-05, "loss": 0.015, "step": 21750 }, { "epoch": 0.53, "learning_rate": 1.8977465829331365e-05, "loss": 0.0212, "step": 21760 }, { "epoch": 0.53, "learning_rate": 1.8976973279152814e-05, "loss": 0.0211, "step": 21770 }, { "epoch": 0.54, "learning_rate": 1.8976480728974263e-05, "loss": 0.0156, "step": 21780 }, { "epoch": 0.54, "learning_rate": 1.8975988178795716e-05, "loss": 0.013, "step": 21790 }, { "epoch": 0.54, "learning_rate": 1.8975495628617168e-05, "loss": 0.0163, "step": 21800 }, { "epoch": 0.54, "learning_rate": 1.8975003078438617e-05, "loss": 0.0153, "step": 21810 }, { "epoch": 0.54, "learning_rate": 1.897451052826007e-05, "loss": 0.0166, "step": 21820 }, { "epoch": 0.54, "learning_rate": 1.897401797808152e-05, "loss": 0.027, "step": 21830 }, { "epoch": 0.54, "learning_rate": 1.897352542790297e-05, "loss": 0.0157, "step": 21840 }, { "epoch": 0.54, "learning_rate": 1.897303287772442e-05, "loss": 0.0131, "step": 21850 }, { "epoch": 0.54, "learning_rate": 1.897254032754587e-05, "loss": 0.0167, "step": 21860 }, { "epoch": 0.54, "learning_rate": 1.8972047777367322e-05, "loss": 0.0219, "step": 21870 }, { "epoch": 0.54, "learning_rate": 1.897155522718877e-05, "loss": 0.0179, "step": 21880 }, { "epoch": 0.54, "learning_rate": 1.897106267701022e-05, "loss": 0.0252, "step": 21890 }, { "epoch": 0.54, "learning_rate": 1.8970570126831673e-05, "loss": 0.0208, "step": 21900 }, { "epoch": 0.54, "learning_rate": 1.8970077576653122e-05, "loss": 0.0131, "step": 21910 }, { "epoch": 0.54, "learning_rate": 1.896958502647457e-05, "loss": 0.0156, "step": 21920 }, { "epoch": 0.54, "learning_rate": 1.8969092476296024e-05, "loss": 0.0172, "step": 21930 }, { "epoch": 0.54, "learning_rate": 1.8968599926117473e-05, "loss": 0.0184, "step": 21940 }, { "epoch": 0.54, "learning_rate": 1.8968107375938926e-05, "loss": 0.0153, "step": 21950 }, { "epoch": 0.54, "learning_rate": 1.8967614825760375e-05, "loss": 0.0178, "step": 21960 }, { "epoch": 0.54, "learning_rate": 1.8967122275581827e-05, "loss": 0.0205, "step": 21970 }, { "epoch": 0.54, "learning_rate": 1.8966629725403277e-05, "loss": 0.015, "step": 21980 }, { "epoch": 0.54, "learning_rate": 1.896613717522473e-05, "loss": 0.0173, "step": 21990 }, { "epoch": 0.54, "learning_rate": 1.8965644625046178e-05, "loss": 0.0157, "step": 22000 }, { "epoch": 0.54, "learning_rate": 1.8965152074867627e-05, "loss": 0.0185, "step": 22010 }, { "epoch": 0.54, "learning_rate": 1.896465952468908e-05, "loss": 0.0232, "step": 22020 }, { "epoch": 0.54, "learning_rate": 1.896416697451053e-05, "loss": 0.014, "step": 22030 }, { "epoch": 0.54, "learning_rate": 1.896367442433198e-05, "loss": 0.0142, "step": 22040 }, { "epoch": 0.54, "learning_rate": 1.896318187415343e-05, "loss": 0.0169, "step": 22050 }, { "epoch": 0.54, "learning_rate": 1.896268932397488e-05, "loss": 0.0215, "step": 22060 }, { "epoch": 0.54, "learning_rate": 1.8962196773796332e-05, "loss": 0.0153, "step": 22070 }, { "epoch": 0.54, "learning_rate": 1.896170422361778e-05, "loss": 0.0173, "step": 22080 }, { "epoch": 0.54, "learning_rate": 1.8961211673439234e-05, "loss": 0.0154, "step": 22090 }, { "epoch": 0.54, "learning_rate": 1.8960719123260683e-05, "loss": 0.0229, "step": 22100 }, { "epoch": 0.54, "learning_rate": 1.8960226573082136e-05, "loss": 0.0156, "step": 22110 }, { "epoch": 0.54, "learning_rate": 1.8959734022903585e-05, "loss": 0.0158, "step": 22120 }, { "epoch": 0.54, "learning_rate": 1.8959241472725038e-05, "loss": 0.015, "step": 22130 }, { "epoch": 0.54, "learning_rate": 1.8958748922546487e-05, "loss": 0.0252, "step": 22140 }, { "epoch": 0.54, "learning_rate": 1.8958256372367936e-05, "loss": 0.0151, "step": 22150 }, { "epoch": 0.54, "learning_rate": 1.895776382218939e-05, "loss": 0.0156, "step": 22160 }, { "epoch": 0.54, "learning_rate": 1.8957271272010838e-05, "loss": 0.0151, "step": 22170 }, { "epoch": 0.54, "learning_rate": 1.8956778721832287e-05, "loss": 0.0172, "step": 22180 }, { "epoch": 0.55, "learning_rate": 1.895628617165374e-05, "loss": 0.027, "step": 22190 }, { "epoch": 0.55, "learning_rate": 1.895579362147519e-05, "loss": 0.0269, "step": 22200 }, { "epoch": 0.55, "learning_rate": 1.8955301071296637e-05, "loss": 0.0192, "step": 22210 }, { "epoch": 0.55, "learning_rate": 1.895480852111809e-05, "loss": 0.0191, "step": 22220 }, { "epoch": 0.55, "learning_rate": 1.895431597093954e-05, "loss": 0.0172, "step": 22230 }, { "epoch": 0.55, "learning_rate": 1.8953823420760992e-05, "loss": 0.0193, "step": 22240 }, { "epoch": 0.55, "learning_rate": 1.8953330870582444e-05, "loss": 0.0211, "step": 22250 }, { "epoch": 0.55, "learning_rate": 1.8952838320403893e-05, "loss": 0.0154, "step": 22260 }, { "epoch": 0.55, "learning_rate": 1.8952345770225343e-05, "loss": 0.0156, "step": 22270 }, { "epoch": 0.55, "learning_rate": 1.8951853220046795e-05, "loss": 0.0156, "step": 22280 }, { "epoch": 0.55, "learning_rate": 1.8951360669868244e-05, "loss": 0.016, "step": 22290 }, { "epoch": 0.55, "learning_rate": 1.8950868119689697e-05, "loss": 0.0141, "step": 22300 }, { "epoch": 0.55, "learning_rate": 1.8950375569511146e-05, "loss": 0.0156, "step": 22310 }, { "epoch": 0.55, "learning_rate": 1.8949883019332595e-05, "loss": 0.0135, "step": 22320 }, { "epoch": 0.55, "learning_rate": 1.8949390469154048e-05, "loss": 0.0157, "step": 22330 }, { "epoch": 0.55, "learning_rate": 1.8948897918975497e-05, "loss": 0.0197, "step": 22340 }, { "epoch": 0.55, "learning_rate": 1.8948405368796946e-05, "loss": 0.0132, "step": 22350 }, { "epoch": 0.55, "learning_rate": 1.89479128186184e-05, "loss": 0.0228, "step": 22360 }, { "epoch": 0.55, "learning_rate": 1.8947420268439848e-05, "loss": 0.0263, "step": 22370 }, { "epoch": 0.55, "learning_rate": 1.89469277182613e-05, "loss": 0.0169, "step": 22380 }, { "epoch": 0.55, "learning_rate": 1.894643516808275e-05, "loss": 0.0148, "step": 22390 }, { "epoch": 0.55, "learning_rate": 1.8945942617904202e-05, "loss": 0.0153, "step": 22400 }, { "epoch": 0.55, "learning_rate": 1.894545006772565e-05, "loss": 0.0168, "step": 22410 }, { "epoch": 0.55, "learning_rate": 1.8944957517547103e-05, "loss": 0.019, "step": 22420 }, { "epoch": 0.55, "learning_rate": 1.8944464967368553e-05, "loss": 0.0147, "step": 22430 }, { "epoch": 0.55, "learning_rate": 1.8943972417190002e-05, "loss": 0.0238, "step": 22440 }, { "epoch": 0.55, "learning_rate": 1.8943479867011454e-05, "loss": 0.0176, "step": 22450 }, { "epoch": 0.55, "learning_rate": 1.8942987316832903e-05, "loss": 0.0146, "step": 22460 }, { "epoch": 0.55, "learning_rate": 1.8942494766654353e-05, "loss": 0.0174, "step": 22470 }, { "epoch": 0.55, "learning_rate": 1.8942002216475805e-05, "loss": 0.0196, "step": 22480 }, { "epoch": 0.55, "learning_rate": 1.8941509666297254e-05, "loss": 0.0173, "step": 22490 }, { "epoch": 0.55, "learning_rate": 1.8941017116118703e-05, "loss": 0.0158, "step": 22500 }, { "epoch": 0.55, "learning_rate": 1.8940524565940156e-05, "loss": 0.0175, "step": 22510 }, { "epoch": 0.55, "learning_rate": 1.894003201576161e-05, "loss": 0.0241, "step": 22520 }, { "epoch": 0.55, "learning_rate": 1.8939539465583058e-05, "loss": 0.0142, "step": 22530 }, { "epoch": 0.55, "learning_rate": 1.893904691540451e-05, "loss": 0.0165, "step": 22540 }, { "epoch": 0.55, "learning_rate": 1.893855436522596e-05, "loss": 0.0153, "step": 22550 }, { "epoch": 0.55, "learning_rate": 1.8938061815047412e-05, "loss": 0.0186, "step": 22560 }, { "epoch": 0.55, "learning_rate": 1.893756926486886e-05, "loss": 0.026, "step": 22570 }, { "epoch": 0.55, "learning_rate": 1.893707671469031e-05, "loss": 0.0167, "step": 22580 }, { "epoch": 0.55, "learning_rate": 1.8936584164511763e-05, "loss": 0.0163, "step": 22590 }, { "epoch": 0.56, "learning_rate": 1.8936091614333212e-05, "loss": 0.0128, "step": 22600 }, { "epoch": 0.56, "learning_rate": 1.893559906415466e-05, "loss": 0.0144, "step": 22610 }, { "epoch": 0.56, "learning_rate": 1.8935106513976114e-05, "loss": 0.0152, "step": 22620 }, { "epoch": 0.56, "learning_rate": 1.8934613963797563e-05, "loss": 0.014, "step": 22630 }, { "epoch": 0.56, "learning_rate": 1.8934121413619012e-05, "loss": 0.0157, "step": 22640 }, { "epoch": 0.56, "learning_rate": 1.8933628863440464e-05, "loss": 0.0146, "step": 22650 }, { "epoch": 0.56, "learning_rate": 1.8933136313261914e-05, "loss": 0.0149, "step": 22660 }, { "epoch": 0.56, "learning_rate": 1.8932643763083366e-05, "loss": 0.0147, "step": 22670 }, { "epoch": 0.56, "learning_rate": 1.8932151212904815e-05, "loss": 0.0147, "step": 22680 }, { "epoch": 0.56, "learning_rate": 1.8931658662726268e-05, "loss": 0.0154, "step": 22690 }, { "epoch": 0.56, "learning_rate": 1.8931166112547717e-05, "loss": 0.0163, "step": 22700 }, { "epoch": 0.56, "learning_rate": 1.893067356236917e-05, "loss": 0.0135, "step": 22710 }, { "epoch": 0.56, "learning_rate": 1.893018101219062e-05, "loss": 0.0159, "step": 22720 }, { "epoch": 0.56, "learning_rate": 1.8929688462012068e-05, "loss": 0.0134, "step": 22730 }, { "epoch": 0.56, "learning_rate": 1.892919591183352e-05, "loss": 0.0163, "step": 22740 }, { "epoch": 0.56, "learning_rate": 1.892870336165497e-05, "loss": 0.025, "step": 22750 }, { "epoch": 0.56, "learning_rate": 1.8928210811476422e-05, "loss": 0.0154, "step": 22760 }, { "epoch": 0.56, "learning_rate": 1.892771826129787e-05, "loss": 0.0132, "step": 22770 }, { "epoch": 0.56, "learning_rate": 1.892722571111932e-05, "loss": 0.018, "step": 22780 }, { "epoch": 0.56, "learning_rate": 1.8926733160940773e-05, "loss": 0.0253, "step": 22790 }, { "epoch": 0.56, "learning_rate": 1.8926240610762222e-05, "loss": 0.0135, "step": 22800 }, { "epoch": 0.56, "learning_rate": 1.8925748060583674e-05, "loss": 0.0352, "step": 22810 }, { "epoch": 0.56, "learning_rate": 1.8925255510405124e-05, "loss": 0.0133, "step": 22820 }, { "epoch": 0.56, "learning_rate": 1.8924762960226576e-05, "loss": 0.0152, "step": 22830 }, { "epoch": 0.56, "learning_rate": 1.8924270410048025e-05, "loss": 0.0156, "step": 22840 }, { "epoch": 0.56, "learning_rate": 1.8923777859869478e-05, "loss": 0.0158, "step": 22850 }, { "epoch": 0.56, "learning_rate": 1.8923285309690927e-05, "loss": 0.0152, "step": 22860 }, { "epoch": 0.56, "learning_rate": 1.8922792759512376e-05, "loss": 0.0173, "step": 22870 }, { "epoch": 0.56, "learning_rate": 1.892230020933383e-05, "loss": 0.0132, "step": 22880 }, { "epoch": 0.56, "learning_rate": 1.8921807659155278e-05, "loss": 0.0161, "step": 22890 }, { "epoch": 0.56, "learning_rate": 1.8921315108976727e-05, "loss": 0.015, "step": 22900 }, { "epoch": 0.56, "learning_rate": 1.892082255879818e-05, "loss": 0.0205, "step": 22910 }, { "epoch": 0.56, "learning_rate": 1.892033000861963e-05, "loss": 0.0145, "step": 22920 }, { "epoch": 0.56, "learning_rate": 1.8919837458441078e-05, "loss": 0.0149, "step": 22930 }, { "epoch": 0.56, "learning_rate": 1.891934490826253e-05, "loss": 0.0155, "step": 22940 }, { "epoch": 0.56, "learning_rate": 1.891885235808398e-05, "loss": 0.0152, "step": 22950 }, { "epoch": 0.56, "learning_rate": 1.8918359807905432e-05, "loss": 0.015, "step": 22960 }, { "epoch": 0.56, "learning_rate": 1.8917867257726885e-05, "loss": 0.0164, "step": 22970 }, { "epoch": 0.56, "learning_rate": 1.8917374707548334e-05, "loss": 0.0208, "step": 22980 }, { "epoch": 0.56, "learning_rate": 1.8916882157369783e-05, "loss": 0.0155, "step": 22990 }, { "epoch": 0.57, "learning_rate": 1.8916389607191235e-05, "loss": 0.0197, "step": 23000 }, { "epoch": 0.57, "learning_rate": 1.8915897057012685e-05, "loss": 0.0185, "step": 23010 }, { "epoch": 0.57, "learning_rate": 1.8915404506834137e-05, "loss": 0.0188, "step": 23020 }, { "epoch": 0.57, "learning_rate": 1.8914911956655586e-05, "loss": 0.0149, "step": 23030 }, { "epoch": 0.57, "learning_rate": 1.8914419406477035e-05, "loss": 0.0161, "step": 23040 }, { "epoch": 0.57, "learning_rate": 1.8913926856298488e-05, "loss": 0.015, "step": 23050 }, { "epoch": 0.57, "learning_rate": 1.8913434306119937e-05, "loss": 0.0179, "step": 23060 }, { "epoch": 0.57, "learning_rate": 1.8912941755941386e-05, "loss": 0.0251, "step": 23070 }, { "epoch": 0.57, "learning_rate": 1.891244920576284e-05, "loss": 0.0195, "step": 23080 }, { "epoch": 0.57, "learning_rate": 1.8911956655584288e-05, "loss": 0.0258, "step": 23090 }, { "epoch": 0.57, "learning_rate": 1.891146410540574e-05, "loss": 0.0292, "step": 23100 }, { "epoch": 0.57, "learning_rate": 1.891097155522719e-05, "loss": 0.0205, "step": 23110 }, { "epoch": 0.57, "learning_rate": 1.8910479005048642e-05, "loss": 0.0163, "step": 23120 }, { "epoch": 0.57, "learning_rate": 1.890998645487009e-05, "loss": 0.0149, "step": 23130 }, { "epoch": 0.57, "learning_rate": 1.8909493904691544e-05, "loss": 0.0158, "step": 23140 }, { "epoch": 0.57, "learning_rate": 1.8909001354512993e-05, "loss": 0.0173, "step": 23150 }, { "epoch": 0.57, "learning_rate": 1.8908508804334442e-05, "loss": 0.0146, "step": 23160 }, { "epoch": 0.57, "learning_rate": 1.8908016254155895e-05, "loss": 0.0259, "step": 23170 }, { "epoch": 0.57, "learning_rate": 1.8907523703977344e-05, "loss": 0.0141, "step": 23180 }, { "epoch": 0.57, "learning_rate": 1.8907031153798793e-05, "loss": 0.0164, "step": 23190 }, { "epoch": 0.57, "learning_rate": 1.8906538603620245e-05, "loss": 0.0153, "step": 23200 }, { "epoch": 0.57, "learning_rate": 1.8906046053441695e-05, "loss": 0.0172, "step": 23210 }, { "epoch": 0.57, "learning_rate": 1.8905553503263144e-05, "loss": 0.0263, "step": 23220 }, { "epoch": 0.57, "learning_rate": 1.8905060953084596e-05, "loss": 0.0134, "step": 23230 }, { "epoch": 0.57, "learning_rate": 1.890456840290605e-05, "loss": 0.0142, "step": 23240 }, { "epoch": 0.57, "learning_rate": 1.8904075852727498e-05, "loss": 0.0221, "step": 23250 }, { "epoch": 0.57, "learning_rate": 1.890358330254895e-05, "loss": 0.014, "step": 23260 }, { "epoch": 0.57, "learning_rate": 1.89030907523704e-05, "loss": 0.0172, "step": 23270 }, { "epoch": 0.57, "learning_rate": 1.8902598202191852e-05, "loss": 0.0144, "step": 23280 }, { "epoch": 0.57, "learning_rate": 1.89021056520133e-05, "loss": 0.0144, "step": 23290 }, { "epoch": 0.57, "learning_rate": 1.890161310183475e-05, "loss": 0.0281, "step": 23300 }, { "epoch": 0.57, "learning_rate": 1.8901120551656203e-05, "loss": 0.0151, "step": 23310 }, { "epoch": 0.57, "learning_rate": 1.8900628001477652e-05, "loss": 0.0141, "step": 23320 }, { "epoch": 0.57, "learning_rate": 1.89001354512991e-05, "loss": 0.0185, "step": 23330 }, { "epoch": 0.57, "learning_rate": 1.8899642901120554e-05, "loss": 0.0184, "step": 23340 }, { "epoch": 0.57, "learning_rate": 1.8899150350942003e-05, "loss": 0.0149, "step": 23350 }, { "epoch": 0.57, "learning_rate": 1.8898657800763452e-05, "loss": 0.0212, "step": 23360 }, { "epoch": 0.57, "learning_rate": 1.8898165250584905e-05, "loss": 0.0218, "step": 23370 }, { "epoch": 0.57, "learning_rate": 1.8897672700406354e-05, "loss": 0.0243, "step": 23380 }, { "epoch": 0.57, "learning_rate": 1.8897180150227806e-05, "loss": 0.0173, "step": 23390 }, { "epoch": 0.57, "learning_rate": 1.8896687600049256e-05, "loss": 0.0149, "step": 23400 }, { "epoch": 0.58, "learning_rate": 1.8896195049870708e-05, "loss": 0.0206, "step": 23410 }, { "epoch": 0.58, "learning_rate": 1.8895702499692157e-05, "loss": 0.0136, "step": 23420 }, { "epoch": 0.58, "learning_rate": 1.889520994951361e-05, "loss": 0.0142, "step": 23430 }, { "epoch": 0.58, "learning_rate": 1.889471739933506e-05, "loss": 0.0138, "step": 23440 }, { "epoch": 0.58, "learning_rate": 1.8894224849156508e-05, "loss": 0.0142, "step": 23450 }, { "epoch": 0.58, "learning_rate": 1.889373229897796e-05, "loss": 0.0134, "step": 23460 }, { "epoch": 0.58, "learning_rate": 1.889323974879941e-05, "loss": 0.0166, "step": 23470 }, { "epoch": 0.58, "learning_rate": 1.8892747198620862e-05, "loss": 0.0153, "step": 23480 }, { "epoch": 0.58, "learning_rate": 1.889225464844231e-05, "loss": 0.016, "step": 23490 }, { "epoch": 0.58, "learning_rate": 1.889176209826376e-05, "loss": 0.0154, "step": 23500 }, { "epoch": 0.58, "learning_rate": 1.8891269548085213e-05, "loss": 0.0158, "step": 23510 }, { "epoch": 0.58, "learning_rate": 1.8890776997906662e-05, "loss": 0.0173, "step": 23520 }, { "epoch": 0.58, "learning_rate": 1.8890284447728115e-05, "loss": 0.0167, "step": 23530 }, { "epoch": 0.58, "learning_rate": 1.8889791897549564e-05, "loss": 0.0174, "step": 23540 }, { "epoch": 0.58, "learning_rate": 1.8889299347371017e-05, "loss": 0.0187, "step": 23550 }, { "epoch": 0.58, "learning_rate": 1.8888806797192466e-05, "loss": 0.0136, "step": 23560 }, { "epoch": 0.58, "learning_rate": 1.8888314247013918e-05, "loss": 0.0171, "step": 23570 }, { "epoch": 0.58, "learning_rate": 1.8887821696835367e-05, "loss": 0.0155, "step": 23580 }, { "epoch": 0.58, "learning_rate": 1.8887329146656817e-05, "loss": 0.0157, "step": 23590 }, { "epoch": 0.58, "learning_rate": 1.888683659647827e-05, "loss": 0.0233, "step": 23600 }, { "epoch": 0.58, "learning_rate": 1.8886344046299718e-05, "loss": 0.0135, "step": 23610 }, { "epoch": 0.58, "learning_rate": 1.8885851496121167e-05, "loss": 0.0146, "step": 23620 }, { "epoch": 0.58, "learning_rate": 1.888535894594262e-05, "loss": 0.0173, "step": 23630 }, { "epoch": 0.58, "learning_rate": 1.888486639576407e-05, "loss": 0.0208, "step": 23640 }, { "epoch": 0.58, "learning_rate": 1.8884373845585518e-05, "loss": 0.0141, "step": 23650 }, { "epoch": 0.58, "learning_rate": 1.888388129540697e-05, "loss": 0.0239, "step": 23660 }, { "epoch": 0.58, "learning_rate": 1.888338874522842e-05, "loss": 0.0154, "step": 23670 }, { "epoch": 0.58, "learning_rate": 1.8882896195049872e-05, "loss": 0.0146, "step": 23680 }, { "epoch": 0.58, "learning_rate": 1.8882403644871325e-05, "loss": 0.022, "step": 23690 }, { "epoch": 0.58, "learning_rate": 1.8881911094692774e-05, "loss": 0.0158, "step": 23700 }, { "epoch": 0.58, "learning_rate": 1.8881418544514223e-05, "loss": 0.0156, "step": 23710 }, { "epoch": 0.58, "learning_rate": 1.8880925994335676e-05, "loss": 0.02, "step": 23720 }, { "epoch": 0.58, "learning_rate": 1.8880433444157125e-05, "loss": 0.017, "step": 23730 }, { "epoch": 0.58, "learning_rate": 1.8879940893978577e-05, "loss": 0.0134, "step": 23740 }, { "epoch": 0.58, "learning_rate": 1.8879448343800027e-05, "loss": 0.0182, "step": 23750 }, { "epoch": 0.58, "learning_rate": 1.8878955793621476e-05, "loss": 0.015, "step": 23760 }, { "epoch": 0.58, "learning_rate": 1.8878463243442928e-05, "loss": 0.0143, "step": 23770 }, { "epoch": 0.58, "learning_rate": 1.8877970693264377e-05, "loss": 0.0234, "step": 23780 }, { "epoch": 0.58, "learning_rate": 1.8877478143085827e-05, "loss": 0.0131, "step": 23790 }, { "epoch": 0.58, "learning_rate": 1.887698559290728e-05, "loss": 0.0158, "step": 23800 }, { "epoch": 0.58, "learning_rate": 1.8876493042728728e-05, "loss": 0.0232, "step": 23810 }, { "epoch": 0.59, "learning_rate": 1.887600049255018e-05, "loss": 0.0158, "step": 23820 }, { "epoch": 0.59, "learning_rate": 1.887550794237163e-05, "loss": 0.014, "step": 23830 }, { "epoch": 0.59, "learning_rate": 1.8875015392193082e-05, "loss": 0.0167, "step": 23840 }, { "epoch": 0.59, "learning_rate": 1.887452284201453e-05, "loss": 0.018, "step": 23850 }, { "epoch": 0.59, "learning_rate": 1.8874030291835984e-05, "loss": 0.0168, "step": 23860 }, { "epoch": 0.59, "learning_rate": 1.8873537741657433e-05, "loss": 0.0251, "step": 23870 }, { "epoch": 0.59, "learning_rate": 1.8873045191478882e-05, "loss": 0.0165, "step": 23880 }, { "epoch": 0.59, "learning_rate": 1.8872552641300335e-05, "loss": 0.0147, "step": 23890 }, { "epoch": 0.59, "learning_rate": 1.8872060091121784e-05, "loss": 0.0142, "step": 23900 }, { "epoch": 0.59, "learning_rate": 1.8871567540943233e-05, "loss": 0.0206, "step": 23910 }, { "epoch": 0.59, "learning_rate": 1.8871074990764686e-05, "loss": 0.0214, "step": 23920 }, { "epoch": 0.59, "learning_rate": 1.8870582440586135e-05, "loss": 0.0147, "step": 23930 }, { "epoch": 0.59, "learning_rate": 1.8870089890407584e-05, "loss": 0.0169, "step": 23940 }, { "epoch": 0.59, "learning_rate": 1.8869597340229037e-05, "loss": 0.0171, "step": 23950 }, { "epoch": 0.59, "learning_rate": 1.886910479005049e-05, "loss": 0.0173, "step": 23960 }, { "epoch": 0.59, "learning_rate": 1.886861223987194e-05, "loss": 0.0141, "step": 23970 }, { "epoch": 0.59, "learning_rate": 1.886811968969339e-05, "loss": 0.0179, "step": 23980 }, { "epoch": 0.59, "learning_rate": 1.886762713951484e-05, "loss": 0.014, "step": 23990 }, { "epoch": 0.59, "learning_rate": 1.8867134589336293e-05, "loss": 0.017, "step": 24000 }, { "epoch": 0.59, "learning_rate": 1.8866642039157742e-05, "loss": 0.014, "step": 24010 }, { "epoch": 0.59, "learning_rate": 1.886614948897919e-05, "loss": 0.0308, "step": 24020 }, { "epoch": 0.59, "learning_rate": 1.8865656938800643e-05, "loss": 0.0235, "step": 24030 }, { "epoch": 0.59, "learning_rate": 1.8865164388622093e-05, "loss": 0.0147, "step": 24040 }, { "epoch": 0.59, "learning_rate": 1.8864671838443542e-05, "loss": 0.0248, "step": 24050 }, { "epoch": 0.59, "learning_rate": 1.8864179288264994e-05, "loss": 0.0149, "step": 24060 }, { "epoch": 0.59, "learning_rate": 1.8863686738086443e-05, "loss": 0.0154, "step": 24070 }, { "epoch": 0.59, "learning_rate": 1.8863194187907893e-05, "loss": 0.0141, "step": 24080 }, { "epoch": 0.59, "learning_rate": 1.8862701637729345e-05, "loss": 0.0145, "step": 24090 }, { "epoch": 0.59, "learning_rate": 1.8862209087550794e-05, "loss": 0.0159, "step": 24100 }, { "epoch": 0.59, "learning_rate": 1.8861716537372247e-05, "loss": 0.0188, "step": 24110 }, { "epoch": 0.59, "learning_rate": 1.8861223987193696e-05, "loss": 0.0158, "step": 24120 }, { "epoch": 0.59, "learning_rate": 1.886073143701515e-05, "loss": 0.0146, "step": 24130 }, { "epoch": 0.59, "learning_rate": 1.8860238886836598e-05, "loss": 0.02, "step": 24140 }, { "epoch": 0.59, "learning_rate": 1.885974633665805e-05, "loss": 0.0155, "step": 24150 }, { "epoch": 0.59, "learning_rate": 1.88592537864795e-05, "loss": 0.0136, "step": 24160 }, { "epoch": 0.59, "learning_rate": 1.885876123630095e-05, "loss": 0.013, "step": 24170 }, { "epoch": 0.59, "learning_rate": 1.88582686861224e-05, "loss": 0.0125, "step": 24180 }, { "epoch": 0.59, "learning_rate": 1.885777613594385e-05, "loss": 0.0153, "step": 24190 }, { "epoch": 0.59, "learning_rate": 1.8857283585765303e-05, "loss": 0.0182, "step": 24200 }, { "epoch": 0.59, "learning_rate": 1.8856791035586752e-05, "loss": 0.0152, "step": 24210 }, { "epoch": 0.6, "learning_rate": 1.88562984854082e-05, "loss": 0.0148, "step": 24220 }, { "epoch": 0.6, "learning_rate": 1.8855805935229653e-05, "loss": 0.0363, "step": 24230 }, { "epoch": 0.6, "learning_rate": 1.8855313385051103e-05, "loss": 0.0223, "step": 24240 }, { "epoch": 0.6, "learning_rate": 1.8854820834872555e-05, "loss": 0.0165, "step": 24250 }, { "epoch": 0.6, "learning_rate": 1.8854328284694004e-05, "loss": 0.0154, "step": 24260 }, { "epoch": 0.6, "learning_rate": 1.8853835734515457e-05, "loss": 0.0213, "step": 24270 }, { "epoch": 0.6, "learning_rate": 1.8853343184336906e-05, "loss": 0.0144, "step": 24280 }, { "epoch": 0.6, "learning_rate": 1.885285063415836e-05, "loss": 0.0149, "step": 24290 }, { "epoch": 0.6, "learning_rate": 1.8852358083979808e-05, "loss": 0.0153, "step": 24300 }, { "epoch": 0.6, "learning_rate": 1.8851865533801257e-05, "loss": 0.0166, "step": 24310 }, { "epoch": 0.6, "learning_rate": 1.885137298362271e-05, "loss": 0.0165, "step": 24320 }, { "epoch": 0.6, "learning_rate": 1.885088043344416e-05, "loss": 0.0161, "step": 24330 }, { "epoch": 0.6, "learning_rate": 1.8850387883265608e-05, "loss": 0.0152, "step": 24340 }, { "epoch": 0.6, "learning_rate": 1.884989533308706e-05, "loss": 0.015, "step": 24350 }, { "epoch": 0.6, "learning_rate": 1.884940278290851e-05, "loss": 0.0147, "step": 24360 }, { "epoch": 0.6, "learning_rate": 1.884891023272996e-05, "loss": 0.0161, "step": 24370 }, { "epoch": 0.6, "learning_rate": 1.884841768255141e-05, "loss": 0.0153, "step": 24380 }, { "epoch": 0.6, "learning_rate": 1.884792513237286e-05, "loss": 0.0167, "step": 24390 }, { "epoch": 0.6, "learning_rate": 1.8847432582194313e-05, "loss": 0.0153, "step": 24400 }, { "epoch": 0.6, "learning_rate": 1.8846940032015765e-05, "loss": 0.0145, "step": 24410 }, { "epoch": 0.6, "learning_rate": 1.8846447481837214e-05, "loss": 0.0151, "step": 24420 }, { "epoch": 0.6, "learning_rate": 1.8845954931658664e-05, "loss": 0.0156, "step": 24430 }, { "epoch": 0.6, "learning_rate": 1.8845462381480116e-05, "loss": 0.0167, "step": 24440 }, { "epoch": 0.6, "learning_rate": 1.8844969831301565e-05, "loss": 0.0145, "step": 24450 }, { "epoch": 0.6, "learning_rate": 1.8844477281123018e-05, "loss": 0.018, "step": 24460 }, { "epoch": 0.6, "learning_rate": 1.8843984730944467e-05, "loss": 0.0167, "step": 24470 }, { "epoch": 0.6, "learning_rate": 1.8843492180765916e-05, "loss": 0.0142, "step": 24480 }, { "epoch": 0.6, "learning_rate": 1.884299963058737e-05, "loss": 0.0141, "step": 24490 }, { "epoch": 0.6, "learning_rate": 1.8842507080408818e-05, "loss": 0.0143, "step": 24500 }, { "epoch": 0.6, "learning_rate": 1.8842014530230267e-05, "loss": 0.0138, "step": 24510 }, { "epoch": 0.6, "learning_rate": 1.884152198005172e-05, "loss": 0.015, "step": 24520 }, { "epoch": 0.6, "learning_rate": 1.884102942987317e-05, "loss": 0.0181, "step": 24530 }, { "epoch": 0.6, "learning_rate": 1.884053687969462e-05, "loss": 0.0132, "step": 24540 }, { "epoch": 0.6, "learning_rate": 1.884004432951607e-05, "loss": 0.016, "step": 24550 }, { "epoch": 0.6, "learning_rate": 1.8839551779337523e-05, "loss": 0.0254, "step": 24560 }, { "epoch": 0.6, "learning_rate": 1.8839059229158972e-05, "loss": 0.0142, "step": 24570 }, { "epoch": 0.6, "learning_rate": 1.8838566678980425e-05, "loss": 0.0142, "step": 24580 }, { "epoch": 0.6, "learning_rate": 1.8838074128801874e-05, "loss": 0.0157, "step": 24590 }, { "epoch": 0.6, "learning_rate": 1.8837581578623323e-05, "loss": 0.0161, "step": 24600 }, { "epoch": 0.6, "learning_rate": 1.8837089028444775e-05, "loss": 0.0144, "step": 24610 }, { "epoch": 0.6, "learning_rate": 1.8836596478266224e-05, "loss": 0.0148, "step": 24620 }, { "epoch": 0.61, "learning_rate": 1.8836103928087674e-05, "loss": 0.0137, "step": 24630 }, { "epoch": 0.61, "learning_rate": 1.8835611377909126e-05, "loss": 0.022, "step": 24640 }, { "epoch": 0.61, "learning_rate": 1.8835118827730575e-05, "loss": 0.0157, "step": 24650 }, { "epoch": 0.61, "learning_rate": 1.8834626277552024e-05, "loss": 0.0239, "step": 24660 }, { "epoch": 0.61, "learning_rate": 1.8834133727373477e-05, "loss": 0.0137, "step": 24670 }, { "epoch": 0.61, "learning_rate": 1.883364117719493e-05, "loss": 0.0302, "step": 24680 }, { "epoch": 0.61, "learning_rate": 1.883314862701638e-05, "loss": 0.0164, "step": 24690 }, { "epoch": 0.61, "learning_rate": 1.883265607683783e-05, "loss": 0.0148, "step": 24700 }, { "epoch": 0.61, "learning_rate": 1.883216352665928e-05, "loss": 0.0157, "step": 24710 }, { "epoch": 0.61, "learning_rate": 1.8831670976480733e-05, "loss": 0.019, "step": 24720 }, { "epoch": 0.61, "learning_rate": 1.8831178426302182e-05, "loss": 0.0147, "step": 24730 }, { "epoch": 0.61, "learning_rate": 1.883068587612363e-05, "loss": 0.0147, "step": 24740 }, { "epoch": 0.61, "learning_rate": 1.8830193325945084e-05, "loss": 0.0184, "step": 24750 }, { "epoch": 0.61, "learning_rate": 1.8829700775766533e-05, "loss": 0.0156, "step": 24760 }, { "epoch": 0.61, "learning_rate": 1.8829208225587982e-05, "loss": 0.017, "step": 24770 }, { "epoch": 0.61, "learning_rate": 1.8828715675409435e-05, "loss": 0.013, "step": 24780 }, { "epoch": 0.61, "learning_rate": 1.8828223125230884e-05, "loss": 0.0147, "step": 24790 }, { "epoch": 0.61, "learning_rate": 1.8827730575052333e-05, "loss": 0.0129, "step": 24800 }, { "epoch": 0.61, "learning_rate": 1.8827238024873785e-05, "loss": 0.0173, "step": 24810 }, { "epoch": 0.61, "learning_rate": 1.8826745474695235e-05, "loss": 0.0171, "step": 24820 }, { "epoch": 0.61, "learning_rate": 1.8826252924516687e-05, "loss": 0.0236, "step": 24830 }, { "epoch": 0.61, "learning_rate": 1.8825760374338136e-05, "loss": 0.0133, "step": 24840 }, { "epoch": 0.61, "learning_rate": 1.882526782415959e-05, "loss": 0.0143, "step": 24850 }, { "epoch": 0.61, "learning_rate": 1.8824775273981038e-05, "loss": 0.014, "step": 24860 }, { "epoch": 0.61, "learning_rate": 1.882428272380249e-05, "loss": 0.0137, "step": 24870 }, { "epoch": 0.61, "learning_rate": 1.882379017362394e-05, "loss": 0.0133, "step": 24880 }, { "epoch": 0.61, "learning_rate": 1.882329762344539e-05, "loss": 0.0159, "step": 24890 }, { "epoch": 0.61, "learning_rate": 1.882280507326684e-05, "loss": 0.0135, "step": 24900 }, { "epoch": 0.61, "learning_rate": 1.882231252308829e-05, "loss": 0.0226, "step": 24910 }, { "epoch": 0.61, "learning_rate": 1.882181997290974e-05, "loss": 0.0152, "step": 24920 }, { "epoch": 0.61, "learning_rate": 1.8821327422731192e-05, "loss": 0.013, "step": 24930 }, { "epoch": 0.61, "learning_rate": 1.882083487255264e-05, "loss": 0.0197, "step": 24940 }, { "epoch": 0.61, "learning_rate": 1.8820342322374094e-05, "loss": 0.022, "step": 24950 }, { "epoch": 0.61, "learning_rate": 1.8819849772195543e-05, "loss": 0.0109, "step": 24960 }, { "epoch": 0.61, "learning_rate": 1.8819357222016996e-05, "loss": 0.0171, "step": 24970 }, { "epoch": 0.61, "learning_rate": 1.8818864671838445e-05, "loss": 0.02, "step": 24980 }, { "epoch": 0.61, "learning_rate": 1.8818372121659897e-05, "loss": 0.0143, "step": 24990 }, { "epoch": 0.61, "learning_rate": 1.8817879571481346e-05, "loss": 0.0148, "step": 25000 }, { "epoch": 0.61, "eval_bleu": 97.5925, "eval_gen_len_ratio": 0.9995, "eval_loss": 0.010965537279844284, "eval_runtime": 253.2631, "eval_samples_per_second": 7.368, "eval_steps_per_second": 0.233, "step": 25000 }, { "epoch": 0.61, "learning_rate": 1.88173870213028e-05, "loss": 0.018, "step": 25010 }, { "epoch": 0.61, "learning_rate": 1.8816894471124248e-05, "loss": 0.0273, "step": 25020 }, { "epoch": 0.61, "learning_rate": 1.8816401920945697e-05, "loss": 0.015, "step": 25030 }, { "epoch": 0.62, "learning_rate": 1.881590937076715e-05, "loss": 0.0263, "step": 25040 }, { "epoch": 0.62, "learning_rate": 1.88154168205886e-05, "loss": 0.0141, "step": 25050 }, { "epoch": 0.62, "learning_rate": 1.8814924270410048e-05, "loss": 0.0186, "step": 25060 }, { "epoch": 0.62, "learning_rate": 1.88144317202315e-05, "loss": 0.0137, "step": 25070 }, { "epoch": 0.62, "learning_rate": 1.881393917005295e-05, "loss": 0.0167, "step": 25080 }, { "epoch": 0.62, "learning_rate": 1.88134466198744e-05, "loss": 0.0147, "step": 25090 }, { "epoch": 0.62, "learning_rate": 1.881295406969585e-05, "loss": 0.0139, "step": 25100 }, { "epoch": 0.62, "learning_rate": 1.88124615195173e-05, "loss": 0.0151, "step": 25110 }, { "epoch": 0.62, "learning_rate": 1.8811968969338753e-05, "loss": 0.0253, "step": 25120 }, { "epoch": 0.62, "learning_rate": 1.8811476419160206e-05, "loss": 0.0141, "step": 25130 }, { "epoch": 0.62, "learning_rate": 1.8810983868981655e-05, "loss": 0.0145, "step": 25140 }, { "epoch": 0.62, "learning_rate": 1.8810491318803104e-05, "loss": 0.0199, "step": 25150 }, { "epoch": 0.62, "learning_rate": 1.8809998768624556e-05, "loss": 0.0132, "step": 25160 }, { "epoch": 0.62, "learning_rate": 1.8809506218446006e-05, "loss": 0.0138, "step": 25170 }, { "epoch": 0.62, "learning_rate": 1.8809013668267458e-05, "loss": 0.0127, "step": 25180 }, { "epoch": 0.62, "learning_rate": 1.8808521118088907e-05, "loss": 0.0169, "step": 25190 }, { "epoch": 0.62, "learning_rate": 1.8808028567910356e-05, "loss": 0.0137, "step": 25200 }, { "epoch": 0.62, "learning_rate": 1.880753601773181e-05, "loss": 0.0158, "step": 25210 }, { "epoch": 0.62, "learning_rate": 1.8807043467553258e-05, "loss": 0.013, "step": 25220 }, { "epoch": 0.62, "learning_rate": 1.8806550917374707e-05, "loss": 0.0149, "step": 25230 }, { "epoch": 0.62, "learning_rate": 1.880605836719616e-05, "loss": 0.0133, "step": 25240 }, { "epoch": 0.62, "learning_rate": 1.880556581701761e-05, "loss": 0.0128, "step": 25250 }, { "epoch": 0.62, "learning_rate": 1.880507326683906e-05, "loss": 0.0149, "step": 25260 }, { "epoch": 0.62, "learning_rate": 1.880458071666051e-05, "loss": 0.0214, "step": 25270 }, { "epoch": 0.62, "learning_rate": 1.8804088166481963e-05, "loss": 0.0139, "step": 25280 }, { "epoch": 0.62, "learning_rate": 1.8803595616303412e-05, "loss": 0.012, "step": 25290 }, { "epoch": 0.62, "learning_rate": 1.8803103066124865e-05, "loss": 0.0143, "step": 25300 }, { "epoch": 0.62, "learning_rate": 1.8802610515946314e-05, "loss": 0.0141, "step": 25310 }, { "epoch": 0.62, "learning_rate": 1.8802117965767763e-05, "loss": 0.0177, "step": 25320 }, { "epoch": 0.62, "learning_rate": 1.8801625415589216e-05, "loss": 0.0164, "step": 25330 }, { "epoch": 0.62, "learning_rate": 1.8801132865410665e-05, "loss": 0.0247, "step": 25340 }, { "epoch": 0.62, "learning_rate": 1.8800640315232114e-05, "loss": 0.0163, "step": 25350 }, { "epoch": 0.62, "learning_rate": 1.8800147765053567e-05, "loss": 0.0151, "step": 25360 }, { "epoch": 0.62, "learning_rate": 1.8799655214875016e-05, "loss": 0.0229, "step": 25370 }, { "epoch": 0.62, "learning_rate": 1.8799162664696465e-05, "loss": 0.0254, "step": 25380 }, { "epoch": 0.62, "learning_rate": 1.8798670114517917e-05, "loss": 0.0248, "step": 25390 }, { "epoch": 0.62, "learning_rate": 1.879817756433937e-05, "loss": 0.0212, "step": 25400 }, { "epoch": 0.62, "learning_rate": 1.879768501416082e-05, "loss": 0.0169, "step": 25410 }, { "epoch": 0.62, "learning_rate": 1.879719246398227e-05, "loss": 0.0185, "step": 25420 }, { "epoch": 0.62, "learning_rate": 1.879669991380372e-05, "loss": 0.0158, "step": 25430 }, { "epoch": 0.62, "learning_rate": 1.8796207363625173e-05, "loss": 0.0142, "step": 25440 }, { "epoch": 0.63, "learning_rate": 1.8795714813446622e-05, "loss": 0.025, "step": 25450 }, { "epoch": 0.63, "learning_rate": 1.879522226326807e-05, "loss": 0.0135, "step": 25460 }, { "epoch": 0.63, "learning_rate": 1.8794729713089524e-05, "loss": 0.0132, "step": 25470 }, { "epoch": 0.63, "learning_rate": 1.8794237162910973e-05, "loss": 0.0175, "step": 25480 }, { "epoch": 0.63, "learning_rate": 1.8793744612732422e-05, "loss": 0.0161, "step": 25490 }, { "epoch": 0.63, "learning_rate": 1.8793252062553875e-05, "loss": 0.0147, "step": 25500 }, { "epoch": 0.63, "learning_rate": 1.8792759512375324e-05, "loss": 0.0135, "step": 25510 }, { "epoch": 0.63, "learning_rate": 1.8792266962196773e-05, "loss": 0.0152, "step": 25520 }, { "epoch": 0.63, "learning_rate": 1.8791774412018226e-05, "loss": 0.0191, "step": 25530 }, { "epoch": 0.63, "learning_rate": 1.8791281861839675e-05, "loss": 0.017, "step": 25540 }, { "epoch": 0.63, "learning_rate": 1.8790789311661127e-05, "loss": 0.0198, "step": 25550 }, { "epoch": 0.63, "learning_rate": 1.8790296761482577e-05, "loss": 0.0134, "step": 25560 }, { "epoch": 0.63, "learning_rate": 1.878980421130403e-05, "loss": 0.0173, "step": 25570 }, { "epoch": 0.63, "learning_rate": 1.8789311661125478e-05, "loss": 0.0141, "step": 25580 }, { "epoch": 0.63, "learning_rate": 1.878881911094693e-05, "loss": 0.0164, "step": 25590 }, { "epoch": 0.63, "learning_rate": 1.878832656076838e-05, "loss": 0.0144, "step": 25600 }, { "epoch": 0.63, "learning_rate": 1.878783401058983e-05, "loss": 0.0143, "step": 25610 }, { "epoch": 0.63, "learning_rate": 1.878734146041128e-05, "loss": 0.036, "step": 25620 }, { "epoch": 0.63, "learning_rate": 1.878684891023273e-05, "loss": 0.0212, "step": 25630 }, { "epoch": 0.63, "learning_rate": 1.878635636005418e-05, "loss": 0.0288, "step": 25640 }, { "epoch": 0.63, "learning_rate": 1.8785863809875632e-05, "loss": 0.0138, "step": 25650 }, { "epoch": 0.63, "learning_rate": 1.878537125969708e-05, "loss": 0.0134, "step": 25660 }, { "epoch": 0.63, "learning_rate": 1.8784878709518534e-05, "loss": 0.0139, "step": 25670 }, { "epoch": 0.63, "learning_rate": 1.8784386159339983e-05, "loss": 0.0152, "step": 25680 }, { "epoch": 0.63, "learning_rate": 1.8783893609161436e-05, "loss": 0.0174, "step": 25690 }, { "epoch": 0.63, "learning_rate": 1.8783401058982885e-05, "loss": 0.0168, "step": 25700 }, { "epoch": 0.63, "learning_rate": 1.8782908508804338e-05, "loss": 0.0118, "step": 25710 }, { "epoch": 0.63, "learning_rate": 1.8782415958625787e-05, "loss": 0.0142, "step": 25720 }, { "epoch": 0.63, "learning_rate": 1.878192340844724e-05, "loss": 0.013, "step": 25730 }, { "epoch": 0.63, "learning_rate": 1.878143085826869e-05, "loss": 0.0135, "step": 25740 }, { "epoch": 0.63, "learning_rate": 1.8780938308090138e-05, "loss": 0.0151, "step": 25750 }, { "epoch": 0.63, "learning_rate": 1.878044575791159e-05, "loss": 0.0169, "step": 25760 }, { "epoch": 0.63, "learning_rate": 1.877995320773304e-05, "loss": 0.0173, "step": 25770 }, { "epoch": 0.63, "learning_rate": 1.877946065755449e-05, "loss": 0.0193, "step": 25780 }, { "epoch": 0.63, "learning_rate": 1.877896810737594e-05, "loss": 0.0147, "step": 25790 }, { "epoch": 0.63, "learning_rate": 1.877847555719739e-05, "loss": 0.0185, "step": 25800 }, { "epoch": 0.63, "learning_rate": 1.877798300701884e-05, "loss": 0.0152, "step": 25810 }, { "epoch": 0.63, "learning_rate": 1.8777490456840292e-05, "loss": 0.0161, "step": 25820 }, { "epoch": 0.63, "learning_rate": 1.877699790666174e-05, "loss": 0.0136, "step": 25830 }, { "epoch": 0.63, "learning_rate": 1.8776505356483193e-05, "loss": 0.0199, "step": 25840 }, { "epoch": 0.64, "learning_rate": 1.8776012806304646e-05, "loss": 0.0144, "step": 25850 }, { "epoch": 0.64, "learning_rate": 1.8775520256126095e-05, "loss": 0.0141, "step": 25860 }, { "epoch": 0.64, "learning_rate": 1.8775027705947544e-05, "loss": 0.0168, "step": 25870 }, { "epoch": 0.64, "learning_rate": 1.8774535155768997e-05, "loss": 0.0147, "step": 25880 }, { "epoch": 0.64, "learning_rate": 1.8774042605590446e-05, "loss": 0.0167, "step": 25890 }, { "epoch": 0.64, "learning_rate": 1.87735500554119e-05, "loss": 0.0157, "step": 25900 }, { "epoch": 0.64, "learning_rate": 1.8773057505233348e-05, "loss": 0.0185, "step": 25910 }, { "epoch": 0.64, "learning_rate": 1.8772564955054797e-05, "loss": 0.0136, "step": 25920 }, { "epoch": 0.64, "learning_rate": 1.877207240487625e-05, "loss": 0.015, "step": 25930 }, { "epoch": 0.64, "learning_rate": 1.87715798546977e-05, "loss": 0.0151, "step": 25940 }, { "epoch": 0.64, "learning_rate": 1.8771087304519148e-05, "loss": 0.0171, "step": 25950 }, { "epoch": 0.64, "learning_rate": 1.87705947543406e-05, "loss": 0.0131, "step": 25960 }, { "epoch": 0.64, "learning_rate": 1.877010220416205e-05, "loss": 0.0163, "step": 25970 }, { "epoch": 0.64, "learning_rate": 1.8769609653983502e-05, "loss": 0.0143, "step": 25980 }, { "epoch": 0.64, "learning_rate": 1.876911710380495e-05, "loss": 0.0142, "step": 25990 }, { "epoch": 0.64, "learning_rate": 1.8768624553626404e-05, "loss": 0.0153, "step": 26000 }, { "epoch": 0.64, "learning_rate": 1.8768132003447853e-05, "loss": 0.015, "step": 26010 }, { "epoch": 0.64, "learning_rate": 1.8767639453269305e-05, "loss": 0.0249, "step": 26020 }, { "epoch": 0.64, "learning_rate": 1.8767146903090754e-05, "loss": 0.0275, "step": 26030 }, { "epoch": 0.64, "learning_rate": 1.8766654352912203e-05, "loss": 0.0169, "step": 26040 }, { "epoch": 0.64, "learning_rate": 1.8766161802733656e-05, "loss": 0.0163, "step": 26050 }, { "epoch": 0.64, "learning_rate": 1.8765669252555105e-05, "loss": 0.0147, "step": 26060 }, { "epoch": 0.64, "learning_rate": 1.8765176702376554e-05, "loss": 0.0162, "step": 26070 }, { "epoch": 0.64, "learning_rate": 1.8764684152198007e-05, "loss": 0.0141, "step": 26080 }, { "epoch": 0.64, "learning_rate": 1.8764191602019456e-05, "loss": 0.0194, "step": 26090 }, { "epoch": 0.64, "learning_rate": 1.8763699051840905e-05, "loss": 0.016, "step": 26100 }, { "epoch": 0.64, "learning_rate": 1.8763206501662358e-05, "loss": 0.0284, "step": 26110 }, { "epoch": 0.64, "learning_rate": 1.876271395148381e-05, "loss": 0.0326, "step": 26120 }, { "epoch": 0.64, "learning_rate": 1.876222140130526e-05, "loss": 0.0137, "step": 26130 }, { "epoch": 0.64, "learning_rate": 1.8761728851126712e-05, "loss": 0.014, "step": 26140 }, { "epoch": 0.64, "learning_rate": 1.876123630094816e-05, "loss": 0.0155, "step": 26150 }, { "epoch": 0.64, "learning_rate": 1.8760743750769614e-05, "loss": 0.0138, "step": 26160 }, { "epoch": 0.64, "learning_rate": 1.8760251200591063e-05, "loss": 0.0217, "step": 26170 }, { "epoch": 0.64, "learning_rate": 1.8759758650412512e-05, "loss": 0.0196, "step": 26180 }, { "epoch": 0.64, "learning_rate": 1.8759266100233964e-05, "loss": 0.0152, "step": 26190 }, { "epoch": 0.64, "learning_rate": 1.8758773550055414e-05, "loss": 0.0174, "step": 26200 }, { "epoch": 0.64, "learning_rate": 1.8758280999876863e-05, "loss": 0.0154, "step": 26210 }, { "epoch": 0.64, "learning_rate": 1.8757788449698315e-05, "loss": 0.0162, "step": 26220 }, { "epoch": 0.64, "learning_rate": 1.8757295899519764e-05, "loss": 0.0172, "step": 26230 }, { "epoch": 0.64, "learning_rate": 1.8756803349341214e-05, "loss": 0.0167, "step": 26240 }, { "epoch": 0.64, "learning_rate": 1.8756310799162666e-05, "loss": 0.0151, "step": 26250 }, { "epoch": 0.65, "learning_rate": 1.8755818248984115e-05, "loss": 0.0225, "step": 26260 }, { "epoch": 0.65, "learning_rate": 1.8755325698805568e-05, "loss": 0.0148, "step": 26270 }, { "epoch": 0.65, "learning_rate": 1.8754833148627017e-05, "loss": 0.0152, "step": 26280 }, { "epoch": 0.65, "learning_rate": 1.875434059844847e-05, "loss": 0.0252, "step": 26290 }, { "epoch": 0.65, "learning_rate": 1.875384804826992e-05, "loss": 0.0138, "step": 26300 }, { "epoch": 0.65, "learning_rate": 1.875335549809137e-05, "loss": 0.0171, "step": 26310 }, { "epoch": 0.65, "learning_rate": 1.875286294791282e-05, "loss": 0.0311, "step": 26320 }, { "epoch": 0.65, "learning_rate": 1.875237039773427e-05, "loss": 0.0136, "step": 26330 }, { "epoch": 0.65, "learning_rate": 1.8751877847555722e-05, "loss": 0.0251, "step": 26340 }, { "epoch": 0.65, "learning_rate": 1.875138529737717e-05, "loss": 0.0223, "step": 26350 }, { "epoch": 0.65, "learning_rate": 1.875089274719862e-05, "loss": 0.0147, "step": 26360 }, { "epoch": 0.65, "learning_rate": 1.8750400197020073e-05, "loss": 0.0221, "step": 26370 }, { "epoch": 0.65, "learning_rate": 1.8749907646841522e-05, "loss": 0.016, "step": 26380 }, { "epoch": 0.65, "learning_rate": 1.8749415096662975e-05, "loss": 0.0298, "step": 26390 }, { "epoch": 0.65, "learning_rate": 1.8748922546484424e-05, "loss": 0.0193, "step": 26400 }, { "epoch": 0.65, "learning_rate": 1.8748429996305876e-05, "loss": 0.0157, "step": 26410 }, { "epoch": 0.65, "learning_rate": 1.8747937446127325e-05, "loss": 0.0306, "step": 26420 }, { "epoch": 0.65, "learning_rate": 1.8747444895948778e-05, "loss": 0.0128, "step": 26430 }, { "epoch": 0.65, "learning_rate": 1.8746952345770227e-05, "loss": 0.0157, "step": 26440 }, { "epoch": 0.65, "learning_rate": 1.874645979559168e-05, "loss": 0.0159, "step": 26450 }, { "epoch": 0.65, "learning_rate": 1.874596724541313e-05, "loss": 0.0181, "step": 26460 }, { "epoch": 0.65, "learning_rate": 1.8745474695234578e-05, "loss": 0.0155, "step": 26470 }, { "epoch": 0.65, "learning_rate": 1.874498214505603e-05, "loss": 0.0113, "step": 26480 }, { "epoch": 0.65, "learning_rate": 1.874448959487748e-05, "loss": 0.0189, "step": 26490 }, { "epoch": 0.65, "learning_rate": 1.874399704469893e-05, "loss": 0.0127, "step": 26500 }, { "epoch": 0.65, "learning_rate": 1.874350449452038e-05, "loss": 0.0147, "step": 26510 }, { "epoch": 0.65, "learning_rate": 1.874301194434183e-05, "loss": 0.0129, "step": 26520 }, { "epoch": 0.65, "learning_rate": 1.874251939416328e-05, "loss": 0.015, "step": 26530 }, { "epoch": 0.65, "learning_rate": 1.8742026843984732e-05, "loss": 0.015, "step": 26540 }, { "epoch": 0.65, "learning_rate": 1.874153429380618e-05, "loss": 0.0151, "step": 26550 }, { "epoch": 0.65, "learning_rate": 1.8741041743627634e-05, "loss": 0.0152, "step": 26560 }, { "epoch": 0.65, "learning_rate": 1.8740549193449086e-05, "loss": 0.0151, "step": 26570 }, { "epoch": 0.65, "learning_rate": 1.8740056643270535e-05, "loss": 0.0145, "step": 26580 }, { "epoch": 0.65, "learning_rate": 1.8739564093091985e-05, "loss": 0.0262, "step": 26590 }, { "epoch": 0.65, "learning_rate": 1.8739071542913437e-05, "loss": 0.0151, "step": 26600 }, { "epoch": 0.65, "learning_rate": 1.8738578992734886e-05, "loss": 0.0149, "step": 26610 }, { "epoch": 0.65, "learning_rate": 1.873808644255634e-05, "loss": 0.0143, "step": 26620 }, { "epoch": 0.65, "learning_rate": 1.8737593892377788e-05, "loss": 0.0151, "step": 26630 }, { "epoch": 0.65, "learning_rate": 1.8737101342199237e-05, "loss": 0.0159, "step": 26640 }, { "epoch": 0.65, "learning_rate": 1.873660879202069e-05, "loss": 0.0153, "step": 26650 }, { "epoch": 0.65, "learning_rate": 1.873611624184214e-05, "loss": 0.0149, "step": 26660 }, { "epoch": 0.66, "learning_rate": 1.8735623691663588e-05, "loss": 0.0134, "step": 26670 }, { "epoch": 0.66, "learning_rate": 1.873513114148504e-05, "loss": 0.014, "step": 26680 }, { "epoch": 0.66, "learning_rate": 1.873463859130649e-05, "loss": 0.0157, "step": 26690 }, { "epoch": 0.66, "learning_rate": 1.8734146041127942e-05, "loss": 0.0145, "step": 26700 }, { "epoch": 0.66, "learning_rate": 1.873365349094939e-05, "loss": 0.0125, "step": 26710 }, { "epoch": 0.66, "learning_rate": 1.8733160940770844e-05, "loss": 0.0154, "step": 26720 }, { "epoch": 0.66, "learning_rate": 1.8732668390592293e-05, "loss": 0.011, "step": 26730 }, { "epoch": 0.66, "learning_rate": 1.8732175840413746e-05, "loss": 0.0124, "step": 26740 }, { "epoch": 0.66, "learning_rate": 1.8731683290235195e-05, "loss": 0.0151, "step": 26750 }, { "epoch": 0.66, "learning_rate": 1.8731190740056644e-05, "loss": 0.0147, "step": 26760 }, { "epoch": 0.66, "learning_rate": 1.8730698189878096e-05, "loss": 0.0197, "step": 26770 }, { "epoch": 0.66, "learning_rate": 1.8730205639699546e-05, "loss": 0.0153, "step": 26780 }, { "epoch": 0.66, "learning_rate": 1.8729713089520995e-05, "loss": 0.0212, "step": 26790 }, { "epoch": 0.66, "learning_rate": 1.8729220539342447e-05, "loss": 0.0148, "step": 26800 }, { "epoch": 0.66, "learning_rate": 1.8728727989163896e-05, "loss": 0.0133, "step": 26810 }, { "epoch": 0.66, "learning_rate": 1.8728235438985346e-05, "loss": 0.0218, "step": 26820 }, { "epoch": 0.66, "learning_rate": 1.8727742888806798e-05, "loss": 0.0249, "step": 26830 }, { "epoch": 0.66, "learning_rate": 1.872725033862825e-05, "loss": 0.0169, "step": 26840 }, { "epoch": 0.66, "learning_rate": 1.87267577884497e-05, "loss": 0.0136, "step": 26850 }, { "epoch": 0.66, "learning_rate": 1.8726265238271152e-05, "loss": 0.0162, "step": 26860 }, { "epoch": 0.66, "learning_rate": 1.87257726880926e-05, "loss": 0.0226, "step": 26870 }, { "epoch": 0.66, "learning_rate": 1.8725280137914054e-05, "loss": 0.0173, "step": 26880 }, { "epoch": 0.66, "learning_rate": 1.8724787587735503e-05, "loss": 0.0135, "step": 26890 }, { "epoch": 0.66, "learning_rate": 1.8724295037556952e-05, "loss": 0.0147, "step": 26900 }, { "epoch": 0.66, "learning_rate": 1.8723802487378405e-05, "loss": 0.0159, "step": 26910 }, { "epoch": 0.66, "learning_rate": 1.8723309937199854e-05, "loss": 0.0159, "step": 26920 }, { "epoch": 0.66, "learning_rate": 1.8722817387021303e-05, "loss": 0.0172, "step": 26930 }, { "epoch": 0.66, "learning_rate": 1.8722324836842756e-05, "loss": 0.0129, "step": 26940 }, { "epoch": 0.66, "learning_rate": 1.8721832286664205e-05, "loss": 0.0131, "step": 26950 }, { "epoch": 0.66, "learning_rate": 1.8721339736485654e-05, "loss": 0.0132, "step": 26960 }, { "epoch": 0.66, "learning_rate": 1.8720847186307106e-05, "loss": 0.0202, "step": 26970 }, { "epoch": 0.66, "learning_rate": 1.8720354636128556e-05, "loss": 0.0155, "step": 26980 }, { "epoch": 0.66, "learning_rate": 1.8719862085950008e-05, "loss": 0.0144, "step": 26990 }, { "epoch": 0.66, "learning_rate": 1.8719369535771457e-05, "loss": 0.0146, "step": 27000 }, { "epoch": 0.66, "learning_rate": 1.871887698559291e-05, "loss": 0.0143, "step": 27010 }, { "epoch": 0.66, "learning_rate": 1.871838443541436e-05, "loss": 0.016, "step": 27020 }, { "epoch": 0.66, "learning_rate": 1.871789188523581e-05, "loss": 0.0162, "step": 27030 }, { "epoch": 0.66, "learning_rate": 1.871739933505726e-05, "loss": 0.0199, "step": 27040 }, { "epoch": 0.66, "learning_rate": 1.871690678487871e-05, "loss": 0.0179, "step": 27050 }, { "epoch": 0.66, "learning_rate": 1.8716414234700162e-05, "loss": 0.0153, "step": 27060 }, { "epoch": 0.67, "learning_rate": 1.871592168452161e-05, "loss": 0.017, "step": 27070 }, { "epoch": 0.67, "learning_rate": 1.871542913434306e-05, "loss": 0.0138, "step": 27080 }, { "epoch": 0.67, "learning_rate": 1.8714936584164513e-05, "loss": 0.0153, "step": 27090 }, { "epoch": 0.67, "learning_rate": 1.8714444033985962e-05, "loss": 0.0157, "step": 27100 }, { "epoch": 0.67, "learning_rate": 1.8713951483807415e-05, "loss": 0.0152, "step": 27110 }, { "epoch": 0.67, "learning_rate": 1.8713458933628864e-05, "loss": 0.0151, "step": 27120 }, { "epoch": 0.67, "learning_rate": 1.8712966383450317e-05, "loss": 0.016, "step": 27130 }, { "epoch": 0.67, "learning_rate": 1.8712473833271766e-05, "loss": 0.0177, "step": 27140 }, { "epoch": 0.67, "learning_rate": 1.8711981283093218e-05, "loss": 0.0153, "step": 27150 }, { "epoch": 0.67, "learning_rate": 1.8711488732914667e-05, "loss": 0.0198, "step": 27160 }, { "epoch": 0.67, "learning_rate": 1.871099618273612e-05, "loss": 0.0215, "step": 27170 }, { "epoch": 0.67, "learning_rate": 1.871050363255757e-05, "loss": 0.0118, "step": 27180 }, { "epoch": 0.67, "learning_rate": 1.8710011082379018e-05, "loss": 0.0133, "step": 27190 }, { "epoch": 0.67, "learning_rate": 1.870951853220047e-05, "loss": 0.0147, "step": 27200 }, { "epoch": 0.67, "learning_rate": 1.870902598202192e-05, "loss": 0.0164, "step": 27210 }, { "epoch": 0.67, "learning_rate": 1.870853343184337e-05, "loss": 0.0347, "step": 27220 }, { "epoch": 0.67, "learning_rate": 1.870804088166482e-05, "loss": 0.0144, "step": 27230 }, { "epoch": 0.67, "learning_rate": 1.870754833148627e-05, "loss": 0.0147, "step": 27240 }, { "epoch": 0.67, "learning_rate": 1.870705578130772e-05, "loss": 0.014, "step": 27250 }, { "epoch": 0.67, "learning_rate": 1.8706563231129172e-05, "loss": 0.017, "step": 27260 }, { "epoch": 0.67, "learning_rate": 1.870607068095062e-05, "loss": 0.0221, "step": 27270 }, { "epoch": 0.67, "learning_rate": 1.8705578130772074e-05, "loss": 0.0171, "step": 27280 }, { "epoch": 0.67, "learning_rate": 1.8705085580593527e-05, "loss": 0.0183, "step": 27290 }, { "epoch": 0.67, "learning_rate": 1.8704593030414976e-05, "loss": 0.0142, "step": 27300 }, { "epoch": 0.67, "learning_rate": 1.8704100480236425e-05, "loss": 0.0135, "step": 27310 }, { "epoch": 0.67, "learning_rate": 1.8703607930057877e-05, "loss": 0.0151, "step": 27320 }, { "epoch": 0.67, "learning_rate": 1.8703115379879327e-05, "loss": 0.0154, "step": 27330 }, { "epoch": 0.67, "learning_rate": 1.870262282970078e-05, "loss": 0.0156, "step": 27340 }, { "epoch": 0.67, "learning_rate": 1.8702130279522228e-05, "loss": 0.0149, "step": 27350 }, { "epoch": 0.67, "learning_rate": 1.8701637729343677e-05, "loss": 0.0145, "step": 27360 }, { "epoch": 0.67, "learning_rate": 1.870114517916513e-05, "loss": 0.0171, "step": 27370 }, { "epoch": 0.67, "learning_rate": 1.870065262898658e-05, "loss": 0.0166, "step": 27380 }, { "epoch": 0.67, "learning_rate": 1.8700160078808028e-05, "loss": 0.0143, "step": 27390 }, { "epoch": 0.67, "learning_rate": 1.869966752862948e-05, "loss": 0.0219, "step": 27400 }, { "epoch": 0.67, "learning_rate": 1.869917497845093e-05, "loss": 0.0184, "step": 27410 }, { "epoch": 0.67, "learning_rate": 1.8698682428272383e-05, "loss": 0.011, "step": 27420 }, { "epoch": 0.67, "learning_rate": 1.869818987809383e-05, "loss": 0.0161, "step": 27430 }, { "epoch": 0.67, "learning_rate": 1.8697697327915284e-05, "loss": 0.013, "step": 27440 }, { "epoch": 0.67, "learning_rate": 1.8697204777736733e-05, "loss": 0.015, "step": 27450 }, { "epoch": 0.67, "learning_rate": 1.8696712227558186e-05, "loss": 0.0121, "step": 27460 }, { "epoch": 0.67, "learning_rate": 1.8696219677379635e-05, "loss": 0.0161, "step": 27470 }, { "epoch": 0.68, "learning_rate": 1.8695727127201084e-05, "loss": 0.0141, "step": 27480 }, { "epoch": 0.68, "learning_rate": 1.8695234577022537e-05, "loss": 0.0146, "step": 27490 }, { "epoch": 0.68, "learning_rate": 1.8694742026843986e-05, "loss": 0.0129, "step": 27500 }, { "epoch": 0.68, "learning_rate": 1.8694249476665435e-05, "loss": 0.0149, "step": 27510 }, { "epoch": 0.68, "learning_rate": 1.8693756926486888e-05, "loss": 0.0244, "step": 27520 }, { "epoch": 0.68, "learning_rate": 1.8693264376308337e-05, "loss": 0.0244, "step": 27530 }, { "epoch": 0.68, "learning_rate": 1.8692771826129786e-05, "loss": 0.0159, "step": 27540 }, { "epoch": 0.68, "learning_rate": 1.869227927595124e-05, "loss": 0.0193, "step": 27550 }, { "epoch": 0.68, "learning_rate": 1.869178672577269e-05, "loss": 0.0151, "step": 27560 }, { "epoch": 0.68, "learning_rate": 1.869129417559414e-05, "loss": 0.0183, "step": 27570 }, { "epoch": 0.68, "learning_rate": 1.8690801625415593e-05, "loss": 0.0175, "step": 27580 }, { "epoch": 0.68, "learning_rate": 1.8690309075237042e-05, "loss": 0.0137, "step": 27590 }, { "epoch": 0.68, "learning_rate": 1.8689816525058494e-05, "loss": 0.0141, "step": 27600 }, { "epoch": 0.68, "learning_rate": 1.8689323974879943e-05, "loss": 0.0144, "step": 27610 }, { "epoch": 0.68, "learning_rate": 1.8688831424701393e-05, "loss": 0.0212, "step": 27620 }, { "epoch": 0.68, "learning_rate": 1.8688338874522845e-05, "loss": 0.0159, "step": 27630 }, { "epoch": 0.68, "learning_rate": 1.8687846324344294e-05, "loss": 0.0193, "step": 27640 }, { "epoch": 0.68, "learning_rate": 1.8687353774165743e-05, "loss": 0.0158, "step": 27650 }, { "epoch": 0.68, "learning_rate": 1.8686861223987196e-05, "loss": 0.0151, "step": 27660 }, { "epoch": 0.68, "learning_rate": 1.8686368673808645e-05, "loss": 0.0152, "step": 27670 }, { "epoch": 0.68, "learning_rate": 1.8685876123630094e-05, "loss": 0.0271, "step": 27680 }, { "epoch": 0.68, "learning_rate": 1.8685383573451547e-05, "loss": 0.0154, "step": 27690 }, { "epoch": 0.68, "learning_rate": 1.8684891023272996e-05, "loss": 0.0205, "step": 27700 }, { "epoch": 0.68, "learning_rate": 1.868439847309445e-05, "loss": 0.0149, "step": 27710 }, { "epoch": 0.68, "learning_rate": 1.8683905922915898e-05, "loss": 0.0271, "step": 27720 }, { "epoch": 0.68, "learning_rate": 1.868341337273735e-05, "loss": 0.0143, "step": 27730 }, { "epoch": 0.68, "learning_rate": 1.86829208225588e-05, "loss": 0.0169, "step": 27740 }, { "epoch": 0.68, "learning_rate": 1.8682428272380252e-05, "loss": 0.0113, "step": 27750 }, { "epoch": 0.68, "learning_rate": 1.86819357222017e-05, "loss": 0.0267, "step": 27760 }, { "epoch": 0.68, "learning_rate": 1.868144317202315e-05, "loss": 0.0134, "step": 27770 }, { "epoch": 0.68, "learning_rate": 1.8680950621844603e-05, "loss": 0.0156, "step": 27780 }, { "epoch": 0.68, "learning_rate": 1.8680458071666052e-05, "loss": 0.0158, "step": 27790 }, { "epoch": 0.68, "learning_rate": 1.86799655214875e-05, "loss": 0.0124, "step": 27800 }, { "epoch": 0.68, "learning_rate": 1.8679472971308954e-05, "loss": 0.0167, "step": 27810 }, { "epoch": 0.68, "learning_rate": 1.8678980421130403e-05, "loss": 0.0261, "step": 27820 }, { "epoch": 0.68, "learning_rate": 1.8678487870951855e-05, "loss": 0.015, "step": 27830 }, { "epoch": 0.68, "learning_rate": 1.8677995320773304e-05, "loss": 0.0202, "step": 27840 }, { "epoch": 0.68, "learning_rate": 1.8677502770594757e-05, "loss": 0.0314, "step": 27850 }, { "epoch": 0.68, "learning_rate": 1.8677010220416206e-05, "loss": 0.0149, "step": 27860 }, { "epoch": 0.68, "learning_rate": 1.867651767023766e-05, "loss": 0.0152, "step": 27870 }, { "epoch": 0.68, "learning_rate": 1.8676025120059108e-05, "loss": 0.0133, "step": 27880 }, { "epoch": 0.69, "learning_rate": 1.867553256988056e-05, "loss": 0.0281, "step": 27890 }, { "epoch": 0.69, "learning_rate": 1.867504001970201e-05, "loss": 0.016, "step": 27900 }, { "epoch": 0.69, "learning_rate": 1.867454746952346e-05, "loss": 0.0137, "step": 27910 }, { "epoch": 0.69, "learning_rate": 1.867405491934491e-05, "loss": 0.0256, "step": 27920 }, { "epoch": 0.69, "learning_rate": 1.867356236916636e-05, "loss": 0.014, "step": 27930 }, { "epoch": 0.69, "learning_rate": 1.867306981898781e-05, "loss": 0.0135, "step": 27940 }, { "epoch": 0.69, "learning_rate": 1.8672577268809262e-05, "loss": 0.016, "step": 27950 }, { "epoch": 0.69, "learning_rate": 1.867208471863071e-05, "loss": 0.0158, "step": 27960 }, { "epoch": 0.69, "learning_rate": 1.867159216845216e-05, "loss": 0.0188, "step": 27970 }, { "epoch": 0.69, "learning_rate": 1.8671099618273613e-05, "loss": 0.0137, "step": 27980 }, { "epoch": 0.69, "learning_rate": 1.8670607068095062e-05, "loss": 0.0147, "step": 27990 }, { "epoch": 0.69, "learning_rate": 1.8670114517916514e-05, "loss": 0.0189, "step": 28000 }, { "epoch": 0.69, "learning_rate": 1.8669621967737967e-05, "loss": 0.0155, "step": 28010 }, { "epoch": 0.69, "learning_rate": 1.8669129417559416e-05, "loss": 0.0292, "step": 28020 }, { "epoch": 0.69, "learning_rate": 1.8668636867380865e-05, "loss": 0.0166, "step": 28030 }, { "epoch": 0.69, "learning_rate": 1.8668144317202318e-05, "loss": 0.0147, "step": 28040 }, { "epoch": 0.69, "learning_rate": 1.8667651767023767e-05, "loss": 0.0124, "step": 28050 }, { "epoch": 0.69, "learning_rate": 1.866715921684522e-05, "loss": 0.0146, "step": 28060 }, { "epoch": 0.69, "learning_rate": 1.866666666666667e-05, "loss": 0.0136, "step": 28070 }, { "epoch": 0.69, "learning_rate": 1.8666174116488118e-05, "loss": 0.0238, "step": 28080 }, { "epoch": 0.69, "learning_rate": 1.866568156630957e-05, "loss": 0.0144, "step": 28090 }, { "epoch": 0.69, "learning_rate": 1.866518901613102e-05, "loss": 0.0228, "step": 28100 }, { "epoch": 0.69, "learning_rate": 1.866469646595247e-05, "loss": 0.0162, "step": 28110 }, { "epoch": 0.69, "learning_rate": 1.866420391577392e-05, "loss": 0.0133, "step": 28120 }, { "epoch": 0.69, "learning_rate": 1.866371136559537e-05, "loss": 0.0192, "step": 28130 }, { "epoch": 0.69, "learning_rate": 1.8663218815416823e-05, "loss": 0.0134, "step": 28140 }, { "epoch": 0.69, "learning_rate": 1.8662726265238272e-05, "loss": 0.0173, "step": 28150 }, { "epoch": 0.69, "learning_rate": 1.8662233715059725e-05, "loss": 0.0193, "step": 28160 }, { "epoch": 0.69, "learning_rate": 1.8661741164881174e-05, "loss": 0.0156, "step": 28170 }, { "epoch": 0.69, "learning_rate": 1.8661248614702626e-05, "loss": 0.0142, "step": 28180 }, { "epoch": 0.69, "learning_rate": 1.8660756064524075e-05, "loss": 0.0278, "step": 28190 }, { "epoch": 0.69, "learning_rate": 1.8660263514345525e-05, "loss": 0.015, "step": 28200 }, { "epoch": 0.69, "learning_rate": 1.8659770964166977e-05, "loss": 0.0159, "step": 28210 }, { "epoch": 0.69, "learning_rate": 1.8659278413988426e-05, "loss": 0.0145, "step": 28220 }, { "epoch": 0.69, "learning_rate": 1.8658785863809875e-05, "loss": 0.0235, "step": 28230 }, { "epoch": 0.69, "learning_rate": 1.8658293313631328e-05, "loss": 0.0193, "step": 28240 }, { "epoch": 0.69, "learning_rate": 1.8657800763452777e-05, "loss": 0.0133, "step": 28250 }, { "epoch": 0.69, "learning_rate": 1.8657308213274226e-05, "loss": 0.0157, "step": 28260 }, { "epoch": 0.69, "learning_rate": 1.865681566309568e-05, "loss": 0.0138, "step": 28270 }, { "epoch": 0.69, "learning_rate": 1.865632311291713e-05, "loss": 0.0141, "step": 28280 }, { "epoch": 0.69, "learning_rate": 1.865583056273858e-05, "loss": 0.0131, "step": 28290 }, { "epoch": 0.7, "learning_rate": 1.8655338012560033e-05, "loss": 0.0233, "step": 28300 }, { "epoch": 0.7, "learning_rate": 1.8654845462381482e-05, "loss": 0.0231, "step": 28310 }, { "epoch": 0.7, "learning_rate": 1.8654352912202935e-05, "loss": 0.0136, "step": 28320 }, { "epoch": 0.7, "learning_rate": 1.8653860362024384e-05, "loss": 0.0134, "step": 28330 }, { "epoch": 0.7, "learning_rate": 1.8653367811845833e-05, "loss": 0.0132, "step": 28340 }, { "epoch": 0.7, "learning_rate": 1.8652875261667285e-05, "loss": 0.0144, "step": 28350 }, { "epoch": 0.7, "learning_rate": 1.8652382711488735e-05, "loss": 0.0156, "step": 28360 }, { "epoch": 0.7, "learning_rate": 1.8651890161310184e-05, "loss": 0.0146, "step": 28370 }, { "epoch": 0.7, "learning_rate": 1.8651397611131636e-05, "loss": 0.0111, "step": 28380 }, { "epoch": 0.7, "learning_rate": 1.8650905060953085e-05, "loss": 0.0214, "step": 28390 }, { "epoch": 0.7, "learning_rate": 1.8650412510774535e-05, "loss": 0.0168, "step": 28400 }, { "epoch": 0.7, "learning_rate": 1.8649919960595987e-05, "loss": 0.0152, "step": 28410 }, { "epoch": 0.7, "learning_rate": 1.8649427410417436e-05, "loss": 0.0213, "step": 28420 }, { "epoch": 0.7, "learning_rate": 1.864893486023889e-05, "loss": 0.0128, "step": 28430 }, { "epoch": 0.7, "learning_rate": 1.8648442310060338e-05, "loss": 0.0171, "step": 28440 }, { "epoch": 0.7, "learning_rate": 1.864794975988179e-05, "loss": 0.0143, "step": 28450 }, { "epoch": 0.7, "learning_rate": 1.864745720970324e-05, "loss": 0.0163, "step": 28460 }, { "epoch": 0.7, "learning_rate": 1.8646964659524692e-05, "loss": 0.0159, "step": 28470 }, { "epoch": 0.7, "learning_rate": 1.864647210934614e-05, "loss": 0.0138, "step": 28480 }, { "epoch": 0.7, "learning_rate": 1.864597955916759e-05, "loss": 0.0266, "step": 28490 }, { "epoch": 0.7, "learning_rate": 1.8645487008989043e-05, "loss": 0.0144, "step": 28500 }, { "epoch": 0.7, "learning_rate": 1.8644994458810492e-05, "loss": 0.0134, "step": 28510 }, { "epoch": 0.7, "learning_rate": 1.864450190863194e-05, "loss": 0.0182, "step": 28520 }, { "epoch": 0.7, "learning_rate": 1.8644009358453394e-05, "loss": 0.0138, "step": 28530 }, { "epoch": 0.7, "learning_rate": 1.8643516808274843e-05, "loss": 0.0134, "step": 28540 }, { "epoch": 0.7, "learning_rate": 1.8643024258096296e-05, "loss": 0.0139, "step": 28550 }, { "epoch": 0.7, "learning_rate": 1.8642531707917745e-05, "loss": 0.0151, "step": 28560 }, { "epoch": 0.7, "learning_rate": 1.8642039157739197e-05, "loss": 0.0129, "step": 28570 }, { "epoch": 0.7, "learning_rate": 1.8641546607560646e-05, "loss": 0.0134, "step": 28580 }, { "epoch": 0.7, "learning_rate": 1.86410540573821e-05, "loss": 0.0166, "step": 28590 }, { "epoch": 0.7, "learning_rate": 1.8640561507203548e-05, "loss": 0.0157, "step": 28600 }, { "epoch": 0.7, "learning_rate": 1.8640068957025e-05, "loss": 0.0141, "step": 28610 }, { "epoch": 0.7, "learning_rate": 1.863957640684645e-05, "loss": 0.0144, "step": 28620 }, { "epoch": 0.7, "learning_rate": 1.86390838566679e-05, "loss": 0.0117, "step": 28630 }, { "epoch": 0.7, "learning_rate": 1.863859130648935e-05, "loss": 0.0165, "step": 28640 }, { "epoch": 0.7, "learning_rate": 1.86380987563108e-05, "loss": 0.0141, "step": 28650 }, { "epoch": 0.7, "learning_rate": 1.863760620613225e-05, "loss": 0.0132, "step": 28660 }, { "epoch": 0.7, "learning_rate": 1.8637113655953702e-05, "loss": 0.0147, "step": 28670 }, { "epoch": 0.7, "learning_rate": 1.863662110577515e-05, "loss": 0.0135, "step": 28680 }, { "epoch": 0.7, "learning_rate": 1.86361285555966e-05, "loss": 0.0189, "step": 28690 }, { "epoch": 0.71, "learning_rate": 1.8635636005418053e-05, "loss": 0.0235, "step": 28700 }, { "epoch": 0.71, "learning_rate": 1.8635143455239502e-05, "loss": 0.0164, "step": 28710 }, { "epoch": 0.71, "learning_rate": 1.8634650905060955e-05, "loss": 0.0154, "step": 28720 }, { "epoch": 0.71, "learning_rate": 1.8634158354882407e-05, "loss": 0.0131, "step": 28730 }, { "epoch": 0.71, "learning_rate": 1.8633665804703856e-05, "loss": 0.0124, "step": 28740 }, { "epoch": 0.71, "learning_rate": 1.8633173254525306e-05, "loss": 0.0144, "step": 28750 }, { "epoch": 0.71, "learning_rate": 1.8632680704346758e-05, "loss": 0.0139, "step": 28760 }, { "epoch": 0.71, "learning_rate": 1.8632188154168207e-05, "loss": 0.0124, "step": 28770 }, { "epoch": 0.71, "learning_rate": 1.863169560398966e-05, "loss": 0.0129, "step": 28780 }, { "epoch": 0.71, "learning_rate": 1.863120305381111e-05, "loss": 0.0162, "step": 28790 }, { "epoch": 0.71, "learning_rate": 1.8630710503632558e-05, "loss": 0.0215, "step": 28800 }, { "epoch": 0.71, "learning_rate": 1.863021795345401e-05, "loss": 0.0192, "step": 28810 }, { "epoch": 0.71, "learning_rate": 1.862972540327546e-05, "loss": 0.0135, "step": 28820 }, { "epoch": 0.71, "learning_rate": 1.862923285309691e-05, "loss": 0.0144, "step": 28830 }, { "epoch": 0.71, "learning_rate": 1.862874030291836e-05, "loss": 0.0164, "step": 28840 }, { "epoch": 0.71, "learning_rate": 1.862824775273981e-05, "loss": 0.0144, "step": 28850 }, { "epoch": 0.71, "learning_rate": 1.8627755202561263e-05, "loss": 0.015, "step": 28860 }, { "epoch": 0.71, "learning_rate": 1.8627262652382712e-05, "loss": 0.0169, "step": 28870 }, { "epoch": 0.71, "learning_rate": 1.8626770102204165e-05, "loss": 0.0201, "step": 28880 }, { "epoch": 0.71, "learning_rate": 1.8626277552025614e-05, "loss": 0.0126, "step": 28890 }, { "epoch": 0.71, "learning_rate": 1.8625785001847067e-05, "loss": 0.0134, "step": 28900 }, { "epoch": 0.71, "learning_rate": 1.8625292451668516e-05, "loss": 0.0134, "step": 28910 }, { "epoch": 0.71, "learning_rate": 1.8624799901489965e-05, "loss": 0.015, "step": 28920 }, { "epoch": 0.71, "learning_rate": 1.8624307351311417e-05, "loss": 0.0146, "step": 28930 }, { "epoch": 0.71, "learning_rate": 1.8623814801132867e-05, "loss": 0.0143, "step": 28940 }, { "epoch": 0.71, "learning_rate": 1.8623322250954316e-05, "loss": 0.0152, "step": 28950 }, { "epoch": 0.71, "learning_rate": 1.8622829700775768e-05, "loss": 0.023, "step": 28960 }, { "epoch": 0.71, "learning_rate": 1.8622337150597217e-05, "loss": 0.0169, "step": 28970 }, { "epoch": 0.71, "learning_rate": 1.8621844600418667e-05, "loss": 0.0165, "step": 28980 }, { "epoch": 0.71, "learning_rate": 1.862135205024012e-05, "loss": 0.0158, "step": 28990 }, { "epoch": 0.71, "learning_rate": 1.862085950006157e-05, "loss": 0.0147, "step": 29000 }, { "epoch": 0.71, "learning_rate": 1.862036694988302e-05, "loss": 0.0161, "step": 29010 }, { "epoch": 0.71, "learning_rate": 1.8619874399704473e-05, "loss": 0.0136, "step": 29020 }, { "epoch": 0.71, "learning_rate": 1.8619381849525922e-05, "loss": 0.0248, "step": 29030 }, { "epoch": 0.71, "learning_rate": 1.8618889299347375e-05, "loss": 0.0147, "step": 29040 }, { "epoch": 0.71, "learning_rate": 1.8618396749168824e-05, "loss": 0.0173, "step": 29050 }, { "epoch": 0.71, "learning_rate": 1.8617904198990273e-05, "loss": 0.0139, "step": 29060 }, { "epoch": 0.71, "learning_rate": 1.8617411648811726e-05, "loss": 0.0122, "step": 29070 }, { "epoch": 0.71, "learning_rate": 1.8616919098633175e-05, "loss": 0.0143, "step": 29080 }, { "epoch": 0.71, "learning_rate": 1.8616426548454624e-05, "loss": 0.0203, "step": 29090 }, { "epoch": 0.71, "learning_rate": 1.8615933998276077e-05, "loss": 0.0165, "step": 29100 }, { "epoch": 0.72, "learning_rate": 1.8615441448097526e-05, "loss": 0.018, "step": 29110 }, { "epoch": 0.72, "learning_rate": 1.8614948897918975e-05, "loss": 0.0134, "step": 29120 }, { "epoch": 0.72, "learning_rate": 1.8614456347740427e-05, "loss": 0.0166, "step": 29130 }, { "epoch": 0.72, "learning_rate": 1.8613963797561877e-05, "loss": 0.0152, "step": 29140 }, { "epoch": 0.72, "learning_rate": 1.861347124738333e-05, "loss": 0.015, "step": 29150 }, { "epoch": 0.72, "learning_rate": 1.861297869720478e-05, "loss": 0.0144, "step": 29160 }, { "epoch": 0.72, "learning_rate": 1.861248614702623e-05, "loss": 0.0139, "step": 29170 }, { "epoch": 0.72, "learning_rate": 1.861199359684768e-05, "loss": 0.0155, "step": 29180 }, { "epoch": 0.72, "learning_rate": 1.8611501046669133e-05, "loss": 0.0136, "step": 29190 }, { "epoch": 0.72, "learning_rate": 1.861100849649058e-05, "loss": 0.0137, "step": 29200 }, { "epoch": 0.72, "learning_rate": 1.861051594631203e-05, "loss": 0.02, "step": 29210 }, { "epoch": 0.72, "learning_rate": 1.8610023396133483e-05, "loss": 0.0139, "step": 29220 }, { "epoch": 0.72, "learning_rate": 1.8609530845954933e-05, "loss": 0.0148, "step": 29230 }, { "epoch": 0.72, "learning_rate": 1.860903829577638e-05, "loss": 0.0189, "step": 29240 }, { "epoch": 0.72, "learning_rate": 1.8608545745597834e-05, "loss": 0.0129, "step": 29250 }, { "epoch": 0.72, "learning_rate": 1.8608053195419283e-05, "loss": 0.0154, "step": 29260 }, { "epoch": 0.72, "learning_rate": 1.8607560645240736e-05, "loss": 0.0124, "step": 29270 }, { "epoch": 0.72, "learning_rate": 1.8607068095062185e-05, "loss": 0.0144, "step": 29280 }, { "epoch": 0.72, "learning_rate": 1.8606575544883638e-05, "loss": 0.0147, "step": 29290 }, { "epoch": 0.72, "learning_rate": 1.8606082994705087e-05, "loss": 0.019, "step": 29300 }, { "epoch": 0.72, "learning_rate": 1.860559044452654e-05, "loss": 0.0188, "step": 29310 }, { "epoch": 0.72, "learning_rate": 1.860509789434799e-05, "loss": 0.0147, "step": 29320 }, { "epoch": 0.72, "learning_rate": 1.860460534416944e-05, "loss": 0.0135, "step": 29330 }, { "epoch": 0.72, "learning_rate": 1.860411279399089e-05, "loss": 0.0156, "step": 29340 }, { "epoch": 0.72, "learning_rate": 1.860362024381234e-05, "loss": 0.0162, "step": 29350 }, { "epoch": 0.72, "learning_rate": 1.8603127693633792e-05, "loss": 0.0141, "step": 29360 }, { "epoch": 0.72, "learning_rate": 1.860263514345524e-05, "loss": 0.0155, "step": 29370 }, { "epoch": 0.72, "learning_rate": 1.860214259327669e-05, "loss": 0.0141, "step": 29380 }, { "epoch": 0.72, "learning_rate": 1.8601650043098143e-05, "loss": 0.0198, "step": 29390 }, { "epoch": 0.72, "learning_rate": 1.8601157492919592e-05, "loss": 0.0138, "step": 29400 }, { "epoch": 0.72, "learning_rate": 1.860066494274104e-05, "loss": 0.0137, "step": 29410 }, { "epoch": 0.72, "learning_rate": 1.8600172392562493e-05, "loss": 0.0144, "step": 29420 }, { "epoch": 0.72, "learning_rate": 1.8599679842383943e-05, "loss": 0.0147, "step": 29430 }, { "epoch": 0.72, "learning_rate": 1.8599187292205395e-05, "loss": 0.0139, "step": 29440 }, { "epoch": 0.72, "learning_rate": 1.8598694742026848e-05, "loss": 0.0136, "step": 29450 }, { "epoch": 0.72, "learning_rate": 1.8598202191848297e-05, "loss": 0.0208, "step": 29460 }, { "epoch": 0.72, "learning_rate": 1.8597709641669746e-05, "loss": 0.0154, "step": 29470 }, { "epoch": 0.72, "learning_rate": 1.85972170914912e-05, "loss": 0.0168, "step": 29480 }, { "epoch": 0.72, "learning_rate": 1.8596724541312648e-05, "loss": 0.0144, "step": 29490 }, { "epoch": 0.72, "learning_rate": 1.85962319911341e-05, "loss": 0.0217, "step": 29500 }, { "epoch": 0.72, "learning_rate": 1.859573944095555e-05, "loss": 0.0233, "step": 29510 }, { "epoch": 0.73, "learning_rate": 1.8595246890777e-05, "loss": 0.0137, "step": 29520 }, { "epoch": 0.73, "learning_rate": 1.859475434059845e-05, "loss": 0.0154, "step": 29530 }, { "epoch": 0.73, "learning_rate": 1.85942617904199e-05, "loss": 0.0216, "step": 29540 }, { "epoch": 0.73, "learning_rate": 1.859376924024135e-05, "loss": 0.0217, "step": 29550 }, { "epoch": 0.73, "learning_rate": 1.8593276690062802e-05, "loss": 0.0153, "step": 29560 }, { "epoch": 0.73, "learning_rate": 1.859278413988425e-05, "loss": 0.0156, "step": 29570 }, { "epoch": 0.73, "learning_rate": 1.8592291589705704e-05, "loss": 0.0148, "step": 29580 }, { "epoch": 0.73, "learning_rate": 1.8591799039527153e-05, "loss": 0.0137, "step": 29590 }, { "epoch": 0.73, "learning_rate": 1.8591306489348605e-05, "loss": 0.0141, "step": 29600 }, { "epoch": 0.73, "learning_rate": 1.8590813939170054e-05, "loss": 0.0148, "step": 29610 }, { "epoch": 0.73, "learning_rate": 1.8590321388991507e-05, "loss": 0.0124, "step": 29620 }, { "epoch": 0.73, "learning_rate": 1.8589828838812956e-05, "loss": 0.0154, "step": 29630 }, { "epoch": 0.73, "learning_rate": 1.8589336288634405e-05, "loss": 0.0122, "step": 29640 }, { "epoch": 0.73, "learning_rate": 1.8588843738455858e-05, "loss": 0.016, "step": 29650 }, { "epoch": 0.73, "learning_rate": 1.8588351188277307e-05, "loss": 0.0228, "step": 29660 }, { "epoch": 0.73, "learning_rate": 1.8587858638098756e-05, "loss": 0.0238, "step": 29670 }, { "epoch": 0.73, "learning_rate": 1.858736608792021e-05, "loss": 0.0138, "step": 29680 }, { "epoch": 0.73, "learning_rate": 1.8586873537741658e-05, "loss": 0.0144, "step": 29690 }, { "epoch": 0.73, "learning_rate": 1.8586380987563107e-05, "loss": 0.0124, "step": 29700 }, { "epoch": 0.73, "learning_rate": 1.858588843738456e-05, "loss": 0.0165, "step": 29710 }, { "epoch": 0.73, "learning_rate": 1.8585395887206012e-05, "loss": 0.0146, "step": 29720 }, { "epoch": 0.73, "learning_rate": 1.858490333702746e-05, "loss": 0.018, "step": 29730 }, { "epoch": 0.73, "learning_rate": 1.8584410786848914e-05, "loss": 0.0206, "step": 29740 }, { "epoch": 0.73, "learning_rate": 1.8583918236670363e-05, "loss": 0.0135, "step": 29750 }, { "epoch": 0.73, "learning_rate": 1.8583425686491815e-05, "loss": 0.0168, "step": 29760 }, { "epoch": 0.73, "learning_rate": 1.8582933136313264e-05, "loss": 0.0118, "step": 29770 }, { "epoch": 0.73, "learning_rate": 1.8582440586134714e-05, "loss": 0.0155, "step": 29780 }, { "epoch": 0.73, "learning_rate": 1.8581948035956166e-05, "loss": 0.013, "step": 29790 }, { "epoch": 0.73, "learning_rate": 1.8581455485777615e-05, "loss": 0.0137, "step": 29800 }, { "epoch": 0.73, "learning_rate": 1.8580962935599064e-05, "loss": 0.0116, "step": 29810 }, { "epoch": 0.73, "learning_rate": 1.8580470385420517e-05, "loss": 0.0169, "step": 29820 }, { "epoch": 0.73, "learning_rate": 1.8579977835241966e-05, "loss": 0.0236, "step": 29830 }, { "epoch": 0.73, "learning_rate": 1.8579485285063415e-05, "loss": 0.0167, "step": 29840 }, { "epoch": 0.73, "learning_rate": 1.8578992734884868e-05, "loss": 0.0133, "step": 29850 }, { "epoch": 0.73, "learning_rate": 1.8578500184706317e-05, "loss": 0.0201, "step": 29860 }, { "epoch": 0.73, "learning_rate": 1.857800763452777e-05, "loss": 0.0198, "step": 29870 }, { "epoch": 0.73, "learning_rate": 1.857751508434922e-05, "loss": 0.014, "step": 29880 }, { "epoch": 0.73, "learning_rate": 1.857702253417067e-05, "loss": 0.014, "step": 29890 }, { "epoch": 0.73, "learning_rate": 1.857652998399212e-05, "loss": 0.0238, "step": 29900 }, { "epoch": 0.73, "learning_rate": 1.8576037433813573e-05, "loss": 0.0143, "step": 29910 }, { "epoch": 0.74, "learning_rate": 1.8575544883635022e-05, "loss": 0.0134, "step": 29920 }, { "epoch": 0.74, "learning_rate": 1.857505233345647e-05, "loss": 0.0125, "step": 29930 }, { "epoch": 0.74, "learning_rate": 1.8574559783277924e-05, "loss": 0.014, "step": 29940 }, { "epoch": 0.74, "learning_rate": 1.8574067233099373e-05, "loss": 0.0159, "step": 29950 }, { "epoch": 0.74, "learning_rate": 1.8573574682920822e-05, "loss": 0.0175, "step": 29960 }, { "epoch": 0.74, "learning_rate": 1.8573082132742275e-05, "loss": 0.0141, "step": 29970 }, { "epoch": 0.74, "learning_rate": 1.8572589582563724e-05, "loss": 0.0144, "step": 29980 }, { "epoch": 0.74, "learning_rate": 1.8572097032385176e-05, "loss": 0.0167, "step": 29990 }, { "epoch": 0.74, "learning_rate": 1.8571604482206625e-05, "loss": 0.018, "step": 30000 }, { "epoch": 0.74, "eval_bleu": 97.7231, "eval_gen_len_ratio": 0.9995, "eval_loss": 0.010594647377729416, "eval_runtime": 253.9297, "eval_samples_per_second": 7.348, "eval_steps_per_second": 0.232, "step": 30000 }, { "epoch": 0.74, "learning_rate": 1.8571111932028078e-05, "loss": 0.0149, "step": 30010 }, { "epoch": 0.74, "learning_rate": 1.8570619381849527e-05, "loss": 0.0143, "step": 30020 }, { "epoch": 0.74, "learning_rate": 1.857012683167098e-05, "loss": 0.0159, "step": 30030 }, { "epoch": 0.74, "learning_rate": 1.856963428149243e-05, "loss": 0.0168, "step": 30040 }, { "epoch": 0.74, "learning_rate": 1.856914173131388e-05, "loss": 0.015, "step": 30050 }, { "epoch": 0.74, "learning_rate": 1.856864918113533e-05, "loss": 0.0146, "step": 30060 }, { "epoch": 0.74, "learning_rate": 1.856815663095678e-05, "loss": 0.0133, "step": 30070 }, { "epoch": 0.74, "learning_rate": 1.8567664080778232e-05, "loss": 0.0213, "step": 30080 }, { "epoch": 0.74, "learning_rate": 1.856717153059968e-05, "loss": 0.0144, "step": 30090 }, { "epoch": 0.74, "learning_rate": 1.856667898042113e-05, "loss": 0.0175, "step": 30100 }, { "epoch": 0.74, "learning_rate": 1.8566186430242583e-05, "loss": 0.0135, "step": 30110 }, { "epoch": 0.74, "learning_rate": 1.8565693880064032e-05, "loss": 0.0135, "step": 30120 }, { "epoch": 0.74, "learning_rate": 1.856520132988548e-05, "loss": 0.0141, "step": 30130 }, { "epoch": 0.74, "learning_rate": 1.8564708779706934e-05, "loss": 0.0151, "step": 30140 }, { "epoch": 0.74, "learning_rate": 1.8564216229528383e-05, "loss": 0.0128, "step": 30150 }, { "epoch": 0.74, "learning_rate": 1.8563723679349835e-05, "loss": 0.0191, "step": 30160 }, { "epoch": 0.74, "learning_rate": 1.8563231129171288e-05, "loss": 0.0152, "step": 30170 }, { "epoch": 0.74, "learning_rate": 1.8562738578992737e-05, "loss": 0.0251, "step": 30180 }, { "epoch": 0.74, "learning_rate": 1.8562246028814186e-05, "loss": 0.0152, "step": 30190 }, { "epoch": 0.74, "learning_rate": 1.856175347863564e-05, "loss": 0.0147, "step": 30200 }, { "epoch": 0.74, "learning_rate": 1.8561260928457088e-05, "loss": 0.0158, "step": 30210 }, { "epoch": 0.74, "learning_rate": 1.856076837827854e-05, "loss": 0.0151, "step": 30220 }, { "epoch": 0.74, "learning_rate": 1.856027582809999e-05, "loss": 0.0132, "step": 30230 }, { "epoch": 0.74, "learning_rate": 1.855978327792144e-05, "loss": 0.0149, "step": 30240 }, { "epoch": 0.74, "learning_rate": 1.855929072774289e-05, "loss": 0.0209, "step": 30250 }, { "epoch": 0.74, "learning_rate": 1.855879817756434e-05, "loss": 0.0177, "step": 30260 }, { "epoch": 0.74, "learning_rate": 1.855830562738579e-05, "loss": 0.0129, "step": 30270 }, { "epoch": 0.74, "learning_rate": 1.8557813077207242e-05, "loss": 0.0165, "step": 30280 }, { "epoch": 0.74, "learning_rate": 1.855732052702869e-05, "loss": 0.0125, "step": 30290 }, { "epoch": 0.74, "learning_rate": 1.8556827976850144e-05, "loss": 0.0189, "step": 30300 }, { "epoch": 0.74, "learning_rate": 1.8556335426671593e-05, "loss": 0.0145, "step": 30310 }, { "epoch": 0.74, "learning_rate": 1.8555842876493046e-05, "loss": 0.0146, "step": 30320 }, { "epoch": 0.75, "learning_rate": 1.8555350326314495e-05, "loss": 0.0124, "step": 30330 }, { "epoch": 0.75, "learning_rate": 1.8554857776135947e-05, "loss": 0.016, "step": 30340 }, { "epoch": 0.75, "learning_rate": 1.8554365225957396e-05, "loss": 0.0125, "step": 30350 }, { "epoch": 0.75, "learning_rate": 1.8553872675778846e-05, "loss": 0.0168, "step": 30360 }, { "epoch": 0.75, "learning_rate": 1.8553380125600298e-05, "loss": 0.0175, "step": 30370 }, { "epoch": 0.75, "learning_rate": 1.8552887575421747e-05, "loss": 0.0122, "step": 30380 }, { "epoch": 0.75, "learning_rate": 1.8552395025243196e-05, "loss": 0.0139, "step": 30390 }, { "epoch": 0.75, "learning_rate": 1.855190247506465e-05, "loss": 0.0154, "step": 30400 }, { "epoch": 0.75, "learning_rate": 1.8551409924886098e-05, "loss": 0.016, "step": 30410 }, { "epoch": 0.75, "learning_rate": 1.8550917374707547e-05, "loss": 0.0171, "step": 30420 }, { "epoch": 0.75, "learning_rate": 1.8550424824529e-05, "loss": 0.0225, "step": 30430 }, { "epoch": 0.75, "learning_rate": 1.8549932274350452e-05, "loss": 0.0153, "step": 30440 }, { "epoch": 0.75, "learning_rate": 1.85494397241719e-05, "loss": 0.0244, "step": 30450 }, { "epoch": 0.75, "learning_rate": 1.8548947173993354e-05, "loss": 0.0126, "step": 30460 }, { "epoch": 0.75, "learning_rate": 1.8548454623814803e-05, "loss": 0.0141, "step": 30470 }, { "epoch": 0.75, "learning_rate": 1.8547962073636256e-05, "loss": 0.0174, "step": 30480 }, { "epoch": 0.75, "learning_rate": 1.8547469523457705e-05, "loss": 0.0139, "step": 30490 }, { "epoch": 0.75, "learning_rate": 1.8546976973279154e-05, "loss": 0.0145, "step": 30500 }, { "epoch": 0.75, "learning_rate": 1.8546484423100606e-05, "loss": 0.0143, "step": 30510 }, { "epoch": 0.75, "learning_rate": 1.8545991872922056e-05, "loss": 0.0148, "step": 30520 }, { "epoch": 0.75, "learning_rate": 1.8545499322743505e-05, "loss": 0.015, "step": 30530 }, { "epoch": 0.75, "learning_rate": 1.8545006772564957e-05, "loss": 0.0134, "step": 30540 }, { "epoch": 0.75, "learning_rate": 1.8544514222386406e-05, "loss": 0.016, "step": 30550 }, { "epoch": 0.75, "learning_rate": 1.8544021672207856e-05, "loss": 0.0244, "step": 30560 }, { "epoch": 0.75, "learning_rate": 1.8543529122029308e-05, "loss": 0.0156, "step": 30570 }, { "epoch": 0.75, "learning_rate": 1.8543036571850757e-05, "loss": 0.0159, "step": 30580 }, { "epoch": 0.75, "learning_rate": 1.854254402167221e-05, "loss": 0.0211, "step": 30590 }, { "epoch": 0.75, "learning_rate": 1.854205147149366e-05, "loss": 0.0144, "step": 30600 }, { "epoch": 0.75, "learning_rate": 1.854155892131511e-05, "loss": 0.0138, "step": 30610 }, { "epoch": 0.75, "learning_rate": 1.854106637113656e-05, "loss": 0.0159, "step": 30620 }, { "epoch": 0.75, "learning_rate": 1.8540573820958013e-05, "loss": 0.0166, "step": 30630 }, { "epoch": 0.75, "learning_rate": 1.8540081270779462e-05, "loss": 0.0145, "step": 30640 }, { "epoch": 0.75, "learning_rate": 1.853958872060091e-05, "loss": 0.0132, "step": 30650 }, { "epoch": 0.75, "learning_rate": 1.8539096170422364e-05, "loss": 0.0142, "step": 30660 }, { "epoch": 0.75, "learning_rate": 1.8538603620243813e-05, "loss": 0.0134, "step": 30670 }, { "epoch": 0.75, "learning_rate": 1.8538111070065262e-05, "loss": 0.0159, "step": 30680 }, { "epoch": 0.75, "learning_rate": 1.8537618519886715e-05, "loss": 0.0135, "step": 30690 }, { "epoch": 0.75, "learning_rate": 1.8537125969708164e-05, "loss": 0.0131, "step": 30700 }, { "epoch": 0.75, "learning_rate": 1.8536633419529617e-05, "loss": 0.0183, "step": 30710 }, { "epoch": 0.75, "learning_rate": 1.8536140869351066e-05, "loss": 0.0131, "step": 30720 }, { "epoch": 0.75, "learning_rate": 1.8535648319172518e-05, "loss": 0.0139, "step": 30730 }, { "epoch": 0.76, "learning_rate": 1.8535155768993967e-05, "loss": 0.0158, "step": 30740 }, { "epoch": 0.76, "learning_rate": 1.853466321881542e-05, "loss": 0.0139, "step": 30750 }, { "epoch": 0.76, "learning_rate": 1.853417066863687e-05, "loss": 0.0147, "step": 30760 }, { "epoch": 0.76, "learning_rate": 1.853367811845832e-05, "loss": 0.0167, "step": 30770 }, { "epoch": 0.76, "learning_rate": 1.853318556827977e-05, "loss": 0.0129, "step": 30780 }, { "epoch": 0.76, "learning_rate": 1.853269301810122e-05, "loss": 0.0142, "step": 30790 }, { "epoch": 0.76, "learning_rate": 1.8532200467922672e-05, "loss": 0.0196, "step": 30800 }, { "epoch": 0.76, "learning_rate": 1.853170791774412e-05, "loss": 0.0151, "step": 30810 }, { "epoch": 0.76, "learning_rate": 1.853121536756557e-05, "loss": 0.0137, "step": 30820 }, { "epoch": 0.76, "learning_rate": 1.8530722817387023e-05, "loss": 0.014, "step": 30830 }, { "epoch": 0.76, "learning_rate": 1.8530230267208472e-05, "loss": 0.013, "step": 30840 }, { "epoch": 0.76, "learning_rate": 1.852973771702992e-05, "loss": 0.0147, "step": 30850 }, { "epoch": 0.76, "learning_rate": 1.8529245166851374e-05, "loss": 0.0143, "step": 30860 }, { "epoch": 0.76, "learning_rate": 1.8528752616672823e-05, "loss": 0.0121, "step": 30870 }, { "epoch": 0.76, "learning_rate": 1.8528260066494276e-05, "loss": 0.0126, "step": 30880 }, { "epoch": 0.76, "learning_rate": 1.852776751631573e-05, "loss": 0.0216, "step": 30890 }, { "epoch": 0.76, "learning_rate": 1.8527274966137177e-05, "loss": 0.0208, "step": 30900 }, { "epoch": 0.76, "learning_rate": 1.8526782415958627e-05, "loss": 0.0169, "step": 30910 }, { "epoch": 0.76, "learning_rate": 1.852628986578008e-05, "loss": 0.0161, "step": 30920 }, { "epoch": 0.76, "learning_rate": 1.852579731560153e-05, "loss": 0.0113, "step": 30930 }, { "epoch": 0.76, "learning_rate": 1.852530476542298e-05, "loss": 0.016, "step": 30940 }, { "epoch": 0.76, "learning_rate": 1.852481221524443e-05, "loss": 0.0194, "step": 30950 }, { "epoch": 0.76, "learning_rate": 1.852431966506588e-05, "loss": 0.0211, "step": 30960 }, { "epoch": 0.76, "learning_rate": 1.8523827114887332e-05, "loss": 0.0147, "step": 30970 }, { "epoch": 0.76, "learning_rate": 1.852333456470878e-05, "loss": 0.0125, "step": 30980 }, { "epoch": 0.76, "learning_rate": 1.852284201453023e-05, "loss": 0.014, "step": 30990 }, { "epoch": 0.76, "learning_rate": 1.8522349464351683e-05, "loss": 0.0151, "step": 31000 }, { "epoch": 0.76, "learning_rate": 1.852185691417313e-05, "loss": 0.0149, "step": 31010 }, { "epoch": 0.76, "learning_rate": 1.8521364363994584e-05, "loss": 0.016, "step": 31020 }, { "epoch": 0.76, "learning_rate": 1.8520871813816033e-05, "loss": 0.0229, "step": 31030 }, { "epoch": 0.76, "learning_rate": 1.8520379263637486e-05, "loss": 0.0139, "step": 31040 }, { "epoch": 0.76, "learning_rate": 1.8519886713458935e-05, "loss": 0.0219, "step": 31050 }, { "epoch": 0.76, "learning_rate": 1.8519394163280388e-05, "loss": 0.0143, "step": 31060 }, { "epoch": 0.76, "learning_rate": 1.8518901613101837e-05, "loss": 0.0193, "step": 31070 }, { "epoch": 0.76, "learning_rate": 1.8518409062923286e-05, "loss": 0.012, "step": 31080 }, { "epoch": 0.76, "learning_rate": 1.851791651274474e-05, "loss": 0.0169, "step": 31090 }, { "epoch": 0.76, "learning_rate": 1.8517423962566188e-05, "loss": 0.016, "step": 31100 }, { "epoch": 0.76, "learning_rate": 1.8516931412387637e-05, "loss": 0.0183, "step": 31110 }, { "epoch": 0.76, "learning_rate": 1.851643886220909e-05, "loss": 0.0158, "step": 31120 }, { "epoch": 0.76, "learning_rate": 1.851594631203054e-05, "loss": 0.015, "step": 31130 }, { "epoch": 0.77, "learning_rate": 1.8515453761851988e-05, "loss": 0.0131, "step": 31140 }, { "epoch": 0.77, "learning_rate": 1.851496121167344e-05, "loss": 0.0125, "step": 31150 }, { "epoch": 0.77, "learning_rate": 1.8514468661494893e-05, "loss": 0.0162, "step": 31160 }, { "epoch": 0.77, "learning_rate": 1.8513976111316342e-05, "loss": 0.0146, "step": 31170 }, { "epoch": 0.77, "learning_rate": 1.8513483561137794e-05, "loss": 0.0188, "step": 31180 }, { "epoch": 0.77, "learning_rate": 1.8512991010959243e-05, "loss": 0.0135, "step": 31190 }, { "epoch": 0.77, "learning_rate": 1.8512498460780696e-05, "loss": 0.0141, "step": 31200 }, { "epoch": 0.77, "learning_rate": 1.8512005910602145e-05, "loss": 0.0181, "step": 31210 }, { "epoch": 0.77, "learning_rate": 1.8511513360423594e-05, "loss": 0.0147, "step": 31220 }, { "epoch": 0.77, "learning_rate": 1.8511020810245047e-05, "loss": 0.0153, "step": 31230 }, { "epoch": 0.77, "learning_rate": 1.8510528260066496e-05, "loss": 0.0144, "step": 31240 }, { "epoch": 0.77, "learning_rate": 1.8510035709887945e-05, "loss": 0.0153, "step": 31250 }, { "epoch": 0.77, "learning_rate": 1.8509543159709398e-05, "loss": 0.0185, "step": 31260 }, { "epoch": 0.77, "learning_rate": 1.8509050609530847e-05, "loss": 0.021, "step": 31270 }, { "epoch": 0.77, "learning_rate": 1.8508558059352296e-05, "loss": 0.0159, "step": 31280 }, { "epoch": 0.77, "learning_rate": 1.850806550917375e-05, "loss": 0.0141, "step": 31290 }, { "epoch": 0.77, "learning_rate": 1.8507572958995198e-05, "loss": 0.0144, "step": 31300 }, { "epoch": 0.77, "learning_rate": 1.850708040881665e-05, "loss": 0.0128, "step": 31310 }, { "epoch": 0.77, "learning_rate": 1.85065878586381e-05, "loss": 0.0214, "step": 31320 }, { "epoch": 0.77, "learning_rate": 1.8506095308459552e-05, "loss": 0.0138, "step": 31330 }, { "epoch": 0.77, "learning_rate": 1.8505602758281e-05, "loss": 0.0106, "step": 31340 }, { "epoch": 0.77, "learning_rate": 1.8505110208102454e-05, "loss": 0.0232, "step": 31350 }, { "epoch": 0.77, "learning_rate": 1.8504617657923903e-05, "loss": 0.0143, "step": 31360 }, { "epoch": 0.77, "learning_rate": 1.8504125107745352e-05, "loss": 0.0166, "step": 31370 }, { "epoch": 0.77, "learning_rate": 1.8503632557566804e-05, "loss": 0.0133, "step": 31380 }, { "epoch": 0.77, "learning_rate": 1.8503140007388254e-05, "loss": 0.0142, "step": 31390 }, { "epoch": 0.77, "learning_rate": 1.8502647457209703e-05, "loss": 0.0147, "step": 31400 }, { "epoch": 0.77, "learning_rate": 1.8502154907031155e-05, "loss": 0.025, "step": 31410 }, { "epoch": 0.77, "learning_rate": 1.8501662356852604e-05, "loss": 0.0148, "step": 31420 }, { "epoch": 0.77, "learning_rate": 1.8501169806674057e-05, "loss": 0.0162, "step": 31430 }, { "epoch": 0.77, "learning_rate": 1.8500677256495506e-05, "loss": 0.0182, "step": 31440 }, { "epoch": 0.77, "learning_rate": 1.850018470631696e-05, "loss": 0.0144, "step": 31450 }, { "epoch": 0.77, "learning_rate": 1.8499692156138408e-05, "loss": 0.0197, "step": 31460 }, { "epoch": 0.77, "learning_rate": 1.849919960595986e-05, "loss": 0.0133, "step": 31470 }, { "epoch": 0.77, "learning_rate": 1.849870705578131e-05, "loss": 0.017, "step": 31480 }, { "epoch": 0.77, "learning_rate": 1.8498214505602762e-05, "loss": 0.0144, "step": 31490 }, { "epoch": 0.77, "learning_rate": 1.849772195542421e-05, "loss": 0.0156, "step": 31500 }, { "epoch": 0.77, "learning_rate": 1.849722940524566e-05, "loss": 0.013, "step": 31510 }, { "epoch": 0.77, "learning_rate": 1.8496736855067113e-05, "loss": 0.0195, "step": 31520 }, { "epoch": 0.77, "learning_rate": 1.8496244304888562e-05, "loss": 0.0255, "step": 31530 }, { "epoch": 0.77, "learning_rate": 1.849575175471001e-05, "loss": 0.0133, "step": 31540 }, { "epoch": 0.78, "learning_rate": 1.8495259204531464e-05, "loss": 0.0136, "step": 31550 }, { "epoch": 0.78, "learning_rate": 1.8494766654352913e-05, "loss": 0.0218, "step": 31560 }, { "epoch": 0.78, "learning_rate": 1.8494274104174362e-05, "loss": 0.0118, "step": 31570 }, { "epoch": 0.78, "learning_rate": 1.8493781553995814e-05, "loss": 0.0141, "step": 31580 }, { "epoch": 0.78, "learning_rate": 1.8493289003817264e-05, "loss": 0.0144, "step": 31590 }, { "epoch": 0.78, "learning_rate": 1.8492796453638716e-05, "loss": 0.0125, "step": 31600 }, { "epoch": 0.78, "learning_rate": 1.849230390346017e-05, "loss": 0.0137, "step": 31610 }, { "epoch": 0.78, "learning_rate": 1.8491811353281618e-05, "loss": 0.0238, "step": 31620 }, { "epoch": 0.78, "learning_rate": 1.8491318803103067e-05, "loss": 0.0164, "step": 31630 }, { "epoch": 0.78, "learning_rate": 1.849082625292452e-05, "loss": 0.0137, "step": 31640 }, { "epoch": 0.78, "learning_rate": 1.849033370274597e-05, "loss": 0.0187, "step": 31650 }, { "epoch": 0.78, "learning_rate": 1.848984115256742e-05, "loss": 0.0155, "step": 31660 }, { "epoch": 0.78, "learning_rate": 1.848934860238887e-05, "loss": 0.0206, "step": 31670 }, { "epoch": 0.78, "learning_rate": 1.848885605221032e-05, "loss": 0.0163, "step": 31680 }, { "epoch": 0.78, "learning_rate": 1.8488363502031772e-05, "loss": 0.0153, "step": 31690 }, { "epoch": 0.78, "learning_rate": 1.848787095185322e-05, "loss": 0.0139, "step": 31700 }, { "epoch": 0.78, "learning_rate": 1.848737840167467e-05, "loss": 0.0142, "step": 31710 }, { "epoch": 0.78, "learning_rate": 1.8486885851496123e-05, "loss": 0.0166, "step": 31720 }, { "epoch": 0.78, "learning_rate": 1.8486393301317572e-05, "loss": 0.0222, "step": 31730 }, { "epoch": 0.78, "learning_rate": 1.8485900751139025e-05, "loss": 0.0163, "step": 31740 }, { "epoch": 0.78, "learning_rate": 1.8485408200960474e-05, "loss": 0.0131, "step": 31750 }, { "epoch": 0.78, "learning_rate": 1.8484915650781926e-05, "loss": 0.0144, "step": 31760 }, { "epoch": 0.78, "learning_rate": 1.8484423100603375e-05, "loss": 0.0181, "step": 31770 }, { "epoch": 0.78, "learning_rate": 1.8483930550424828e-05, "loss": 0.015, "step": 31780 }, { "epoch": 0.78, "learning_rate": 1.8483438000246277e-05, "loss": 0.0145, "step": 31790 }, { "epoch": 0.78, "learning_rate": 1.8482945450067726e-05, "loss": 0.0124, "step": 31800 }, { "epoch": 0.78, "learning_rate": 1.848245289988918e-05, "loss": 0.0136, "step": 31810 }, { "epoch": 0.78, "learning_rate": 1.8481960349710628e-05, "loss": 0.0145, "step": 31820 }, { "epoch": 0.78, "learning_rate": 1.8481467799532077e-05, "loss": 0.0146, "step": 31830 }, { "epoch": 0.78, "learning_rate": 1.848097524935353e-05, "loss": 0.0131, "step": 31840 }, { "epoch": 0.78, "learning_rate": 1.848048269917498e-05, "loss": 0.016, "step": 31850 }, { "epoch": 0.78, "learning_rate": 1.8479990148996428e-05, "loss": 0.0212, "step": 31860 }, { "epoch": 0.78, "learning_rate": 1.847949759881788e-05, "loss": 0.0156, "step": 31870 }, { "epoch": 0.78, "learning_rate": 1.8479005048639333e-05, "loss": 0.0139, "step": 31880 }, { "epoch": 0.78, "learning_rate": 1.8478512498460782e-05, "loss": 0.0238, "step": 31890 }, { "epoch": 0.78, "learning_rate": 1.8478019948282235e-05, "loss": 0.0147, "step": 31900 }, { "epoch": 0.78, "learning_rate": 1.8477527398103684e-05, "loss": 0.0209, "step": 31910 }, { "epoch": 0.78, "learning_rate": 1.8477034847925136e-05, "loss": 0.0135, "step": 31920 }, { "epoch": 0.78, "learning_rate": 1.8476542297746585e-05, "loss": 0.0156, "step": 31930 }, { "epoch": 0.78, "learning_rate": 1.8476049747568035e-05, "loss": 0.0166, "step": 31940 }, { "epoch": 0.78, "learning_rate": 1.8475557197389487e-05, "loss": 0.0215, "step": 31950 }, { "epoch": 0.79, "learning_rate": 1.8475064647210936e-05, "loss": 0.0133, "step": 31960 }, { "epoch": 0.79, "learning_rate": 1.8474572097032385e-05, "loss": 0.0118, "step": 31970 }, { "epoch": 0.79, "learning_rate": 1.8474079546853838e-05, "loss": 0.0138, "step": 31980 }, { "epoch": 0.79, "learning_rate": 1.8473586996675287e-05, "loss": 0.0147, "step": 31990 }, { "epoch": 0.79, "learning_rate": 1.8473094446496736e-05, "loss": 0.0155, "step": 32000 }, { "epoch": 0.79, "learning_rate": 1.847260189631819e-05, "loss": 0.0127, "step": 32010 }, { "epoch": 0.79, "learning_rate": 1.8472109346139638e-05, "loss": 0.0137, "step": 32020 }, { "epoch": 0.79, "learning_rate": 1.847161679596109e-05, "loss": 0.0187, "step": 32030 }, { "epoch": 0.79, "learning_rate": 1.847112424578254e-05, "loss": 0.0146, "step": 32040 }, { "epoch": 0.79, "learning_rate": 1.8470631695603992e-05, "loss": 0.0143, "step": 32050 }, { "epoch": 0.79, "learning_rate": 1.847013914542544e-05, "loss": 0.0233, "step": 32060 }, { "epoch": 0.79, "learning_rate": 1.8469646595246894e-05, "loss": 0.0113, "step": 32070 }, { "epoch": 0.79, "learning_rate": 1.8469154045068343e-05, "loss": 0.0123, "step": 32080 }, { "epoch": 0.79, "learning_rate": 1.8468661494889792e-05, "loss": 0.0141, "step": 32090 }, { "epoch": 0.79, "learning_rate": 1.8468168944711245e-05, "loss": 0.0125, "step": 32100 }, { "epoch": 0.79, "learning_rate": 1.8467676394532694e-05, "loss": 0.0155, "step": 32110 }, { "epoch": 0.79, "learning_rate": 1.8467183844354143e-05, "loss": 0.0127, "step": 32120 }, { "epoch": 0.79, "learning_rate": 1.8466691294175596e-05, "loss": 0.0158, "step": 32130 }, { "epoch": 0.79, "learning_rate": 1.8466198743997045e-05, "loss": 0.0238, "step": 32140 }, { "epoch": 0.79, "learning_rate": 1.8465706193818497e-05, "loss": 0.0125, "step": 32150 }, { "epoch": 0.79, "learning_rate": 1.8465213643639946e-05, "loss": 0.0136, "step": 32160 }, { "epoch": 0.79, "learning_rate": 1.84647210934614e-05, "loss": 0.0132, "step": 32170 }, { "epoch": 0.79, "learning_rate": 1.8464228543282848e-05, "loss": 0.0148, "step": 32180 }, { "epoch": 0.79, "learning_rate": 1.84637359931043e-05, "loss": 0.0188, "step": 32190 }, { "epoch": 0.79, "learning_rate": 1.846324344292575e-05, "loss": 0.0161, "step": 32200 }, { "epoch": 0.79, "learning_rate": 1.8462750892747202e-05, "loss": 0.0143, "step": 32210 }, { "epoch": 0.79, "learning_rate": 1.846225834256865e-05, "loss": 0.0167, "step": 32220 }, { "epoch": 0.79, "learning_rate": 1.84617657923901e-05, "loss": 0.0148, "step": 32230 }, { "epoch": 0.79, "learning_rate": 1.8461273242211553e-05, "loss": 0.0213, "step": 32240 }, { "epoch": 0.79, "learning_rate": 1.8460780692033002e-05, "loss": 0.0129, "step": 32250 }, { "epoch": 0.79, "learning_rate": 1.846028814185445e-05, "loss": 0.0148, "step": 32260 }, { "epoch": 0.79, "learning_rate": 1.8459795591675904e-05, "loss": 0.0157, "step": 32270 }, { "epoch": 0.79, "learning_rate": 1.8459303041497353e-05, "loss": 0.0142, "step": 32280 }, { "epoch": 0.79, "learning_rate": 1.8458810491318802e-05, "loss": 0.013, "step": 32290 }, { "epoch": 0.79, "learning_rate": 1.8458317941140255e-05, "loss": 0.0158, "step": 32300 }, { "epoch": 0.79, "learning_rate": 1.8457825390961704e-05, "loss": 0.0199, "step": 32310 }, { "epoch": 0.79, "learning_rate": 1.8457332840783156e-05, "loss": 0.0138, "step": 32320 }, { "epoch": 0.79, "learning_rate": 1.845684029060461e-05, "loss": 0.0277, "step": 32330 }, { "epoch": 0.79, "learning_rate": 1.8456347740426058e-05, "loss": 0.0143, "step": 32340 }, { "epoch": 0.79, "learning_rate": 1.8455855190247507e-05, "loss": 0.0131, "step": 32350 }, { "epoch": 0.79, "learning_rate": 1.845536264006896e-05, "loss": 0.0281, "step": 32360 }, { "epoch": 0.8, "learning_rate": 1.845487008989041e-05, "loss": 0.0205, "step": 32370 }, { "epoch": 0.8, "learning_rate": 1.845437753971186e-05, "loss": 0.0143, "step": 32380 }, { "epoch": 0.8, "learning_rate": 1.845388498953331e-05, "loss": 0.0133, "step": 32390 }, { "epoch": 0.8, "learning_rate": 1.845339243935476e-05, "loss": 0.0148, "step": 32400 }, { "epoch": 0.8, "learning_rate": 1.8452899889176212e-05, "loss": 0.0128, "step": 32410 }, { "epoch": 0.8, "learning_rate": 1.845240733899766e-05, "loss": 0.0174, "step": 32420 }, { "epoch": 0.8, "learning_rate": 1.845191478881911e-05, "loss": 0.0146, "step": 32430 }, { "epoch": 0.8, "learning_rate": 1.8451422238640563e-05, "loss": 0.0154, "step": 32440 }, { "epoch": 0.8, "learning_rate": 1.8450929688462012e-05, "loss": 0.0155, "step": 32450 }, { "epoch": 0.8, "learning_rate": 1.8450437138283465e-05, "loss": 0.0167, "step": 32460 }, { "epoch": 0.8, "learning_rate": 1.8449944588104914e-05, "loss": 0.0144, "step": 32470 }, { "epoch": 0.8, "learning_rate": 1.8449452037926367e-05, "loss": 0.0137, "step": 32480 }, { "epoch": 0.8, "learning_rate": 1.8448959487747816e-05, "loss": 0.0128, "step": 32490 }, { "epoch": 0.8, "learning_rate": 1.8448466937569268e-05, "loss": 0.0156, "step": 32500 }, { "epoch": 0.8, "learning_rate": 1.8447974387390717e-05, "loss": 0.014, "step": 32510 }, { "epoch": 0.8, "learning_rate": 1.8447481837212167e-05, "loss": 0.0129, "step": 32520 }, { "epoch": 0.8, "learning_rate": 1.844698928703362e-05, "loss": 0.0129, "step": 32530 }, { "epoch": 0.8, "learning_rate": 1.8446496736855068e-05, "loss": 0.0224, "step": 32540 }, { "epoch": 0.8, "learning_rate": 1.8446004186676517e-05, "loss": 0.0127, "step": 32550 }, { "epoch": 0.8, "learning_rate": 1.844551163649797e-05, "loss": 0.0154, "step": 32560 }, { "epoch": 0.8, "learning_rate": 1.844501908631942e-05, "loss": 0.0119, "step": 32570 }, { "epoch": 0.8, "learning_rate": 1.8444526536140868e-05, "loss": 0.0133, "step": 32580 }, { "epoch": 0.8, "learning_rate": 1.844403398596232e-05, "loss": 0.0134, "step": 32590 }, { "epoch": 0.8, "learning_rate": 1.8443541435783773e-05, "loss": 0.0191, "step": 32600 }, { "epoch": 0.8, "learning_rate": 1.8443048885605222e-05, "loss": 0.02, "step": 32610 }, { "epoch": 0.8, "learning_rate": 1.8442556335426675e-05, "loss": 0.0139, "step": 32620 }, { "epoch": 0.8, "learning_rate": 1.8442063785248124e-05, "loss": 0.0135, "step": 32630 }, { "epoch": 0.8, "learning_rate": 1.8441571235069577e-05, "loss": 0.0152, "step": 32640 }, { "epoch": 0.8, "learning_rate": 1.8441078684891026e-05, "loss": 0.0169, "step": 32650 }, { "epoch": 0.8, "learning_rate": 1.8440586134712475e-05, "loss": 0.0232, "step": 32660 }, { "epoch": 0.8, "learning_rate": 1.8440093584533928e-05, "loss": 0.0149, "step": 32670 }, { "epoch": 0.8, "learning_rate": 1.8439601034355377e-05, "loss": 0.0143, "step": 32680 }, { "epoch": 0.8, "learning_rate": 1.8439108484176826e-05, "loss": 0.0151, "step": 32690 }, { "epoch": 0.8, "learning_rate": 1.843861593399828e-05, "loss": 0.0157, "step": 32700 }, { "epoch": 0.8, "learning_rate": 1.8438123383819727e-05, "loss": 0.0106, "step": 32710 }, { "epoch": 0.8, "learning_rate": 1.8437630833641177e-05, "loss": 0.0193, "step": 32720 }, { "epoch": 0.8, "learning_rate": 1.843713828346263e-05, "loss": 0.0248, "step": 32730 }, { "epoch": 0.8, "learning_rate": 1.843664573328408e-05, "loss": 0.0148, "step": 32740 }, { "epoch": 0.8, "learning_rate": 1.843615318310553e-05, "loss": 0.0141, "step": 32750 }, { "epoch": 0.8, "learning_rate": 1.843566063292698e-05, "loss": 0.0157, "step": 32760 }, { "epoch": 0.81, "learning_rate": 1.8435168082748433e-05, "loss": 0.0144, "step": 32770 }, { "epoch": 0.81, "learning_rate": 1.8434675532569882e-05, "loss": 0.012, "step": 32780 }, { "epoch": 0.81, "learning_rate": 1.8434182982391334e-05, "loss": 0.0215, "step": 32790 }, { "epoch": 0.81, "learning_rate": 1.8433690432212783e-05, "loss": 0.0135, "step": 32800 }, { "epoch": 0.81, "learning_rate": 1.8433197882034233e-05, "loss": 0.013, "step": 32810 }, { "epoch": 0.81, "learning_rate": 1.8432705331855685e-05, "loss": 0.0165, "step": 32820 }, { "epoch": 0.81, "learning_rate": 1.8432212781677134e-05, "loss": 0.0189, "step": 32830 }, { "epoch": 0.81, "learning_rate": 1.8431720231498583e-05, "loss": 0.0128, "step": 32840 }, { "epoch": 0.81, "learning_rate": 1.8431227681320036e-05, "loss": 0.0235, "step": 32850 }, { "epoch": 0.81, "learning_rate": 1.8430735131141485e-05, "loss": 0.0154, "step": 32860 }, { "epoch": 0.81, "learning_rate": 1.8430242580962938e-05, "loss": 0.0204, "step": 32870 }, { "epoch": 0.81, "learning_rate": 1.8429750030784387e-05, "loss": 0.0161, "step": 32880 }, { "epoch": 0.81, "learning_rate": 1.842925748060584e-05, "loss": 0.0137, "step": 32890 }, { "epoch": 0.81, "learning_rate": 1.842876493042729e-05, "loss": 0.0156, "step": 32900 }, { "epoch": 0.81, "learning_rate": 1.842827238024874e-05, "loss": 0.0271, "step": 32910 }, { "epoch": 0.81, "learning_rate": 1.842777983007019e-05, "loss": 0.0143, "step": 32920 }, { "epoch": 0.81, "learning_rate": 1.8427287279891643e-05, "loss": 0.0222, "step": 32930 }, { "epoch": 0.81, "learning_rate": 1.8426794729713092e-05, "loss": 0.0231, "step": 32940 }, { "epoch": 0.81, "learning_rate": 1.842630217953454e-05, "loss": 0.0133, "step": 32950 }, { "epoch": 0.81, "learning_rate": 1.8425809629355993e-05, "loss": 0.0144, "step": 32960 }, { "epoch": 0.81, "learning_rate": 1.8425317079177443e-05, "loss": 0.0218, "step": 32970 }, { "epoch": 0.81, "learning_rate": 1.8424824528998892e-05, "loss": 0.0144, "step": 32980 }, { "epoch": 0.81, "learning_rate": 1.8424331978820344e-05, "loss": 0.0134, "step": 32990 }, { "epoch": 0.81, "learning_rate": 1.8423839428641793e-05, "loss": 0.0128, "step": 33000 }, { "epoch": 0.81, "learning_rate": 1.8423346878463243e-05, "loss": 0.0137, "step": 33010 }, { "epoch": 0.81, "learning_rate": 1.8422854328284695e-05, "loss": 0.0143, "step": 33020 }, { "epoch": 0.81, "learning_rate": 1.8422361778106144e-05, "loss": 0.02, "step": 33030 }, { "epoch": 0.81, "learning_rate": 1.8421869227927597e-05, "loss": 0.0174, "step": 33040 }, { "epoch": 0.81, "learning_rate": 1.842137667774905e-05, "loss": 0.0147, "step": 33050 }, { "epoch": 0.81, "learning_rate": 1.84208841275705e-05, "loss": 0.0154, "step": 33060 }, { "epoch": 0.81, "learning_rate": 1.8420391577391948e-05, "loss": 0.0126, "step": 33070 }, { "epoch": 0.81, "learning_rate": 1.84198990272134e-05, "loss": 0.0134, "step": 33080 }, { "epoch": 0.81, "learning_rate": 1.841940647703485e-05, "loss": 0.0174, "step": 33090 }, { "epoch": 0.81, "learning_rate": 1.8418913926856302e-05, "loss": 0.0168, "step": 33100 }, { "epoch": 0.81, "learning_rate": 1.841842137667775e-05, "loss": 0.0214, "step": 33110 }, { "epoch": 0.81, "learning_rate": 1.84179288264992e-05, "loss": 0.014, "step": 33120 }, { "epoch": 0.81, "learning_rate": 1.8417436276320653e-05, "loss": 0.0138, "step": 33130 }, { "epoch": 0.81, "learning_rate": 1.8416943726142102e-05, "loss": 0.0141, "step": 33140 }, { "epoch": 0.81, "learning_rate": 1.841645117596355e-05, "loss": 0.0273, "step": 33150 }, { "epoch": 0.81, "learning_rate": 1.8415958625785004e-05, "loss": 0.0137, "step": 33160 }, { "epoch": 0.81, "learning_rate": 1.8415466075606453e-05, "loss": 0.0146, "step": 33170 }, { "epoch": 0.82, "learning_rate": 1.8414973525427905e-05, "loss": 0.014, "step": 33180 }, { "epoch": 0.82, "learning_rate": 1.8414480975249354e-05, "loss": 0.014, "step": 33190 }, { "epoch": 0.82, "learning_rate": 1.8413988425070807e-05, "loss": 0.014, "step": 33200 }, { "epoch": 0.82, "learning_rate": 1.8413495874892256e-05, "loss": 0.0142, "step": 33210 }, { "epoch": 0.82, "learning_rate": 1.841300332471371e-05, "loss": 0.0139, "step": 33220 }, { "epoch": 0.82, "learning_rate": 1.8412510774535158e-05, "loss": 0.0146, "step": 33230 }, { "epoch": 0.82, "learning_rate": 1.8412018224356607e-05, "loss": 0.0158, "step": 33240 }, { "epoch": 0.82, "learning_rate": 1.841152567417806e-05, "loss": 0.025, "step": 33250 }, { "epoch": 0.82, "learning_rate": 1.841103312399951e-05, "loss": 0.0125, "step": 33260 }, { "epoch": 0.82, "learning_rate": 1.8410540573820958e-05, "loss": 0.0145, "step": 33270 }, { "epoch": 0.82, "learning_rate": 1.841004802364241e-05, "loss": 0.015, "step": 33280 }, { "epoch": 0.82, "learning_rate": 1.840955547346386e-05, "loss": 0.0131, "step": 33290 }, { "epoch": 0.82, "learning_rate": 1.840906292328531e-05, "loss": 0.0139, "step": 33300 }, { "epoch": 0.82, "learning_rate": 1.840857037310676e-05, "loss": 0.0188, "step": 33310 }, { "epoch": 0.82, "learning_rate": 1.8408077822928214e-05, "loss": 0.0292, "step": 33320 }, { "epoch": 0.82, "learning_rate": 1.8407585272749663e-05, "loss": 0.027, "step": 33330 }, { "epoch": 0.82, "learning_rate": 1.8407092722571115e-05, "loss": 0.0177, "step": 33340 }, { "epoch": 0.82, "learning_rate": 1.8406600172392564e-05, "loss": 0.0148, "step": 33350 }, { "epoch": 0.82, "learning_rate": 1.8406107622214017e-05, "loss": 0.0134, "step": 33360 }, { "epoch": 0.82, "learning_rate": 1.8405615072035466e-05, "loss": 0.0186, "step": 33370 }, { "epoch": 0.82, "learning_rate": 1.8405122521856915e-05, "loss": 0.0139, "step": 33380 }, { "epoch": 0.82, "learning_rate": 1.8404629971678368e-05, "loss": 0.0122, "step": 33390 }, { "epoch": 0.82, "learning_rate": 1.8404137421499817e-05, "loss": 0.0268, "step": 33400 }, { "epoch": 0.82, "learning_rate": 1.8403644871321266e-05, "loss": 0.015, "step": 33410 }, { "epoch": 0.82, "learning_rate": 1.840315232114272e-05, "loss": 0.0216, "step": 33420 }, { "epoch": 0.82, "learning_rate": 1.8402659770964168e-05, "loss": 0.0155, "step": 33430 }, { "epoch": 0.82, "learning_rate": 1.8402167220785617e-05, "loss": 0.0161, "step": 33440 }, { "epoch": 0.82, "learning_rate": 1.840167467060707e-05, "loss": 0.0223, "step": 33450 }, { "epoch": 0.82, "learning_rate": 1.840118212042852e-05, "loss": 0.0153, "step": 33460 }, { "epoch": 0.82, "learning_rate": 1.840068957024997e-05, "loss": 0.013, "step": 33470 }, { "epoch": 0.82, "learning_rate": 1.840019702007142e-05, "loss": 0.0219, "step": 33480 }, { "epoch": 0.82, "learning_rate": 1.8399704469892873e-05, "loss": 0.0138, "step": 33490 }, { "epoch": 0.82, "learning_rate": 1.8399211919714322e-05, "loss": 0.0186, "step": 33500 }, { "epoch": 0.82, "learning_rate": 1.8398719369535775e-05, "loss": 0.0152, "step": 33510 }, { "epoch": 0.82, "learning_rate": 1.8398226819357224e-05, "loss": 0.0148, "step": 33520 }, { "epoch": 0.82, "learning_rate": 1.8397734269178673e-05, "loss": 0.0134, "step": 33530 }, { "epoch": 0.82, "learning_rate": 1.8397241719000125e-05, "loss": 0.0126, "step": 33540 }, { "epoch": 0.82, "learning_rate": 1.8396749168821575e-05, "loss": 0.02, "step": 33550 }, { "epoch": 0.82, "learning_rate": 1.8396256618643024e-05, "loss": 0.0179, "step": 33560 }, { "epoch": 0.82, "learning_rate": 1.8395764068464476e-05, "loss": 0.0105, "step": 33570 }, { "epoch": 0.82, "learning_rate": 1.8395271518285925e-05, "loss": 0.0263, "step": 33580 }, { "epoch": 0.83, "learning_rate": 1.8394778968107378e-05, "loss": 0.0123, "step": 33590 }, { "epoch": 0.83, "learning_rate": 1.8394286417928827e-05, "loss": 0.0154, "step": 33600 }, { "epoch": 0.83, "learning_rate": 1.839379386775028e-05, "loss": 0.0181, "step": 33610 }, { "epoch": 0.83, "learning_rate": 1.839330131757173e-05, "loss": 0.0131, "step": 33620 }, { "epoch": 0.83, "learning_rate": 1.839280876739318e-05, "loss": 0.0195, "step": 33630 }, { "epoch": 0.83, "learning_rate": 1.839231621721463e-05, "loss": 0.0254, "step": 33640 }, { "epoch": 0.83, "learning_rate": 1.8391823667036083e-05, "loss": 0.0127, "step": 33650 }, { "epoch": 0.83, "learning_rate": 1.8391331116857532e-05, "loss": 0.0176, "step": 33660 }, { "epoch": 0.83, "learning_rate": 1.839083856667898e-05, "loss": 0.0159, "step": 33670 }, { "epoch": 0.83, "learning_rate": 1.8390346016500434e-05, "loss": 0.0161, "step": 33680 }, { "epoch": 0.83, "learning_rate": 1.8389853466321883e-05, "loss": 0.0168, "step": 33690 }, { "epoch": 0.83, "learning_rate": 1.8389360916143332e-05, "loss": 0.0122, "step": 33700 }, { "epoch": 0.83, "learning_rate": 1.8388868365964785e-05, "loss": 0.0138, "step": 33710 }, { "epoch": 0.83, "learning_rate": 1.8388375815786234e-05, "loss": 0.035, "step": 33720 }, { "epoch": 0.83, "learning_rate": 1.8387883265607683e-05, "loss": 0.0248, "step": 33730 }, { "epoch": 0.83, "learning_rate": 1.8387390715429135e-05, "loss": 0.0136, "step": 33740 }, { "epoch": 0.83, "learning_rate": 1.8386898165250585e-05, "loss": 0.014, "step": 33750 }, { "epoch": 0.83, "learning_rate": 1.8386405615072037e-05, "loss": 0.0151, "step": 33760 }, { "epoch": 0.83, "learning_rate": 1.838591306489349e-05, "loss": 0.0162, "step": 33770 }, { "epoch": 0.83, "learning_rate": 1.838542051471494e-05, "loss": 0.0138, "step": 33780 }, { "epoch": 0.83, "learning_rate": 1.8384927964536388e-05, "loss": 0.0145, "step": 33790 }, { "epoch": 0.83, "learning_rate": 1.838443541435784e-05, "loss": 0.0146, "step": 33800 }, { "epoch": 0.83, "learning_rate": 1.838394286417929e-05, "loss": 0.0122, "step": 33810 }, { "epoch": 0.83, "learning_rate": 1.8383450314000742e-05, "loss": 0.0243, "step": 33820 }, { "epoch": 0.83, "learning_rate": 1.838295776382219e-05, "loss": 0.0203, "step": 33830 }, { "epoch": 0.83, "learning_rate": 1.838246521364364e-05, "loss": 0.0164, "step": 33840 }, { "epoch": 0.83, "learning_rate": 1.8381972663465093e-05, "loss": 0.013, "step": 33850 }, { "epoch": 0.83, "learning_rate": 1.8381480113286542e-05, "loss": 0.0151, "step": 33860 }, { "epoch": 0.83, "learning_rate": 1.838098756310799e-05, "loss": 0.0142, "step": 33870 }, { "epoch": 0.83, "learning_rate": 1.8380495012929444e-05, "loss": 0.0169, "step": 33880 }, { "epoch": 0.83, "learning_rate": 1.8380002462750893e-05, "loss": 0.0165, "step": 33890 }, { "epoch": 0.83, "learning_rate": 1.8379509912572346e-05, "loss": 0.0122, "step": 33900 }, { "epoch": 0.83, "learning_rate": 1.8379017362393795e-05, "loss": 0.0211, "step": 33910 }, { "epoch": 0.83, "learning_rate": 1.8378524812215247e-05, "loss": 0.0142, "step": 33920 }, { "epoch": 0.83, "learning_rate": 1.8378032262036696e-05, "loss": 0.0142, "step": 33930 }, { "epoch": 0.83, "learning_rate": 1.837753971185815e-05, "loss": 0.0237, "step": 33940 }, { "epoch": 0.83, "learning_rate": 1.8377047161679598e-05, "loss": 0.0136, "step": 33950 }, { "epoch": 0.83, "learning_rate": 1.8376554611501047e-05, "loss": 0.0134, "step": 33960 }, { "epoch": 0.83, "learning_rate": 1.83760620613225e-05, "loss": 0.0232, "step": 33970 }, { "epoch": 0.83, "learning_rate": 1.837556951114395e-05, "loss": 0.018, "step": 33980 }, { "epoch": 0.84, "learning_rate": 1.8375076960965398e-05, "loss": 0.014, "step": 33990 }, { "epoch": 0.84, "learning_rate": 1.837458441078685e-05, "loss": 0.0117, "step": 34000 }, { "epoch": 0.84, "learning_rate": 1.83740918606083e-05, "loss": 0.0133, "step": 34010 }, { "epoch": 0.84, "learning_rate": 1.837359931042975e-05, "loss": 0.0164, "step": 34020 }, { "epoch": 0.84, "learning_rate": 1.83731067602512e-05, "loss": 0.0128, "step": 34030 }, { "epoch": 0.84, "learning_rate": 1.8372614210072654e-05, "loss": 0.0131, "step": 34040 }, { "epoch": 0.84, "learning_rate": 1.8372121659894103e-05, "loss": 0.016, "step": 34050 }, { "epoch": 0.84, "learning_rate": 1.8371629109715556e-05, "loss": 0.0139, "step": 34060 }, { "epoch": 0.84, "learning_rate": 1.8371136559537005e-05, "loss": 0.0176, "step": 34070 }, { "epoch": 0.84, "learning_rate": 1.8370644009358457e-05, "loss": 0.0135, "step": 34080 }, { "epoch": 0.84, "learning_rate": 1.8370151459179907e-05, "loss": 0.0223, "step": 34090 }, { "epoch": 0.84, "learning_rate": 1.8369658909001356e-05, "loss": 0.018, "step": 34100 }, { "epoch": 0.84, "learning_rate": 1.8369166358822808e-05, "loss": 0.0147, "step": 34110 }, { "epoch": 0.84, "learning_rate": 1.8368673808644257e-05, "loss": 0.0146, "step": 34120 }, { "epoch": 0.84, "learning_rate": 1.8368181258465706e-05, "loss": 0.0137, "step": 34130 }, { "epoch": 0.84, "learning_rate": 1.836768870828716e-05, "loss": 0.0141, "step": 34140 }, { "epoch": 0.84, "learning_rate": 1.8367196158108608e-05, "loss": 0.0152, "step": 34150 }, { "epoch": 0.84, "learning_rate": 1.8366703607930057e-05, "loss": 0.0127, "step": 34160 }, { "epoch": 0.84, "learning_rate": 1.836621105775151e-05, "loss": 0.0142, "step": 34170 }, { "epoch": 0.84, "learning_rate": 1.836571850757296e-05, "loss": 0.0165, "step": 34180 }, { "epoch": 0.84, "learning_rate": 1.836522595739441e-05, "loss": 0.0137, "step": 34190 }, { "epoch": 0.84, "learning_rate": 1.836473340721586e-05, "loss": 0.0143, "step": 34200 }, { "epoch": 0.84, "learning_rate": 1.8364240857037313e-05, "loss": 0.0139, "step": 34210 }, { "epoch": 0.84, "learning_rate": 1.8363748306858762e-05, "loss": 0.0161, "step": 34220 }, { "epoch": 0.84, "learning_rate": 1.8363255756680215e-05, "loss": 0.0142, "step": 34230 }, { "epoch": 0.84, "learning_rate": 1.8362763206501664e-05, "loss": 0.0128, "step": 34240 }, { "epoch": 0.84, "learning_rate": 1.8362270656323113e-05, "loss": 0.0132, "step": 34250 }, { "epoch": 0.84, "learning_rate": 1.8361778106144566e-05, "loss": 0.0163, "step": 34260 }, { "epoch": 0.84, "learning_rate": 1.8361285555966015e-05, "loss": 0.0201, "step": 34270 }, { "epoch": 0.84, "learning_rate": 1.8360793005787464e-05, "loss": 0.0136, "step": 34280 }, { "epoch": 0.84, "learning_rate": 1.8360300455608917e-05, "loss": 0.0163, "step": 34290 }, { "epoch": 0.84, "learning_rate": 1.8359807905430366e-05, "loss": 0.0266, "step": 34300 }, { "epoch": 0.84, "learning_rate": 1.8359315355251818e-05, "loss": 0.0134, "step": 34310 }, { "epoch": 0.84, "learning_rate": 1.8358822805073267e-05, "loss": 0.0181, "step": 34320 }, { "epoch": 0.84, "learning_rate": 1.835833025489472e-05, "loss": 0.0206, "step": 34330 }, { "epoch": 0.84, "learning_rate": 1.835783770471617e-05, "loss": 0.0225, "step": 34340 }, { "epoch": 0.84, "learning_rate": 1.835734515453762e-05, "loss": 0.0152, "step": 34350 }, { "epoch": 0.84, "learning_rate": 1.835685260435907e-05, "loss": 0.0138, "step": 34360 }, { "epoch": 0.84, "learning_rate": 1.8356360054180523e-05, "loss": 0.0269, "step": 34370 }, { "epoch": 0.84, "learning_rate": 1.8355867504001972e-05, "loss": 0.0145, "step": 34380 }, { "epoch": 0.84, "learning_rate": 1.835537495382342e-05, "loss": 0.0152, "step": 34390 }, { "epoch": 0.85, "learning_rate": 1.8354882403644874e-05, "loss": 0.0125, "step": 34400 }, { "epoch": 0.85, "learning_rate": 1.8354389853466323e-05, "loss": 0.0142, "step": 34410 }, { "epoch": 0.85, "learning_rate": 1.8353897303287772e-05, "loss": 0.014, "step": 34420 }, { "epoch": 0.85, "learning_rate": 1.8353404753109225e-05, "loss": 0.0202, "step": 34430 }, { "epoch": 0.85, "learning_rate": 1.8352912202930674e-05, "loss": 0.0235, "step": 34440 }, { "epoch": 0.85, "learning_rate": 1.8352419652752123e-05, "loss": 0.0277, "step": 34450 }, { "epoch": 0.85, "learning_rate": 1.8351927102573576e-05, "loss": 0.0145, "step": 34460 }, { "epoch": 0.85, "learning_rate": 1.8351434552395025e-05, "loss": 0.0138, "step": 34470 }, { "epoch": 0.85, "learning_rate": 1.8350942002216478e-05, "loss": 0.014, "step": 34480 }, { "epoch": 0.85, "learning_rate": 1.835044945203793e-05, "loss": 0.0233, "step": 34490 }, { "epoch": 0.85, "learning_rate": 1.834995690185938e-05, "loss": 0.0216, "step": 34500 }, { "epoch": 0.85, "learning_rate": 1.834946435168083e-05, "loss": 0.0133, "step": 34510 }, { "epoch": 0.85, "learning_rate": 1.834897180150228e-05, "loss": 0.0263, "step": 34520 }, { "epoch": 0.85, "learning_rate": 1.834847925132373e-05, "loss": 0.0194, "step": 34530 }, { "epoch": 0.85, "learning_rate": 1.8347986701145183e-05, "loss": 0.0142, "step": 34540 }, { "epoch": 0.85, "learning_rate": 1.8347494150966632e-05, "loss": 0.0154, "step": 34550 }, { "epoch": 0.85, "learning_rate": 1.834700160078808e-05, "loss": 0.0175, "step": 34560 }, { "epoch": 0.85, "learning_rate": 1.8346509050609533e-05, "loss": 0.0135, "step": 34570 }, { "epoch": 0.85, "learning_rate": 1.8346016500430983e-05, "loss": 0.0137, "step": 34580 }, { "epoch": 0.85, "learning_rate": 1.8345523950252432e-05, "loss": 0.0307, "step": 34590 }, { "epoch": 0.85, "learning_rate": 1.8345031400073884e-05, "loss": 0.016, "step": 34600 }, { "epoch": 0.85, "learning_rate": 1.8344538849895333e-05, "loss": 0.0149, "step": 34610 }, { "epoch": 0.85, "learning_rate": 1.8344046299716786e-05, "loss": 0.0137, "step": 34620 }, { "epoch": 0.85, "learning_rate": 1.8343553749538235e-05, "loss": 0.0129, "step": 34630 }, { "epoch": 0.85, "learning_rate": 1.8343061199359688e-05, "loss": 0.0139, "step": 34640 }, { "epoch": 0.85, "learning_rate": 1.8342568649181137e-05, "loss": 0.012, "step": 34650 }, { "epoch": 0.85, "learning_rate": 1.834207609900259e-05, "loss": 0.0169, "step": 34660 }, { "epoch": 0.85, "learning_rate": 1.834158354882404e-05, "loss": 0.0136, "step": 34670 }, { "epoch": 0.85, "learning_rate": 1.8341090998645488e-05, "loss": 0.0199, "step": 34680 }, { "epoch": 0.85, "learning_rate": 1.834059844846694e-05, "loss": 0.0166, "step": 34690 }, { "epoch": 0.85, "learning_rate": 1.834010589828839e-05, "loss": 0.0142, "step": 34700 }, { "epoch": 0.85, "learning_rate": 1.833961334810984e-05, "loss": 0.0135, "step": 34710 }, { "epoch": 0.85, "learning_rate": 1.833912079793129e-05, "loss": 0.0134, "step": 34720 }, { "epoch": 0.85, "learning_rate": 1.833862824775274e-05, "loss": 0.0142, "step": 34730 }, { "epoch": 0.85, "learning_rate": 1.833813569757419e-05, "loss": 0.0117, "step": 34740 }, { "epoch": 0.85, "learning_rate": 1.8337643147395642e-05, "loss": 0.0133, "step": 34750 }, { "epoch": 0.85, "learning_rate": 1.8337150597217094e-05, "loss": 0.0176, "step": 34760 }, { "epoch": 0.85, "learning_rate": 1.8336658047038543e-05, "loss": 0.0154, "step": 34770 }, { "epoch": 0.85, "learning_rate": 1.8336165496859996e-05, "loss": 0.0142, "step": 34780 }, { "epoch": 0.85, "learning_rate": 1.8335672946681445e-05, "loss": 0.0168, "step": 34790 }, { "epoch": 0.85, "learning_rate": 1.8335180396502898e-05, "loss": 0.0168, "step": 34800 }, { "epoch": 0.86, "learning_rate": 1.8334687846324347e-05, "loss": 0.0132, "step": 34810 }, { "epoch": 0.86, "learning_rate": 1.8334195296145796e-05, "loss": 0.0123, "step": 34820 }, { "epoch": 0.86, "learning_rate": 1.833370274596725e-05, "loss": 0.0132, "step": 34830 }, { "epoch": 0.86, "learning_rate": 1.8333210195788698e-05, "loss": 0.0142, "step": 34840 }, { "epoch": 0.86, "learning_rate": 1.8332717645610147e-05, "loss": 0.0193, "step": 34850 }, { "epoch": 0.86, "learning_rate": 1.83322250954316e-05, "loss": 0.0144, "step": 34860 }, { "epoch": 0.86, "learning_rate": 1.833173254525305e-05, "loss": 0.0139, "step": 34870 }, { "epoch": 0.86, "learning_rate": 1.8331239995074498e-05, "loss": 0.0186, "step": 34880 }, { "epoch": 0.86, "learning_rate": 1.833074744489595e-05, "loss": 0.0158, "step": 34890 }, { "epoch": 0.86, "learning_rate": 1.83302548947174e-05, "loss": 0.0139, "step": 34900 }, { "epoch": 0.86, "learning_rate": 1.8329762344538852e-05, "loss": 0.0147, "step": 34910 }, { "epoch": 0.86, "learning_rate": 1.83292697943603e-05, "loss": 0.0121, "step": 34920 }, { "epoch": 0.86, "learning_rate": 1.8328777244181754e-05, "loss": 0.0141, "step": 34930 }, { "epoch": 0.86, "learning_rate": 1.8328284694003203e-05, "loss": 0.0234, "step": 34940 }, { "epoch": 0.86, "learning_rate": 1.8327792143824655e-05, "loss": 0.0148, "step": 34950 }, { "epoch": 0.86, "learning_rate": 1.8327299593646104e-05, "loss": 0.0159, "step": 34960 }, { "epoch": 0.86, "learning_rate": 1.8326807043467554e-05, "loss": 0.0118, "step": 34970 }, { "epoch": 0.86, "learning_rate": 1.8326314493289006e-05, "loss": 0.0234, "step": 34980 }, { "epoch": 0.86, "learning_rate": 1.8325821943110455e-05, "loss": 0.0217, "step": 34990 }, { "epoch": 0.86, "learning_rate": 1.8325329392931904e-05, "loss": 0.0131, "step": 35000 }, { "epoch": 0.86, "eval_bleu": 97.663, "eval_gen_len_ratio": 1.0019, "eval_loss": 0.010191327892243862, "eval_runtime": 255.1843, "eval_samples_per_second": 7.312, "eval_steps_per_second": 0.231, "step": 35000 }, { "epoch": 0.86, "learning_rate": 1.8324836842753357e-05, "loss": 0.0128, "step": 35010 }, { "epoch": 0.86, "learning_rate": 1.8324344292574806e-05, "loss": 0.018, "step": 35020 }, { "epoch": 0.86, "learning_rate": 1.832385174239626e-05, "loss": 0.0157, "step": 35030 }, { "epoch": 0.86, "learning_rate": 1.8323359192217708e-05, "loss": 0.0127, "step": 35040 }, { "epoch": 0.86, "learning_rate": 1.832286664203916e-05, "loss": 0.0143, "step": 35050 }, { "epoch": 0.86, "learning_rate": 1.832237409186061e-05, "loss": 0.013, "step": 35060 }, { "epoch": 0.86, "learning_rate": 1.8321881541682062e-05, "loss": 0.0191, "step": 35070 }, { "epoch": 0.86, "learning_rate": 1.832138899150351e-05, "loss": 0.0131, "step": 35080 }, { "epoch": 0.86, "learning_rate": 1.8320896441324964e-05, "loss": 0.0137, "step": 35090 }, { "epoch": 0.86, "learning_rate": 1.8320403891146413e-05, "loss": 0.0138, "step": 35100 }, { "epoch": 0.86, "learning_rate": 1.8319911340967862e-05, "loss": 0.0119, "step": 35110 }, { "epoch": 0.86, "learning_rate": 1.8319418790789315e-05, "loss": 0.013, "step": 35120 }, { "epoch": 0.86, "learning_rate": 1.8318926240610764e-05, "loss": 0.023, "step": 35130 }, { "epoch": 0.86, "learning_rate": 1.8318433690432213e-05, "loss": 0.0118, "step": 35140 }, { "epoch": 0.86, "learning_rate": 1.8317941140253665e-05, "loss": 0.0154, "step": 35150 }, { "epoch": 0.86, "learning_rate": 1.8317448590075114e-05, "loss": 0.0169, "step": 35160 }, { "epoch": 0.86, "learning_rate": 1.8316956039896564e-05, "loss": 0.0162, "step": 35170 }, { "epoch": 0.86, "learning_rate": 1.8316463489718016e-05, "loss": 0.0139, "step": 35180 }, { "epoch": 0.86, "learning_rate": 1.8315970939539465e-05, "loss": 0.0166, "step": 35190 }, { "epoch": 0.86, "learning_rate": 1.8315478389360918e-05, "loss": 0.0162, "step": 35200 }, { "epoch": 0.86, "learning_rate": 1.831498583918237e-05, "loss": 0.0158, "step": 35210 }, { "epoch": 0.87, "learning_rate": 1.831449328900382e-05, "loss": 0.0154, "step": 35220 }, { "epoch": 0.87, "learning_rate": 1.831400073882527e-05, "loss": 0.0136, "step": 35230 }, { "epoch": 0.87, "learning_rate": 1.831350818864672e-05, "loss": 0.0118, "step": 35240 }, { "epoch": 0.87, "learning_rate": 1.831301563846817e-05, "loss": 0.0213, "step": 35250 }, { "epoch": 0.87, "learning_rate": 1.8312523088289623e-05, "loss": 0.0141, "step": 35260 }, { "epoch": 0.87, "learning_rate": 1.8312030538111072e-05, "loss": 0.0151, "step": 35270 }, { "epoch": 0.87, "learning_rate": 1.831153798793252e-05, "loss": 0.0133, "step": 35280 }, { "epoch": 0.87, "learning_rate": 1.8311045437753974e-05, "loss": 0.0144, "step": 35290 }, { "epoch": 0.87, "learning_rate": 1.8310552887575423e-05, "loss": 0.018, "step": 35300 }, { "epoch": 0.87, "learning_rate": 1.8310060337396872e-05, "loss": 0.0135, "step": 35310 }, { "epoch": 0.87, "learning_rate": 1.8309567787218325e-05, "loss": 0.0136, "step": 35320 }, { "epoch": 0.87, "learning_rate": 1.8309075237039774e-05, "loss": 0.0139, "step": 35330 }, { "epoch": 0.87, "learning_rate": 1.8308582686861226e-05, "loss": 0.0216, "step": 35340 }, { "epoch": 0.87, "learning_rate": 1.8308090136682675e-05, "loss": 0.0133, "step": 35350 }, { "epoch": 0.87, "learning_rate": 1.8307597586504128e-05, "loss": 0.0147, "step": 35360 }, { "epoch": 0.87, "learning_rate": 1.8307105036325577e-05, "loss": 0.0203, "step": 35370 }, { "epoch": 0.87, "learning_rate": 1.830661248614703e-05, "loss": 0.0197, "step": 35380 }, { "epoch": 0.87, "learning_rate": 1.830611993596848e-05, "loss": 0.0122, "step": 35390 }, { "epoch": 0.87, "learning_rate": 1.8305627385789928e-05, "loss": 0.0233, "step": 35400 }, { "epoch": 0.87, "learning_rate": 1.830513483561138e-05, "loss": 0.0208, "step": 35410 }, { "epoch": 0.87, "learning_rate": 1.830464228543283e-05, "loss": 0.0106, "step": 35420 }, { "epoch": 0.87, "learning_rate": 1.830414973525428e-05, "loss": 0.0187, "step": 35430 }, { "epoch": 0.87, "learning_rate": 1.830365718507573e-05, "loss": 0.0138, "step": 35440 }, { "epoch": 0.87, "learning_rate": 1.830316463489718e-05, "loss": 0.0165, "step": 35450 }, { "epoch": 0.87, "learning_rate": 1.830267208471863e-05, "loss": 0.0157, "step": 35460 }, { "epoch": 0.87, "learning_rate": 1.8302179534540082e-05, "loss": 0.0243, "step": 35470 }, { "epoch": 0.87, "learning_rate": 1.8301686984361535e-05, "loss": 0.0136, "step": 35480 }, { "epoch": 0.87, "learning_rate": 1.8301194434182984e-05, "loss": 0.0145, "step": 35490 }, { "epoch": 0.87, "learning_rate": 1.8300701884004436e-05, "loss": 0.0156, "step": 35500 }, { "epoch": 0.87, "learning_rate": 1.8300209333825886e-05, "loss": 0.013, "step": 35510 }, { "epoch": 0.87, "learning_rate": 1.8299716783647338e-05, "loss": 0.0146, "step": 35520 }, { "epoch": 0.87, "learning_rate": 1.8299224233468787e-05, "loss": 0.0139, "step": 35530 }, { "epoch": 0.87, "learning_rate": 1.8298731683290236e-05, "loss": 0.0125, "step": 35540 }, { "epoch": 0.87, "learning_rate": 1.829823913311169e-05, "loss": 0.0148, "step": 35550 }, { "epoch": 0.87, "learning_rate": 1.8297746582933138e-05, "loss": 0.0268, "step": 35560 }, { "epoch": 0.87, "learning_rate": 1.8297254032754587e-05, "loss": 0.0172, "step": 35570 }, { "epoch": 0.87, "learning_rate": 1.829676148257604e-05, "loss": 0.0141, "step": 35580 }, { "epoch": 0.87, "learning_rate": 1.829626893239749e-05, "loss": 0.0259, "step": 35590 }, { "epoch": 0.87, "learning_rate": 1.8295776382218938e-05, "loss": 0.0126, "step": 35600 }, { "epoch": 0.87, "learning_rate": 1.829528383204039e-05, "loss": 0.0138, "step": 35610 }, { "epoch": 0.88, "learning_rate": 1.829479128186184e-05, "loss": 0.0147, "step": 35620 }, { "epoch": 0.88, "learning_rate": 1.8294298731683292e-05, "loss": 0.0151, "step": 35630 }, { "epoch": 0.88, "learning_rate": 1.829380618150474e-05, "loss": 0.0155, "step": 35640 }, { "epoch": 0.88, "learning_rate": 1.8293313631326194e-05, "loss": 0.0233, "step": 35650 }, { "epoch": 0.88, "learning_rate": 1.8292821081147643e-05, "loss": 0.0136, "step": 35660 }, { "epoch": 0.88, "learning_rate": 1.8292328530969096e-05, "loss": 0.0163, "step": 35670 }, { "epoch": 0.88, "learning_rate": 1.8291835980790545e-05, "loss": 0.0123, "step": 35680 }, { "epoch": 0.88, "learning_rate": 1.8291343430611994e-05, "loss": 0.0208, "step": 35690 }, { "epoch": 0.88, "learning_rate": 1.8290850880433446e-05, "loss": 0.013, "step": 35700 }, { "epoch": 0.88, "learning_rate": 1.8290358330254896e-05, "loss": 0.0137, "step": 35710 }, { "epoch": 0.88, "learning_rate": 1.8289865780076345e-05, "loss": 0.0124, "step": 35720 }, { "epoch": 0.88, "learning_rate": 1.8289373229897797e-05, "loss": 0.0154, "step": 35730 }, { "epoch": 0.88, "learning_rate": 1.8288880679719246e-05, "loss": 0.0171, "step": 35740 }, { "epoch": 0.88, "learning_rate": 1.82883881295407e-05, "loss": 0.0142, "step": 35750 }, { "epoch": 0.88, "learning_rate": 1.8287895579362148e-05, "loss": 0.0121, "step": 35760 }, { "epoch": 0.88, "learning_rate": 1.82874030291836e-05, "loss": 0.0134, "step": 35770 }, { "epoch": 0.88, "learning_rate": 1.828691047900505e-05, "loss": 0.02, "step": 35780 }, { "epoch": 0.88, "learning_rate": 1.8286417928826502e-05, "loss": 0.0146, "step": 35790 }, { "epoch": 0.88, "learning_rate": 1.828592537864795e-05, "loss": 0.016, "step": 35800 }, { "epoch": 0.88, "learning_rate": 1.8285432828469404e-05, "loss": 0.0165, "step": 35810 }, { "epoch": 0.88, "learning_rate": 1.8284940278290853e-05, "loss": 0.0163, "step": 35820 }, { "epoch": 0.88, "learning_rate": 1.8284447728112302e-05, "loss": 0.0188, "step": 35830 }, { "epoch": 0.88, "learning_rate": 1.8283955177933755e-05, "loss": 0.0154, "step": 35840 }, { "epoch": 0.88, "learning_rate": 1.8283462627755204e-05, "loss": 0.0204, "step": 35850 }, { "epoch": 0.88, "learning_rate": 1.8282970077576653e-05, "loss": 0.015, "step": 35860 }, { "epoch": 0.88, "learning_rate": 1.8282477527398106e-05, "loss": 0.0151, "step": 35870 }, { "epoch": 0.88, "learning_rate": 1.8281984977219555e-05, "loss": 0.0175, "step": 35880 }, { "epoch": 0.88, "learning_rate": 1.8281492427041004e-05, "loss": 0.0143, "step": 35890 }, { "epoch": 0.88, "learning_rate": 1.8280999876862457e-05, "loss": 0.0133, "step": 35900 }, { "epoch": 0.88, "learning_rate": 1.8280507326683906e-05, "loss": 0.0153, "step": 35910 }, { "epoch": 0.88, "learning_rate": 1.8280014776505358e-05, "loss": 0.0132, "step": 35920 }, { "epoch": 0.88, "learning_rate": 1.827952222632681e-05, "loss": 0.0218, "step": 35930 }, { "epoch": 0.88, "learning_rate": 1.827902967614826e-05, "loss": 0.0194, "step": 35940 }, { "epoch": 0.88, "learning_rate": 1.827853712596971e-05, "loss": 0.0144, "step": 35950 }, { "epoch": 0.88, "learning_rate": 1.827804457579116e-05, "loss": 0.0141, "step": 35960 }, { "epoch": 0.88, "learning_rate": 1.827755202561261e-05, "loss": 0.0186, "step": 35970 }, { "epoch": 0.88, "learning_rate": 1.8277059475434063e-05, "loss": 0.0135, "step": 35980 }, { "epoch": 0.88, "learning_rate": 1.8276566925255512e-05, "loss": 0.0192, "step": 35990 }, { "epoch": 0.88, "learning_rate": 1.827607437507696e-05, "loss": 0.0142, "step": 36000 }, { "epoch": 0.88, "learning_rate": 1.8275581824898414e-05, "loss": 0.0149, "step": 36010 }, { "epoch": 0.88, "learning_rate": 1.8275089274719863e-05, "loss": 0.0202, "step": 36020 }, { "epoch": 0.89, "learning_rate": 1.8274596724541312e-05, "loss": 0.0252, "step": 36030 }, { "epoch": 0.89, "learning_rate": 1.8274104174362765e-05, "loss": 0.0129, "step": 36040 }, { "epoch": 0.89, "learning_rate": 1.8273611624184214e-05, "loss": 0.0254, "step": 36050 }, { "epoch": 0.89, "learning_rate": 1.8273119074005667e-05, "loss": 0.0126, "step": 36060 }, { "epoch": 0.89, "learning_rate": 1.8272626523827116e-05, "loss": 0.0208, "step": 36070 }, { "epoch": 0.89, "learning_rate": 1.8272133973648568e-05, "loss": 0.0169, "step": 36080 }, { "epoch": 0.89, "learning_rate": 1.8271641423470017e-05, "loss": 0.0179, "step": 36090 }, { "epoch": 0.89, "learning_rate": 1.827114887329147e-05, "loss": 0.0127, "step": 36100 }, { "epoch": 0.89, "learning_rate": 1.827065632311292e-05, "loss": 0.0132, "step": 36110 }, { "epoch": 0.89, "learning_rate": 1.8270163772934368e-05, "loss": 0.0154, "step": 36120 }, { "epoch": 0.89, "learning_rate": 1.826967122275582e-05, "loss": 0.0151, "step": 36130 }, { "epoch": 0.89, "learning_rate": 1.826917867257727e-05, "loss": 0.0141, "step": 36140 }, { "epoch": 0.89, "learning_rate": 1.826868612239872e-05, "loss": 0.0117, "step": 36150 }, { "epoch": 0.89, "learning_rate": 1.826819357222017e-05, "loss": 0.0138, "step": 36160 }, { "epoch": 0.89, "learning_rate": 1.826770102204162e-05, "loss": 0.0132, "step": 36170 }, { "epoch": 0.89, "learning_rate": 1.826720847186307e-05, "loss": 0.0122, "step": 36180 }, { "epoch": 0.89, "learning_rate": 1.8266715921684522e-05, "loss": 0.0134, "step": 36190 }, { "epoch": 0.89, "learning_rate": 1.8266223371505975e-05, "loss": 0.0182, "step": 36200 }, { "epoch": 0.89, "learning_rate": 1.8265730821327424e-05, "loss": 0.0127, "step": 36210 }, { "epoch": 0.89, "learning_rate": 1.8265238271148877e-05, "loss": 0.0117, "step": 36220 }, { "epoch": 0.89, "learning_rate": 1.8264745720970326e-05, "loss": 0.0155, "step": 36230 }, { "epoch": 0.89, "learning_rate": 1.826425317079178e-05, "loss": 0.0116, "step": 36240 }, { "epoch": 0.89, "learning_rate": 1.8263760620613228e-05, "loss": 0.0124, "step": 36250 }, { "epoch": 0.89, "learning_rate": 1.8263268070434677e-05, "loss": 0.0151, "step": 36260 }, { "epoch": 0.89, "learning_rate": 1.826277552025613e-05, "loss": 0.013, "step": 36270 }, { "epoch": 0.89, "learning_rate": 1.826228297007758e-05, "loss": 0.0134, "step": 36280 }, { "epoch": 0.89, "learning_rate": 1.8261790419899028e-05, "loss": 0.0181, "step": 36290 }, { "epoch": 0.89, "learning_rate": 1.826129786972048e-05, "loss": 0.0134, "step": 36300 }, { "epoch": 0.89, "learning_rate": 1.826080531954193e-05, "loss": 0.0131, "step": 36310 }, { "epoch": 0.89, "learning_rate": 1.826031276936338e-05, "loss": 0.0165, "step": 36320 }, { "epoch": 0.89, "learning_rate": 1.825982021918483e-05, "loss": 0.0137, "step": 36330 }, { "epoch": 0.89, "learning_rate": 1.825932766900628e-05, "loss": 0.0114, "step": 36340 }, { "epoch": 0.89, "learning_rate": 1.8258835118827733e-05, "loss": 0.0193, "step": 36350 }, { "epoch": 0.89, "learning_rate": 1.8258342568649182e-05, "loss": 0.0173, "step": 36360 }, { "epoch": 0.89, "learning_rate": 1.8257850018470634e-05, "loss": 0.0133, "step": 36370 }, { "epoch": 0.89, "learning_rate": 1.8257357468292083e-05, "loss": 0.0134, "step": 36380 }, { "epoch": 0.89, "learning_rate": 1.8256864918113536e-05, "loss": 0.0119, "step": 36390 }, { "epoch": 0.89, "learning_rate": 1.8256372367934985e-05, "loss": 0.0121, "step": 36400 }, { "epoch": 0.89, "learning_rate": 1.8255879817756434e-05, "loss": 0.0139, "step": 36410 }, { "epoch": 0.89, "learning_rate": 1.8255387267577887e-05, "loss": 0.0157, "step": 36420 }, { "epoch": 0.89, "learning_rate": 1.8254894717399336e-05, "loss": 0.0152, "step": 36430 }, { "epoch": 0.9, "learning_rate": 1.8254402167220785e-05, "loss": 0.0116, "step": 36440 }, { "epoch": 0.9, "learning_rate": 1.8253909617042238e-05, "loss": 0.0245, "step": 36450 }, { "epoch": 0.9, "learning_rate": 1.8253417066863687e-05, "loss": 0.0108, "step": 36460 }, { "epoch": 0.9, "learning_rate": 1.825292451668514e-05, "loss": 0.0204, "step": 36470 }, { "epoch": 0.9, "learning_rate": 1.825243196650659e-05, "loss": 0.0244, "step": 36480 }, { "epoch": 0.9, "learning_rate": 1.825193941632804e-05, "loss": 0.0165, "step": 36490 }, { "epoch": 0.9, "learning_rate": 1.825144686614949e-05, "loss": 0.013, "step": 36500 }, { "epoch": 0.9, "learning_rate": 1.8250954315970943e-05, "loss": 0.0154, "step": 36510 }, { "epoch": 0.9, "learning_rate": 1.8250461765792392e-05, "loss": 0.0126, "step": 36520 }, { "epoch": 0.9, "learning_rate": 1.8249969215613844e-05, "loss": 0.0181, "step": 36530 }, { "epoch": 0.9, "learning_rate": 1.8249476665435294e-05, "loss": 0.0142, "step": 36540 }, { "epoch": 0.9, "learning_rate": 1.8248984115256743e-05, "loss": 0.0114, "step": 36550 }, { "epoch": 0.9, "learning_rate": 1.8248491565078195e-05, "loss": 0.0173, "step": 36560 }, { "epoch": 0.9, "learning_rate": 1.8247999014899644e-05, "loss": 0.0143, "step": 36570 }, { "epoch": 0.9, "learning_rate": 1.8247506464721093e-05, "loss": 0.0191, "step": 36580 }, { "epoch": 0.9, "learning_rate": 1.8247013914542546e-05, "loss": 0.016, "step": 36590 }, { "epoch": 0.9, "learning_rate": 1.8246521364363995e-05, "loss": 0.0182, "step": 36600 }, { "epoch": 0.9, "learning_rate": 1.8246028814185444e-05, "loss": 0.0134, "step": 36610 }, { "epoch": 0.9, "learning_rate": 1.8245536264006897e-05, "loss": 0.0309, "step": 36620 }, { "epoch": 0.9, "learning_rate": 1.8245043713828346e-05, "loss": 0.022, "step": 36630 }, { "epoch": 0.9, "learning_rate": 1.82445511636498e-05, "loss": 0.0128, "step": 36640 }, { "epoch": 0.9, "learning_rate": 1.824405861347125e-05, "loss": 0.0142, "step": 36650 }, { "epoch": 0.9, "learning_rate": 1.82435660632927e-05, "loss": 0.021, "step": 36660 }, { "epoch": 0.9, "learning_rate": 1.824307351311415e-05, "loss": 0.0146, "step": 36670 }, { "epoch": 0.9, "learning_rate": 1.8242580962935602e-05, "loss": 0.0207, "step": 36680 }, { "epoch": 0.9, "learning_rate": 1.824208841275705e-05, "loss": 0.0125, "step": 36690 }, { "epoch": 0.9, "learning_rate": 1.82415958625785e-05, "loss": 0.0138, "step": 36700 }, { "epoch": 0.9, "learning_rate": 1.8241103312399953e-05, "loss": 0.0138, "step": 36710 }, { "epoch": 0.9, "learning_rate": 1.8240610762221402e-05, "loss": 0.0151, "step": 36720 }, { "epoch": 0.9, "learning_rate": 1.8240118212042854e-05, "loss": 0.0197, "step": 36730 }, { "epoch": 0.9, "learning_rate": 1.8239625661864304e-05, "loss": 0.0199, "step": 36740 }, { "epoch": 0.9, "learning_rate": 1.8239133111685753e-05, "loss": 0.0148, "step": 36750 }, { "epoch": 0.9, "learning_rate": 1.8238640561507205e-05, "loss": 0.0213, "step": 36760 }, { "epoch": 0.9, "learning_rate": 1.8238148011328654e-05, "loss": 0.0128, "step": 36770 }, { "epoch": 0.9, "learning_rate": 1.8237655461150107e-05, "loss": 0.0194, "step": 36780 }, { "epoch": 0.9, "learning_rate": 1.8237162910971556e-05, "loss": 0.0123, "step": 36790 }, { "epoch": 0.9, "learning_rate": 1.823667036079301e-05, "loss": 0.0224, "step": 36800 }, { "epoch": 0.9, "learning_rate": 1.8236177810614458e-05, "loss": 0.0134, "step": 36810 }, { "epoch": 0.9, "learning_rate": 1.823568526043591e-05, "loss": 0.0228, "step": 36820 }, { "epoch": 0.9, "learning_rate": 1.823519271025736e-05, "loss": 0.013, "step": 36830 }, { "epoch": 0.91, "learning_rate": 1.823470016007881e-05, "loss": 0.0219, "step": 36840 }, { "epoch": 0.91, "learning_rate": 1.823420760990026e-05, "loss": 0.0161, "step": 36850 }, { "epoch": 0.91, "learning_rate": 1.823371505972171e-05, "loss": 0.0128, "step": 36860 }, { "epoch": 0.91, "learning_rate": 1.823322250954316e-05, "loss": 0.0192, "step": 36870 }, { "epoch": 0.91, "learning_rate": 1.8232729959364612e-05, "loss": 0.013, "step": 36880 }, { "epoch": 0.91, "learning_rate": 1.823223740918606e-05, "loss": 0.0168, "step": 36890 }, { "epoch": 0.91, "learning_rate": 1.823174485900751e-05, "loss": 0.0136, "step": 36900 }, { "epoch": 0.91, "learning_rate": 1.8231252308828963e-05, "loss": 0.0132, "step": 36910 }, { "epoch": 0.91, "learning_rate": 1.8230759758650415e-05, "loss": 0.0134, "step": 36920 }, { "epoch": 0.91, "learning_rate": 1.8230267208471865e-05, "loss": 0.0129, "step": 36930 }, { "epoch": 0.91, "learning_rate": 1.8229774658293317e-05, "loss": 0.0134, "step": 36940 }, { "epoch": 0.91, "learning_rate": 1.8229282108114766e-05, "loss": 0.0141, "step": 36950 }, { "epoch": 0.91, "learning_rate": 1.822878955793622e-05, "loss": 0.0242, "step": 36960 }, { "epoch": 0.91, "learning_rate": 1.8228297007757668e-05, "loss": 0.0132, "step": 36970 }, { "epoch": 0.91, "learning_rate": 1.8227804457579117e-05, "loss": 0.013, "step": 36980 }, { "epoch": 0.91, "learning_rate": 1.822731190740057e-05, "loss": 0.0126, "step": 36990 }, { "epoch": 0.91, "learning_rate": 1.822681935722202e-05, "loss": 0.0155, "step": 37000 }, { "epoch": 0.91, "learning_rate": 1.8226326807043468e-05, "loss": 0.0199, "step": 37010 }, { "epoch": 0.91, "learning_rate": 1.822583425686492e-05, "loss": 0.0179, "step": 37020 }, { "epoch": 0.91, "learning_rate": 1.822534170668637e-05, "loss": 0.0194, "step": 37030 }, { "epoch": 0.91, "learning_rate": 1.822484915650782e-05, "loss": 0.0163, "step": 37040 }, { "epoch": 0.91, "learning_rate": 1.822435660632927e-05, "loss": 0.0142, "step": 37050 }, { "epoch": 0.91, "learning_rate": 1.822386405615072e-05, "loss": 0.0115, "step": 37060 }, { "epoch": 0.91, "learning_rate": 1.8223371505972173e-05, "loss": 0.0259, "step": 37070 }, { "epoch": 0.91, "learning_rate": 1.8222878955793622e-05, "loss": 0.0122, "step": 37080 }, { "epoch": 0.91, "learning_rate": 1.8222386405615075e-05, "loss": 0.0116, "step": 37090 }, { "epoch": 0.91, "learning_rate": 1.8221893855436524e-05, "loss": 0.0146, "step": 37100 }, { "epoch": 0.91, "learning_rate": 1.8221401305257976e-05, "loss": 0.0126, "step": 37110 }, { "epoch": 0.91, "learning_rate": 1.8220908755079425e-05, "loss": 0.0172, "step": 37120 }, { "epoch": 0.91, "learning_rate": 1.8220416204900875e-05, "loss": 0.0101, "step": 37130 }, { "epoch": 0.91, "learning_rate": 1.8219923654722327e-05, "loss": 0.0157, "step": 37140 }, { "epoch": 0.91, "learning_rate": 1.8219431104543776e-05, "loss": 0.0147, "step": 37150 }, { "epoch": 0.91, "learning_rate": 1.8218938554365225e-05, "loss": 0.0146, "step": 37160 }, { "epoch": 0.91, "learning_rate": 1.8218446004186678e-05, "loss": 0.0143, "step": 37170 }, { "epoch": 0.91, "learning_rate": 1.8217953454008127e-05, "loss": 0.0126, "step": 37180 }, { "epoch": 0.91, "learning_rate": 1.821746090382958e-05, "loss": 0.0181, "step": 37190 }, { "epoch": 0.91, "learning_rate": 1.821696835365103e-05, "loss": 0.0154, "step": 37200 }, { "epoch": 0.91, "learning_rate": 1.821647580347248e-05, "loss": 0.0152, "step": 37210 }, { "epoch": 0.91, "learning_rate": 1.821598325329393e-05, "loss": 0.0214, "step": 37220 }, { "epoch": 0.91, "learning_rate": 1.8215490703115383e-05, "loss": 0.0128, "step": 37230 }, { "epoch": 0.91, "learning_rate": 1.8214998152936832e-05, "loss": 0.0126, "step": 37240 }, { "epoch": 0.92, "learning_rate": 1.8214505602758285e-05, "loss": 0.0188, "step": 37250 }, { "epoch": 0.92, "learning_rate": 1.8214013052579734e-05, "loss": 0.0142, "step": 37260 }, { "epoch": 0.92, "learning_rate": 1.8213520502401183e-05, "loss": 0.0154, "step": 37270 }, { "epoch": 0.92, "learning_rate": 1.8213027952222636e-05, "loss": 0.0139, "step": 37280 }, { "epoch": 0.92, "learning_rate": 1.8212535402044085e-05, "loss": 0.0242, "step": 37290 }, { "epoch": 0.92, "learning_rate": 1.8212042851865534e-05, "loss": 0.0118, "step": 37300 }, { "epoch": 0.92, "learning_rate": 1.8211550301686986e-05, "loss": 0.0146, "step": 37310 }, { "epoch": 0.92, "learning_rate": 1.8211057751508436e-05, "loss": 0.0135, "step": 37320 }, { "epoch": 0.92, "learning_rate": 1.8210565201329885e-05, "loss": 0.015, "step": 37330 }, { "epoch": 0.92, "learning_rate": 1.8210072651151337e-05, "loss": 0.0128, "step": 37340 }, { "epoch": 0.92, "learning_rate": 1.8209580100972786e-05, "loss": 0.0139, "step": 37350 }, { "epoch": 0.92, "learning_rate": 1.820908755079424e-05, "loss": 0.022, "step": 37360 }, { "epoch": 0.92, "learning_rate": 1.820859500061569e-05, "loss": 0.0136, "step": 37370 }, { "epoch": 0.92, "learning_rate": 1.820810245043714e-05, "loss": 0.0158, "step": 37380 }, { "epoch": 0.92, "learning_rate": 1.820760990025859e-05, "loss": 0.0132, "step": 37390 }, { "epoch": 0.92, "learning_rate": 1.8207117350080042e-05, "loss": 0.022, "step": 37400 }, { "epoch": 0.92, "learning_rate": 1.820662479990149e-05, "loss": 0.0142, "step": 37410 }, { "epoch": 0.92, "learning_rate": 1.820613224972294e-05, "loss": 0.0165, "step": 37420 }, { "epoch": 0.92, "learning_rate": 1.8205639699544393e-05, "loss": 0.0116, "step": 37430 }, { "epoch": 0.92, "learning_rate": 1.8205147149365842e-05, "loss": 0.0142, "step": 37440 }, { "epoch": 0.92, "learning_rate": 1.8204654599187295e-05, "loss": 0.0137, "step": 37450 }, { "epoch": 0.92, "learning_rate": 1.8204162049008744e-05, "loss": 0.0244, "step": 37460 }, { "epoch": 0.92, "learning_rate": 1.8203669498830193e-05, "loss": 0.0163, "step": 37470 }, { "epoch": 0.92, "learning_rate": 1.8203176948651646e-05, "loss": 0.0137, "step": 37480 }, { "epoch": 0.92, "learning_rate": 1.8202684398473095e-05, "loss": 0.0137, "step": 37490 }, { "epoch": 0.92, "learning_rate": 1.8202191848294547e-05, "loss": 0.0143, "step": 37500 }, { "epoch": 0.92, "learning_rate": 1.8201699298115996e-05, "loss": 0.0196, "step": 37510 }, { "epoch": 0.92, "learning_rate": 1.820120674793745e-05, "loss": 0.0121, "step": 37520 }, { "epoch": 0.92, "learning_rate": 1.8200714197758898e-05, "loss": 0.0166, "step": 37530 }, { "epoch": 0.92, "learning_rate": 1.820022164758035e-05, "loss": 0.0146, "step": 37540 }, { "epoch": 0.92, "learning_rate": 1.81997290974018e-05, "loss": 0.0148, "step": 37550 }, { "epoch": 0.92, "learning_rate": 1.819923654722325e-05, "loss": 0.0125, "step": 37560 }, { "epoch": 0.92, "learning_rate": 1.81987439970447e-05, "loss": 0.0135, "step": 37570 }, { "epoch": 0.92, "learning_rate": 1.819825144686615e-05, "loss": 0.0108, "step": 37580 }, { "epoch": 0.92, "learning_rate": 1.81977588966876e-05, "loss": 0.0173, "step": 37590 }, { "epoch": 0.92, "learning_rate": 1.8197266346509052e-05, "loss": 0.021, "step": 37600 }, { "epoch": 0.92, "learning_rate": 1.81967737963305e-05, "loss": 0.0183, "step": 37610 }, { "epoch": 0.92, "learning_rate": 1.819628124615195e-05, "loss": 0.0108, "step": 37620 }, { "epoch": 0.92, "learning_rate": 1.8195788695973403e-05, "loss": 0.0216, "step": 37630 }, { "epoch": 0.92, "learning_rate": 1.8195296145794856e-05, "loss": 0.0137, "step": 37640 }, { "epoch": 0.92, "learning_rate": 1.8194803595616305e-05, "loss": 0.0211, "step": 37650 }, { "epoch": 0.93, "learning_rate": 1.8194311045437757e-05, "loss": 0.015, "step": 37660 }, { "epoch": 0.93, "learning_rate": 1.8193818495259207e-05, "loss": 0.0146, "step": 37670 }, { "epoch": 0.93, "learning_rate": 1.819332594508066e-05, "loss": 0.0148, "step": 37680 }, { "epoch": 0.93, "learning_rate": 1.8192833394902108e-05, "loss": 0.0138, "step": 37690 }, { "epoch": 0.93, "learning_rate": 1.8192340844723557e-05, "loss": 0.0149, "step": 37700 }, { "epoch": 0.93, "learning_rate": 1.819184829454501e-05, "loss": 0.0118, "step": 37710 }, { "epoch": 0.93, "learning_rate": 1.819135574436646e-05, "loss": 0.0124, "step": 37720 }, { "epoch": 0.93, "learning_rate": 1.8190863194187908e-05, "loss": 0.0155, "step": 37730 }, { "epoch": 0.93, "learning_rate": 1.819037064400936e-05, "loss": 0.0142, "step": 37740 }, { "epoch": 0.93, "learning_rate": 1.818987809383081e-05, "loss": 0.0199, "step": 37750 }, { "epoch": 0.93, "learning_rate": 1.818938554365226e-05, "loss": 0.0166, "step": 37760 }, { "epoch": 0.93, "learning_rate": 1.818889299347371e-05, "loss": 0.0182, "step": 37770 }, { "epoch": 0.93, "learning_rate": 1.818840044329516e-05, "loss": 0.0106, "step": 37780 }, { "epoch": 0.93, "learning_rate": 1.8187907893116613e-05, "loss": 0.013, "step": 37790 }, { "epoch": 0.93, "learning_rate": 1.8187415342938062e-05, "loss": 0.0206, "step": 37800 }, { "epoch": 0.93, "learning_rate": 1.8186922792759515e-05, "loss": 0.01, "step": 37810 }, { "epoch": 0.93, "learning_rate": 1.8186430242580964e-05, "loss": 0.0164, "step": 37820 }, { "epoch": 0.93, "learning_rate": 1.8185937692402417e-05, "loss": 0.0122, "step": 37830 }, { "epoch": 0.93, "learning_rate": 1.8185445142223866e-05, "loss": 0.0131, "step": 37840 }, { "epoch": 0.93, "learning_rate": 1.8184952592045315e-05, "loss": 0.0154, "step": 37850 }, { "epoch": 0.93, "learning_rate": 1.8184460041866767e-05, "loss": 0.0245, "step": 37860 }, { "epoch": 0.93, "learning_rate": 1.8183967491688217e-05, "loss": 0.0139, "step": 37870 }, { "epoch": 0.93, "learning_rate": 1.8183474941509666e-05, "loss": 0.0149, "step": 37880 }, { "epoch": 0.93, "learning_rate": 1.8182982391331118e-05, "loss": 0.0126, "step": 37890 }, { "epoch": 0.93, "learning_rate": 1.8182489841152567e-05, "loss": 0.0146, "step": 37900 }, { "epoch": 0.93, "learning_rate": 1.818199729097402e-05, "loss": 0.0195, "step": 37910 }, { "epoch": 0.93, "learning_rate": 1.818150474079547e-05, "loss": 0.0185, "step": 37920 }, { "epoch": 0.93, "learning_rate": 1.818101219061692e-05, "loss": 0.0137, "step": 37930 }, { "epoch": 0.93, "learning_rate": 1.818051964043837e-05, "loss": 0.0141, "step": 37940 }, { "epoch": 0.93, "learning_rate": 1.8180027090259823e-05, "loss": 0.0135, "step": 37950 }, { "epoch": 0.93, "learning_rate": 1.8179534540081272e-05, "loss": 0.0244, "step": 37960 }, { "epoch": 0.93, "learning_rate": 1.8179041989902725e-05, "loss": 0.0121, "step": 37970 }, { "epoch": 0.93, "learning_rate": 1.8178549439724174e-05, "loss": 0.0148, "step": 37980 }, { "epoch": 0.93, "learning_rate": 1.8178056889545623e-05, "loss": 0.0165, "step": 37990 }, { "epoch": 0.93, "learning_rate": 1.8177564339367076e-05, "loss": 0.0176, "step": 38000 }, { "epoch": 0.93, "learning_rate": 1.8177071789188525e-05, "loss": 0.0155, "step": 38010 }, { "epoch": 0.93, "learning_rate": 1.8176579239009974e-05, "loss": 0.0244, "step": 38020 }, { "epoch": 0.93, "learning_rate": 1.8176086688831427e-05, "loss": 0.0113, "step": 38030 }, { "epoch": 0.93, "learning_rate": 1.8175594138652876e-05, "loss": 0.0126, "step": 38040 }, { "epoch": 0.93, "learning_rate": 1.8175101588474325e-05, "loss": 0.0207, "step": 38050 }, { "epoch": 0.94, "learning_rate": 1.8174609038295778e-05, "loss": 0.0215, "step": 38060 }, { "epoch": 0.94, "learning_rate": 1.8174116488117227e-05, "loss": 0.0141, "step": 38070 }, { "epoch": 0.94, "learning_rate": 1.817362393793868e-05, "loss": 0.018, "step": 38080 }, { "epoch": 0.94, "learning_rate": 1.8173131387760132e-05, "loss": 0.0124, "step": 38090 }, { "epoch": 0.94, "learning_rate": 1.817263883758158e-05, "loss": 0.0129, "step": 38100 }, { "epoch": 0.94, "learning_rate": 1.817214628740303e-05, "loss": 0.0206, "step": 38110 }, { "epoch": 0.94, "learning_rate": 1.8171653737224483e-05, "loss": 0.0205, "step": 38120 }, { "epoch": 0.94, "learning_rate": 1.8171161187045932e-05, "loss": 0.0168, "step": 38130 }, { "epoch": 0.94, "learning_rate": 1.817066863686738e-05, "loss": 0.0149, "step": 38140 }, { "epoch": 0.94, "learning_rate": 1.8170176086688833e-05, "loss": 0.0148, "step": 38150 }, { "epoch": 0.94, "learning_rate": 1.8169683536510283e-05, "loss": 0.015, "step": 38160 }, { "epoch": 0.94, "learning_rate": 1.8169190986331735e-05, "loss": 0.0157, "step": 38170 }, { "epoch": 0.94, "learning_rate": 1.8168698436153184e-05, "loss": 0.0148, "step": 38180 }, { "epoch": 0.94, "learning_rate": 1.8168205885974633e-05, "loss": 0.0138, "step": 38190 }, { "epoch": 0.94, "learning_rate": 1.8167713335796086e-05, "loss": 0.0135, "step": 38200 }, { "epoch": 0.94, "learning_rate": 1.8167220785617535e-05, "loss": 0.0118, "step": 38210 }, { "epoch": 0.94, "learning_rate": 1.8166728235438988e-05, "loss": 0.0129, "step": 38220 }, { "epoch": 0.94, "learning_rate": 1.8166235685260437e-05, "loss": 0.0127, "step": 38230 }, { "epoch": 0.94, "learning_rate": 1.816574313508189e-05, "loss": 0.0131, "step": 38240 }, { "epoch": 0.94, "learning_rate": 1.816525058490334e-05, "loss": 0.0147, "step": 38250 }, { "epoch": 0.94, "learning_rate": 1.816475803472479e-05, "loss": 0.0138, "step": 38260 }, { "epoch": 0.94, "learning_rate": 1.816426548454624e-05, "loss": 0.0268, "step": 38270 }, { "epoch": 0.94, "learning_rate": 1.816377293436769e-05, "loss": 0.0129, "step": 38280 }, { "epoch": 0.94, "learning_rate": 1.8163280384189142e-05, "loss": 0.0166, "step": 38290 }, { "epoch": 0.94, "learning_rate": 1.816278783401059e-05, "loss": 0.0148, "step": 38300 }, { "epoch": 0.94, "learning_rate": 1.816229528383204e-05, "loss": 0.0153, "step": 38310 }, { "epoch": 0.94, "learning_rate": 1.8161802733653493e-05, "loss": 0.0139, "step": 38320 }, { "epoch": 0.94, "learning_rate": 1.8161310183474942e-05, "loss": 0.011, "step": 38330 }, { "epoch": 0.94, "learning_rate": 1.816081763329639e-05, "loss": 0.0223, "step": 38340 }, { "epoch": 0.94, "learning_rate": 1.8160325083117844e-05, "loss": 0.014, "step": 38350 }, { "epoch": 0.94, "learning_rate": 1.8159832532939296e-05, "loss": 0.0158, "step": 38360 }, { "epoch": 0.94, "learning_rate": 1.8159339982760745e-05, "loss": 0.0174, "step": 38370 }, { "epoch": 0.94, "learning_rate": 1.8158847432582198e-05, "loss": 0.014, "step": 38380 }, { "epoch": 0.94, "learning_rate": 1.8158354882403647e-05, "loss": 0.0131, "step": 38390 }, { "epoch": 0.94, "learning_rate": 1.81578623322251e-05, "loss": 0.0183, "step": 38400 }, { "epoch": 0.94, "learning_rate": 1.815736978204655e-05, "loss": 0.0134, "step": 38410 }, { "epoch": 0.94, "learning_rate": 1.8156877231867998e-05, "loss": 0.0123, "step": 38420 }, { "epoch": 0.94, "learning_rate": 1.815638468168945e-05, "loss": 0.0145, "step": 38430 }, { "epoch": 0.94, "learning_rate": 1.81558921315109e-05, "loss": 0.0132, "step": 38440 }, { "epoch": 0.94, "learning_rate": 1.815539958133235e-05, "loss": 0.0159, "step": 38450 }, { "epoch": 0.94, "learning_rate": 1.81549070311538e-05, "loss": 0.0158, "step": 38460 }, { "epoch": 0.95, "learning_rate": 1.815441448097525e-05, "loss": 0.014, "step": 38470 }, { "epoch": 0.95, "learning_rate": 1.81539219307967e-05, "loss": 0.0162, "step": 38480 }, { "epoch": 0.95, "learning_rate": 1.8153429380618152e-05, "loss": 0.0135, "step": 38490 }, { "epoch": 0.95, "learning_rate": 1.81529368304396e-05, "loss": 0.0156, "step": 38500 }, { "epoch": 0.95, "learning_rate": 1.8152444280261054e-05, "loss": 0.0126, "step": 38510 }, { "epoch": 0.95, "learning_rate": 1.8151951730082503e-05, "loss": 0.012, "step": 38520 }, { "epoch": 0.95, "learning_rate": 1.8151459179903955e-05, "loss": 0.0125, "step": 38530 }, { "epoch": 0.95, "learning_rate": 1.8150966629725404e-05, "loss": 0.0185, "step": 38540 }, { "epoch": 0.95, "learning_rate": 1.8150474079546857e-05, "loss": 0.0117, "step": 38550 }, { "epoch": 0.95, "learning_rate": 1.8149981529368306e-05, "loss": 0.0153, "step": 38560 }, { "epoch": 0.95, "learning_rate": 1.8149488979189755e-05, "loss": 0.0203, "step": 38570 }, { "epoch": 0.95, "learning_rate": 1.8148996429011208e-05, "loss": 0.0155, "step": 38580 }, { "epoch": 0.95, "learning_rate": 1.8148503878832657e-05, "loss": 0.0124, "step": 38590 }, { "epoch": 0.95, "learning_rate": 1.8148011328654106e-05, "loss": 0.0169, "step": 38600 }, { "epoch": 0.95, "learning_rate": 1.814751877847556e-05, "loss": 0.0127, "step": 38610 }, { "epoch": 0.95, "learning_rate": 1.8147026228297008e-05, "loss": 0.0142, "step": 38620 }, { "epoch": 0.95, "learning_rate": 1.814653367811846e-05, "loss": 0.0117, "step": 38630 }, { "epoch": 0.95, "learning_rate": 1.814604112793991e-05, "loss": 0.0145, "step": 38640 }, { "epoch": 0.95, "learning_rate": 1.8145548577761362e-05, "loss": 0.0129, "step": 38650 }, { "epoch": 0.95, "learning_rate": 1.814505602758281e-05, "loss": 0.0157, "step": 38660 }, { "epoch": 0.95, "learning_rate": 1.8144563477404264e-05, "loss": 0.0212, "step": 38670 }, { "epoch": 0.95, "learning_rate": 1.8144070927225713e-05, "loss": 0.0218, "step": 38680 }, { "epoch": 0.95, "learning_rate": 1.8143578377047165e-05, "loss": 0.0161, "step": 38690 }, { "epoch": 0.95, "learning_rate": 1.8143085826868615e-05, "loss": 0.0133, "step": 38700 }, { "epoch": 0.95, "learning_rate": 1.8142593276690064e-05, "loss": 0.0143, "step": 38710 }, { "epoch": 0.95, "learning_rate": 1.8142100726511516e-05, "loss": 0.0202, "step": 38720 }, { "epoch": 0.95, "learning_rate": 1.8141608176332965e-05, "loss": 0.0155, "step": 38730 }, { "epoch": 0.95, "learning_rate": 1.8141115626154415e-05, "loss": 0.0135, "step": 38740 }, { "epoch": 0.95, "learning_rate": 1.8140623075975867e-05, "loss": 0.0198, "step": 38750 }, { "epoch": 0.95, "learning_rate": 1.8140130525797316e-05, "loss": 0.0164, "step": 38760 }, { "epoch": 0.95, "learning_rate": 1.8139637975618765e-05, "loss": 0.0152, "step": 38770 }, { "epoch": 0.95, "learning_rate": 1.8139145425440218e-05, "loss": 0.0225, "step": 38780 }, { "epoch": 0.95, "learning_rate": 1.8138652875261667e-05, "loss": 0.0133, "step": 38790 }, { "epoch": 0.95, "learning_rate": 1.813816032508312e-05, "loss": 0.0199, "step": 38800 }, { "epoch": 0.95, "learning_rate": 1.8137667774904572e-05, "loss": 0.0199, "step": 38810 }, { "epoch": 0.95, "learning_rate": 1.813717522472602e-05, "loss": 0.0212, "step": 38820 }, { "epoch": 0.95, "learning_rate": 1.813668267454747e-05, "loss": 0.0146, "step": 38830 }, { "epoch": 0.95, "learning_rate": 1.8136190124368923e-05, "loss": 0.0148, "step": 38840 }, { "epoch": 0.95, "learning_rate": 1.8135697574190372e-05, "loss": 0.0126, "step": 38850 }, { "epoch": 0.95, "learning_rate": 1.813520502401182e-05, "loss": 0.0152, "step": 38860 }, { "epoch": 0.95, "learning_rate": 1.8134712473833274e-05, "loss": 0.0126, "step": 38870 }, { "epoch": 0.96, "learning_rate": 1.8134219923654723e-05, "loss": 0.0153, "step": 38880 }, { "epoch": 0.96, "learning_rate": 1.8133727373476175e-05, "loss": 0.0135, "step": 38890 }, { "epoch": 0.96, "learning_rate": 1.8133234823297625e-05, "loss": 0.0145, "step": 38900 }, { "epoch": 0.96, "learning_rate": 1.8132742273119074e-05, "loss": 0.0122, "step": 38910 }, { "epoch": 0.96, "learning_rate": 1.8132249722940526e-05, "loss": 0.0142, "step": 38920 }, { "epoch": 0.96, "learning_rate": 1.8131757172761975e-05, "loss": 0.0193, "step": 38930 }, { "epoch": 0.96, "learning_rate": 1.8131264622583428e-05, "loss": 0.0139, "step": 38940 }, { "epoch": 0.96, "learning_rate": 1.8130772072404877e-05, "loss": 0.0165, "step": 38950 }, { "epoch": 0.96, "learning_rate": 1.813027952222633e-05, "loss": 0.0163, "step": 38960 }, { "epoch": 0.96, "learning_rate": 1.812978697204778e-05, "loss": 0.0128, "step": 38970 }, { "epoch": 0.96, "learning_rate": 1.812929442186923e-05, "loss": 0.0135, "step": 38980 }, { "epoch": 0.96, "learning_rate": 1.812880187169068e-05, "loss": 0.0121, "step": 38990 }, { "epoch": 0.96, "learning_rate": 1.812830932151213e-05, "loss": 0.0142, "step": 39000 }, { "epoch": 0.96, "learning_rate": 1.8127816771333582e-05, "loss": 0.0138, "step": 39010 }, { "epoch": 0.96, "learning_rate": 1.812732422115503e-05, "loss": 0.0124, "step": 39020 }, { "epoch": 0.96, "learning_rate": 1.812683167097648e-05, "loss": 0.0122, "step": 39030 }, { "epoch": 0.96, "learning_rate": 1.8126339120797933e-05, "loss": 0.0229, "step": 39040 }, { "epoch": 0.96, "learning_rate": 1.8125846570619382e-05, "loss": 0.012, "step": 39050 }, { "epoch": 0.96, "learning_rate": 1.812535402044083e-05, "loss": 0.0186, "step": 39060 }, { "epoch": 0.96, "learning_rate": 1.8124861470262284e-05, "loss": 0.0188, "step": 39070 }, { "epoch": 0.96, "learning_rate": 1.8124368920083736e-05, "loss": 0.0147, "step": 39080 }, { "epoch": 0.96, "learning_rate": 1.8123876369905186e-05, "loss": 0.0141, "step": 39090 }, { "epoch": 0.96, "learning_rate": 1.8123383819726638e-05, "loss": 0.0206, "step": 39100 }, { "epoch": 0.96, "learning_rate": 1.8122891269548087e-05, "loss": 0.0122, "step": 39110 }, { "epoch": 0.96, "learning_rate": 1.812239871936954e-05, "loss": 0.0136, "step": 39120 }, { "epoch": 0.96, "learning_rate": 1.812190616919099e-05, "loss": 0.02, "step": 39130 }, { "epoch": 0.96, "learning_rate": 1.8121413619012438e-05, "loss": 0.012, "step": 39140 }, { "epoch": 0.96, "learning_rate": 1.812092106883389e-05, "loss": 0.0187, "step": 39150 }, { "epoch": 0.96, "learning_rate": 1.812042851865534e-05, "loss": 0.0118, "step": 39160 }, { "epoch": 0.96, "learning_rate": 1.811993596847679e-05, "loss": 0.0192, "step": 39170 }, { "epoch": 0.96, "learning_rate": 1.811944341829824e-05, "loss": 0.0148, "step": 39180 }, { "epoch": 0.96, "learning_rate": 1.811895086811969e-05, "loss": 0.0253, "step": 39190 }, { "epoch": 0.96, "learning_rate": 1.811845831794114e-05, "loss": 0.0137, "step": 39200 }, { "epoch": 0.96, "learning_rate": 1.8117965767762592e-05, "loss": 0.0148, "step": 39210 }, { "epoch": 0.96, "learning_rate": 1.811747321758404e-05, "loss": 0.0175, "step": 39220 }, { "epoch": 0.96, "learning_rate": 1.8116980667405494e-05, "loss": 0.0144, "step": 39230 }, { "epoch": 0.96, "learning_rate": 1.8116488117226943e-05, "loss": 0.0141, "step": 39240 }, { "epoch": 0.96, "learning_rate": 1.8115995567048396e-05, "loss": 0.0173, "step": 39250 }, { "epoch": 0.96, "learning_rate": 1.8115503016869845e-05, "loss": 0.0142, "step": 39260 }, { "epoch": 0.96, "learning_rate": 1.8115010466691297e-05, "loss": 0.0164, "step": 39270 }, { "epoch": 0.96, "learning_rate": 1.8114517916512746e-05, "loss": 0.0148, "step": 39280 }, { "epoch": 0.97, "learning_rate": 1.8114025366334196e-05, "loss": 0.0137, "step": 39290 }, { "epoch": 0.97, "learning_rate": 1.8113532816155648e-05, "loss": 0.0166, "step": 39300 }, { "epoch": 0.97, "learning_rate": 1.8113040265977097e-05, "loss": 0.013, "step": 39310 }, { "epoch": 0.97, "learning_rate": 1.8112547715798546e-05, "loss": 0.0137, "step": 39320 }, { "epoch": 0.97, "learning_rate": 1.811205516562e-05, "loss": 0.0138, "step": 39330 }, { "epoch": 0.97, "learning_rate": 1.8111562615441448e-05, "loss": 0.0131, "step": 39340 }, { "epoch": 0.97, "learning_rate": 1.81110700652629e-05, "loss": 0.0194, "step": 39350 }, { "epoch": 0.97, "learning_rate": 1.811057751508435e-05, "loss": 0.0136, "step": 39360 }, { "epoch": 0.97, "learning_rate": 1.8110084964905802e-05, "loss": 0.0127, "step": 39370 }, { "epoch": 0.97, "learning_rate": 1.810959241472725e-05, "loss": 0.0146, "step": 39380 }, { "epoch": 0.97, "learning_rate": 1.8109099864548704e-05, "loss": 0.0145, "step": 39390 }, { "epoch": 0.97, "learning_rate": 1.8108607314370153e-05, "loss": 0.012, "step": 39400 }, { "epoch": 0.97, "learning_rate": 1.8108114764191606e-05, "loss": 0.0159, "step": 39410 }, { "epoch": 0.97, "learning_rate": 1.8107622214013055e-05, "loss": 0.0135, "step": 39420 }, { "epoch": 0.97, "learning_rate": 1.8107129663834504e-05, "loss": 0.0251, "step": 39430 }, { "epoch": 0.97, "learning_rate": 1.8106637113655957e-05, "loss": 0.0134, "step": 39440 }, { "epoch": 0.97, "learning_rate": 1.8106144563477406e-05, "loss": 0.0132, "step": 39450 }, { "epoch": 0.97, "learning_rate": 1.8105652013298855e-05, "loss": 0.016, "step": 39460 }, { "epoch": 0.97, "learning_rate": 1.8105159463120307e-05, "loss": 0.0133, "step": 39470 }, { "epoch": 0.97, "learning_rate": 1.8104666912941757e-05, "loss": 0.0137, "step": 39480 }, { "epoch": 0.97, "learning_rate": 1.8104174362763206e-05, "loss": 0.0155, "step": 39490 }, { "epoch": 0.97, "learning_rate": 1.8103681812584658e-05, "loss": 0.0117, "step": 39500 }, { "epoch": 0.97, "learning_rate": 1.8103189262406107e-05, "loss": 0.0196, "step": 39510 }, { "epoch": 0.97, "learning_rate": 1.810269671222756e-05, "loss": 0.013, "step": 39520 }, { "epoch": 0.97, "learning_rate": 1.8102204162049012e-05, "loss": 0.014, "step": 39530 }, { "epoch": 0.97, "learning_rate": 1.810171161187046e-05, "loss": 0.0134, "step": 39540 }, { "epoch": 0.97, "learning_rate": 1.810121906169191e-05, "loss": 0.0149, "step": 39550 }, { "epoch": 0.97, "learning_rate": 1.8100726511513363e-05, "loss": 0.0207, "step": 39560 }, { "epoch": 0.97, "learning_rate": 1.8100233961334812e-05, "loss": 0.0115, "step": 39570 }, { "epoch": 0.97, "learning_rate": 1.809974141115626e-05, "loss": 0.0133, "step": 39580 }, { "epoch": 0.97, "learning_rate": 1.8099248860977714e-05, "loss": 0.0131, "step": 39590 }, { "epoch": 0.97, "learning_rate": 1.8098756310799163e-05, "loss": 0.0171, "step": 39600 }, { "epoch": 0.97, "learning_rate": 1.8098263760620616e-05, "loss": 0.0126, "step": 39610 }, { "epoch": 0.97, "learning_rate": 1.8097771210442065e-05, "loss": 0.0134, "step": 39620 }, { "epoch": 0.97, "learning_rate": 1.8097278660263514e-05, "loss": 0.0139, "step": 39630 }, { "epoch": 0.97, "learning_rate": 1.8096786110084967e-05, "loss": 0.016, "step": 39640 }, { "epoch": 0.97, "learning_rate": 1.8096293559906416e-05, "loss": 0.0134, "step": 39650 }, { "epoch": 0.97, "learning_rate": 1.809580100972787e-05, "loss": 0.0132, "step": 39660 }, { "epoch": 0.97, "learning_rate": 1.8095308459549317e-05, "loss": 0.0146, "step": 39670 }, { "epoch": 0.97, "learning_rate": 1.809481590937077e-05, "loss": 0.0156, "step": 39680 }, { "epoch": 0.98, "learning_rate": 1.809432335919222e-05, "loss": 0.0144, "step": 39690 }, { "epoch": 0.98, "learning_rate": 1.809383080901367e-05, "loss": 0.0235, "step": 39700 }, { "epoch": 0.98, "learning_rate": 1.809333825883512e-05, "loss": 0.0192, "step": 39710 }, { "epoch": 0.98, "learning_rate": 1.809284570865657e-05, "loss": 0.0141, "step": 39720 }, { "epoch": 0.98, "learning_rate": 1.8092353158478023e-05, "loss": 0.0115, "step": 39730 }, { "epoch": 0.98, "learning_rate": 1.809186060829947e-05, "loss": 0.0186, "step": 39740 }, { "epoch": 0.98, "learning_rate": 1.809136805812092e-05, "loss": 0.0121, "step": 39750 }, { "epoch": 0.98, "learning_rate": 1.8090875507942373e-05, "loss": 0.0132, "step": 39760 }, { "epoch": 0.98, "learning_rate": 1.8090382957763823e-05, "loss": 0.0174, "step": 39770 }, { "epoch": 0.98, "learning_rate": 1.808989040758527e-05, "loss": 0.0129, "step": 39780 }, { "epoch": 0.98, "learning_rate": 1.8089397857406724e-05, "loss": 0.0119, "step": 39790 }, { "epoch": 0.98, "learning_rate": 1.8088905307228177e-05, "loss": 0.0136, "step": 39800 }, { "epoch": 0.98, "learning_rate": 1.8088412757049626e-05, "loss": 0.0148, "step": 39810 }, { "epoch": 0.98, "learning_rate": 1.808792020687108e-05, "loss": 0.0135, "step": 39820 }, { "epoch": 0.98, "learning_rate": 1.8087427656692528e-05, "loss": 0.0145, "step": 39830 }, { "epoch": 0.98, "learning_rate": 1.808693510651398e-05, "loss": 0.014, "step": 39840 }, { "epoch": 0.98, "learning_rate": 1.808644255633543e-05, "loss": 0.0134, "step": 39850 }, { "epoch": 0.98, "learning_rate": 1.808595000615688e-05, "loss": 0.0125, "step": 39860 }, { "epoch": 0.98, "learning_rate": 1.808545745597833e-05, "loss": 0.0146, "step": 39870 }, { "epoch": 0.98, "learning_rate": 1.808496490579978e-05, "loss": 0.0127, "step": 39880 }, { "epoch": 0.98, "learning_rate": 1.808447235562123e-05, "loss": 0.0173, "step": 39890 }, { "epoch": 0.98, "learning_rate": 1.8083979805442682e-05, "loss": 0.0187, "step": 39900 }, { "epoch": 0.98, "learning_rate": 1.808348725526413e-05, "loss": 0.0121, "step": 39910 }, { "epoch": 0.98, "learning_rate": 1.808299470508558e-05, "loss": 0.0129, "step": 39920 }, { "epoch": 0.98, "learning_rate": 1.8082502154907033e-05, "loss": 0.0133, "step": 39930 }, { "epoch": 0.98, "learning_rate": 1.8082009604728482e-05, "loss": 0.0165, "step": 39940 }, { "epoch": 0.98, "learning_rate": 1.8081517054549934e-05, "loss": 0.0117, "step": 39950 }, { "epoch": 0.98, "learning_rate": 1.8081024504371383e-05, "loss": 0.0204, "step": 39960 }, { "epoch": 0.98, "learning_rate": 1.8080531954192836e-05, "loss": 0.0143, "step": 39970 }, { "epoch": 0.98, "learning_rate": 1.8080039404014285e-05, "loss": 0.0222, "step": 39980 }, { "epoch": 0.98, "learning_rate": 1.8079546853835738e-05, "loss": 0.0125, "step": 39990 }, { "epoch": 0.98, "learning_rate": 1.8079054303657187e-05, "loss": 0.0146, "step": 40000 }, { "epoch": 0.98, "eval_bleu": 97.9695, "eval_gen_len_ratio": 0.9994, "eval_loss": 0.010049809701740742, "eval_runtime": 254.7267, "eval_samples_per_second": 7.325, "eval_steps_per_second": 0.232, "step": 40000 }, { "epoch": 0.98, "learning_rate": 1.8078561753478636e-05, "loss": 0.0133, "step": 40010 }, { "epoch": 0.98, "learning_rate": 1.807806920330009e-05, "loss": 0.016, "step": 40020 }, { "epoch": 0.98, "learning_rate": 1.8077576653121538e-05, "loss": 0.0222, "step": 40030 }, { "epoch": 0.98, "learning_rate": 1.8077084102942987e-05, "loss": 0.0117, "step": 40040 }, { "epoch": 0.98, "learning_rate": 1.807659155276444e-05, "loss": 0.0159, "step": 40050 }, { "epoch": 0.98, "learning_rate": 1.807609900258589e-05, "loss": 0.015, "step": 40060 }, { "epoch": 0.98, "learning_rate": 1.807560645240734e-05, "loss": 0.0167, "step": 40070 }, { "epoch": 0.98, "learning_rate": 1.807511390222879e-05, "loss": 0.0198, "step": 40080 }, { "epoch": 0.98, "learning_rate": 1.8074621352050243e-05, "loss": 0.0145, "step": 40090 }, { "epoch": 0.99, "learning_rate": 1.8074128801871692e-05, "loss": 0.0163, "step": 40100 }, { "epoch": 0.99, "learning_rate": 1.8073636251693144e-05, "loss": 0.013, "step": 40110 }, { "epoch": 0.99, "learning_rate": 1.8073143701514594e-05, "loss": 0.0197, "step": 40120 }, { "epoch": 0.99, "learning_rate": 1.8072651151336046e-05, "loss": 0.0127, "step": 40130 }, { "epoch": 0.99, "learning_rate": 1.8072158601157495e-05, "loss": 0.0125, "step": 40140 }, { "epoch": 0.99, "learning_rate": 1.8071666050978944e-05, "loss": 0.0172, "step": 40150 }, { "epoch": 0.99, "learning_rate": 1.8071173500800397e-05, "loss": 0.0128, "step": 40160 }, { "epoch": 0.99, "learning_rate": 1.8070680950621846e-05, "loss": 0.0114, "step": 40170 }, { "epoch": 0.99, "learning_rate": 1.8070188400443295e-05, "loss": 0.0114, "step": 40180 }, { "epoch": 0.99, "learning_rate": 1.8069695850264748e-05, "loss": 0.0204, "step": 40190 }, { "epoch": 0.99, "learning_rate": 1.8069203300086197e-05, "loss": 0.0149, "step": 40200 }, { "epoch": 0.99, "learning_rate": 1.8068710749907646e-05, "loss": 0.0137, "step": 40210 }, { "epoch": 0.99, "learning_rate": 1.80682181997291e-05, "loss": 0.0146, "step": 40220 }, { "epoch": 0.99, "learning_rate": 1.8067725649550548e-05, "loss": 0.0221, "step": 40230 }, { "epoch": 0.99, "learning_rate": 1.8067233099372e-05, "loss": 0.0139, "step": 40240 }, { "epoch": 0.99, "learning_rate": 1.8066740549193453e-05, "loss": 0.0171, "step": 40250 }, { "epoch": 0.99, "learning_rate": 1.8066247999014902e-05, "loss": 0.0142, "step": 40260 }, { "epoch": 0.99, "learning_rate": 1.806575544883635e-05, "loss": 0.0098, "step": 40270 }, { "epoch": 0.99, "learning_rate": 1.8065262898657804e-05, "loss": 0.0124, "step": 40280 }, { "epoch": 0.99, "learning_rate": 1.8064770348479253e-05, "loss": 0.0111, "step": 40290 }, { "epoch": 0.99, "learning_rate": 1.8064277798300702e-05, "loss": 0.021, "step": 40300 }, { "epoch": 0.99, "learning_rate": 1.8063785248122154e-05, "loss": 0.0226, "step": 40310 }, { "epoch": 0.99, "learning_rate": 1.8063292697943604e-05, "loss": 0.0119, "step": 40320 }, { "epoch": 0.99, "learning_rate": 1.8062800147765056e-05, "loss": 0.0136, "step": 40330 }, { "epoch": 0.99, "learning_rate": 1.8062307597586505e-05, "loss": 0.028, "step": 40340 }, { "epoch": 0.99, "learning_rate": 1.8061815047407954e-05, "loss": 0.0131, "step": 40350 }, { "epoch": 0.99, "learning_rate": 1.8061322497229407e-05, "loss": 0.0133, "step": 40360 }, { "epoch": 0.99, "learning_rate": 1.8060829947050856e-05, "loss": 0.0134, "step": 40370 }, { "epoch": 0.99, "learning_rate": 1.806033739687231e-05, "loss": 0.0138, "step": 40380 }, { "epoch": 0.99, "learning_rate": 1.8059844846693758e-05, "loss": 0.0147, "step": 40390 }, { "epoch": 0.99, "learning_rate": 1.805935229651521e-05, "loss": 0.0131, "step": 40400 }, { "epoch": 0.99, "learning_rate": 1.805885974633666e-05, "loss": 0.0347, "step": 40410 }, { "epoch": 0.99, "learning_rate": 1.8058367196158112e-05, "loss": 0.0167, "step": 40420 }, { "epoch": 0.99, "learning_rate": 1.805787464597956e-05, "loss": 0.0144, "step": 40430 }, { "epoch": 0.99, "learning_rate": 1.805738209580101e-05, "loss": 0.0162, "step": 40440 }, { "epoch": 0.99, "learning_rate": 1.8056889545622463e-05, "loss": 0.0141, "step": 40450 }, { "epoch": 0.99, "learning_rate": 1.8056396995443912e-05, "loss": 0.0142, "step": 40460 }, { "epoch": 0.99, "learning_rate": 1.805590444526536e-05, "loss": 0.0133, "step": 40470 }, { "epoch": 0.99, "learning_rate": 1.8055411895086814e-05, "loss": 0.0138, "step": 40480 }, { "epoch": 0.99, "learning_rate": 1.8054919344908263e-05, "loss": 0.0148, "step": 40490 }, { "epoch": 0.99, "learning_rate": 1.8054426794729712e-05, "loss": 0.0176, "step": 40500 }, { "epoch": 1.0, "learning_rate": 1.8053934244551165e-05, "loss": 0.0124, "step": 40510 }, { "epoch": 1.0, "learning_rate": 1.8053441694372617e-05, "loss": 0.0141, "step": 40520 }, { "epoch": 1.0, "learning_rate": 1.8052949144194066e-05, "loss": 0.0141, "step": 40530 }, { "epoch": 1.0, "learning_rate": 1.805245659401552e-05, "loss": 0.0137, "step": 40540 }, { "epoch": 1.0, "learning_rate": 1.8051964043836968e-05, "loss": 0.0139, "step": 40550 }, { "epoch": 1.0, "learning_rate": 1.805147149365842e-05, "loss": 0.0153, "step": 40560 }, { "epoch": 1.0, "learning_rate": 1.805097894347987e-05, "loss": 0.0131, "step": 40570 }, { "epoch": 1.0, "learning_rate": 1.805048639330132e-05, "loss": 0.0137, "step": 40580 }, { "epoch": 1.0, "learning_rate": 1.804999384312277e-05, "loss": 0.0146, "step": 40590 }, { "epoch": 1.0, "learning_rate": 1.804950129294422e-05, "loss": 0.0123, "step": 40600 }, { "epoch": 1.0, "learning_rate": 1.804900874276567e-05, "loss": 0.0116, "step": 40610 }, { "epoch": 1.0, "learning_rate": 1.8048516192587122e-05, "loss": 0.0138, "step": 40620 }, { "epoch": 1.0, "learning_rate": 1.804802364240857e-05, "loss": 0.013, "step": 40630 }, { "epoch": 1.0, "learning_rate": 1.804753109223002e-05, "loss": 0.0124, "step": 40640 }, { "epoch": 1.0, "learning_rate": 1.8047038542051473e-05, "loss": 0.0146, "step": 40650 }, { "epoch": 1.0, "learning_rate": 1.8046545991872922e-05, "loss": 0.0157, "step": 40660 }, { "epoch": 1.0, "learning_rate": 1.8046053441694375e-05, "loss": 0.0156, "step": 40670 }, { "epoch": 1.0, "learning_rate": 1.8045560891515824e-05, "loss": 0.0158, "step": 40680 }, { "epoch": 1.0, "learning_rate": 1.8045068341337276e-05, "loss": 0.0136, "step": 40690 }, { "epoch": 1.0, "learning_rate": 1.8044575791158725e-05, "loss": 0.0134, "step": 40700 }, { "epoch": 1.0, "learning_rate": 1.8044083240980178e-05, "loss": 0.0138, "step": 40710 }, { "epoch": 1.0, "learning_rate": 1.8043590690801627e-05, "loss": 0.0116, "step": 40720 }, { "epoch": 1.0, "learning_rate": 1.8043098140623076e-05, "loss": 0.0144, "step": 40730 }, { "epoch": 1.0, "learning_rate": 1.804260559044453e-05, "loss": 0.0119, "step": 40740 }, { "epoch": 1.0, "learning_rate": 1.8042113040265978e-05, "loss": 0.0143, "step": 40750 }, { "epoch": 1.0, "learning_rate": 1.8041620490087427e-05, "loss": 0.0116, "step": 40760 }, { "epoch": 1.0, "learning_rate": 1.804112793990888e-05, "loss": 0.0163, "step": 40770 }, { "epoch": 1.0, "learning_rate": 1.804063538973033e-05, "loss": 0.0109, "step": 40780 }, { "epoch": 1.0, "learning_rate": 1.804014283955178e-05, "loss": 0.02, "step": 40790 }, { "epoch": 1.0, "learning_rate": 1.803965028937323e-05, "loss": 0.0162, "step": 40800 }, { "epoch": 1.0, "learning_rate": 1.8039157739194683e-05, "loss": 0.0136, "step": 40810 }, { "epoch": 1.0, "learning_rate": 1.8038665189016132e-05, "loss": 0.0213, "step": 40820 }, { "epoch": 1.0, "learning_rate": 1.8038172638837585e-05, "loss": 0.0168, "step": 40830 }, { "epoch": 1.0, "learning_rate": 1.8037680088659034e-05, "loss": 0.0134, "step": 40840 }, { "epoch": 1.0, "learning_rate": 1.8037187538480486e-05, "loss": 0.0127, "step": 40850 }, { "epoch": 1.0, "learning_rate": 1.8036694988301936e-05, "loss": 0.0113, "step": 40860 }, { "epoch": 1.0, "learning_rate": 1.8036202438123385e-05, "loss": 0.0124, "step": 40870 }, { "epoch": 1.0, "learning_rate": 1.8035709887944837e-05, "loss": 0.0218, "step": 40880 }, { "epoch": 1.0, "learning_rate": 1.8035217337766286e-05, "loss": 0.0131, "step": 40890 }, { "epoch": 1.0, "learning_rate": 1.8034724787587736e-05, "loss": 0.0119, "step": 40900 }, { "epoch": 1.01, "learning_rate": 1.8034232237409188e-05, "loss": 0.0221, "step": 40910 }, { "epoch": 1.01, "learning_rate": 1.8033739687230637e-05, "loss": 0.0136, "step": 40920 }, { "epoch": 1.01, "learning_rate": 1.8033247137052086e-05, "loss": 0.0147, "step": 40930 }, { "epoch": 1.01, "learning_rate": 1.803275458687354e-05, "loss": 0.016, "step": 40940 }, { "epoch": 1.01, "learning_rate": 1.8032262036694988e-05, "loss": 0.0127, "step": 40950 }, { "epoch": 1.01, "learning_rate": 1.803176948651644e-05, "loss": 0.0121, "step": 40960 }, { "epoch": 1.01, "learning_rate": 1.8031276936337893e-05, "loss": 0.0117, "step": 40970 }, { "epoch": 1.01, "learning_rate": 1.8030784386159342e-05, "loss": 0.0155, "step": 40980 }, { "epoch": 1.01, "learning_rate": 1.803029183598079e-05, "loss": 0.012, "step": 40990 }, { "epoch": 1.01, "learning_rate": 1.8029799285802244e-05, "loss": 0.0126, "step": 41000 }, { "epoch": 1.01, "learning_rate": 1.8029306735623693e-05, "loss": 0.011, "step": 41010 }, { "epoch": 1.01, "learning_rate": 1.8028814185445142e-05, "loss": 0.0146, "step": 41020 }, { "epoch": 1.01, "learning_rate": 1.8028321635266595e-05, "loss": 0.0117, "step": 41030 }, { "epoch": 1.01, "learning_rate": 1.8027829085088044e-05, "loss": 0.0191, "step": 41040 }, { "epoch": 1.01, "learning_rate": 1.8027336534909496e-05, "loss": 0.0125, "step": 41050 }, { "epoch": 1.01, "learning_rate": 1.8026843984730946e-05, "loss": 0.0243, "step": 41060 }, { "epoch": 1.01, "learning_rate": 1.8026351434552395e-05, "loss": 0.0146, "step": 41070 }, { "epoch": 1.01, "learning_rate": 1.8025858884373847e-05, "loss": 0.0106, "step": 41080 }, { "epoch": 1.01, "learning_rate": 1.8025366334195296e-05, "loss": 0.016, "step": 41090 }, { "epoch": 1.01, "learning_rate": 1.802487378401675e-05, "loss": 0.0134, "step": 41100 }, { "epoch": 1.01, "learning_rate": 1.8024381233838198e-05, "loss": 0.0116, "step": 41110 }, { "epoch": 1.01, "learning_rate": 1.802388868365965e-05, "loss": 0.0147, "step": 41120 }, { "epoch": 1.01, "learning_rate": 1.80233961334811e-05, "loss": 0.0145, "step": 41130 }, { "epoch": 1.01, "learning_rate": 1.8022903583302552e-05, "loss": 0.0099, "step": 41140 }, { "epoch": 1.01, "learning_rate": 1.8022411033124e-05, "loss": 0.02, "step": 41150 }, { "epoch": 1.01, "learning_rate": 1.802191848294545e-05, "loss": 0.0158, "step": 41160 }, { "epoch": 1.01, "learning_rate": 1.8021425932766903e-05, "loss": 0.0276, "step": 41170 }, { "epoch": 1.01, "learning_rate": 1.8020933382588352e-05, "loss": 0.0132, "step": 41180 }, { "epoch": 1.01, "learning_rate": 1.80204408324098e-05, "loss": 0.018, "step": 41190 }, { "epoch": 1.01, "learning_rate": 1.8019948282231254e-05, "loss": 0.0124, "step": 41200 }, { "epoch": 1.01, "learning_rate": 1.8019455732052703e-05, "loss": 0.015, "step": 41210 }, { "epoch": 1.01, "learning_rate": 1.8018963181874152e-05, "loss": 0.0109, "step": 41220 }, { "epoch": 1.01, "learning_rate": 1.8018470631695605e-05, "loss": 0.014, "step": 41230 }, { "epoch": 1.01, "learning_rate": 1.8017978081517057e-05, "loss": 0.0265, "step": 41240 }, { "epoch": 1.01, "learning_rate": 1.8017485531338507e-05, "loss": 0.0126, "step": 41250 }, { "epoch": 1.01, "learning_rate": 1.801699298115996e-05, "loss": 0.0129, "step": 41260 }, { "epoch": 1.01, "learning_rate": 1.8016500430981408e-05, "loss": 0.0191, "step": 41270 }, { "epoch": 1.01, "learning_rate": 1.801600788080286e-05, "loss": 0.0128, "step": 41280 }, { "epoch": 1.01, "learning_rate": 1.801551533062431e-05, "loss": 0.0111, "step": 41290 }, { "epoch": 1.01, "learning_rate": 1.801502278044576e-05, "loss": 0.0124, "step": 41300 }, { "epoch": 1.01, "learning_rate": 1.801453023026721e-05, "loss": 0.0127, "step": 41310 }, { "epoch": 1.02, "learning_rate": 1.801403768008866e-05, "loss": 0.0119, "step": 41320 }, { "epoch": 1.02, "learning_rate": 1.801354512991011e-05, "loss": 0.014, "step": 41330 }, { "epoch": 1.02, "learning_rate": 1.8013052579731562e-05, "loss": 0.0114, "step": 41340 }, { "epoch": 1.02, "learning_rate": 1.801256002955301e-05, "loss": 0.0119, "step": 41350 }, { "epoch": 1.02, "learning_rate": 1.801206747937446e-05, "loss": 0.0133, "step": 41360 }, { "epoch": 1.02, "learning_rate": 1.8011574929195913e-05, "loss": 0.0142, "step": 41370 }, { "epoch": 1.02, "learning_rate": 1.8011082379017362e-05, "loss": 0.0118, "step": 41380 }, { "epoch": 1.02, "learning_rate": 1.8010589828838815e-05, "loss": 0.0194, "step": 41390 }, { "epoch": 1.02, "learning_rate": 1.8010097278660264e-05, "loss": 0.0131, "step": 41400 }, { "epoch": 1.02, "learning_rate": 1.8009604728481717e-05, "loss": 0.0138, "step": 41410 }, { "epoch": 1.02, "learning_rate": 1.8009112178303166e-05, "loss": 0.0157, "step": 41420 }, { "epoch": 1.02, "learning_rate": 1.800861962812462e-05, "loss": 0.0142, "step": 41430 }, { "epoch": 1.02, "learning_rate": 1.8008127077946067e-05, "loss": 0.0111, "step": 41440 }, { "epoch": 1.02, "learning_rate": 1.8007634527767517e-05, "loss": 0.0121, "step": 41450 }, { "epoch": 1.02, "learning_rate": 1.800714197758897e-05, "loss": 0.0127, "step": 41460 }, { "epoch": 1.02, "learning_rate": 1.800664942741042e-05, "loss": 0.0117, "step": 41470 }, { "epoch": 1.02, "learning_rate": 1.8006156877231867e-05, "loss": 0.0131, "step": 41480 }, { "epoch": 1.02, "learning_rate": 1.800566432705332e-05, "loss": 0.0121, "step": 41490 }, { "epoch": 1.02, "learning_rate": 1.800517177687477e-05, "loss": 0.0138, "step": 41500 }, { "epoch": 1.02, "learning_rate": 1.800467922669622e-05, "loss": 0.0115, "step": 41510 }, { "epoch": 1.02, "learning_rate": 1.800418667651767e-05, "loss": 0.0125, "step": 41520 }, { "epoch": 1.02, "learning_rate": 1.8003694126339123e-05, "loss": 0.0216, "step": 41530 }, { "epoch": 1.02, "learning_rate": 1.8003201576160573e-05, "loss": 0.0125, "step": 41540 }, { "epoch": 1.02, "learning_rate": 1.8002709025982025e-05, "loss": 0.0113, "step": 41550 }, { "epoch": 1.02, "learning_rate": 1.8002216475803474e-05, "loss": 0.0098, "step": 41560 }, { "epoch": 1.02, "learning_rate": 1.8001723925624927e-05, "loss": 0.0234, "step": 41570 }, { "epoch": 1.02, "learning_rate": 1.8001231375446376e-05, "loss": 0.0123, "step": 41580 }, { "epoch": 1.02, "learning_rate": 1.8000738825267825e-05, "loss": 0.0122, "step": 41590 }, { "epoch": 1.02, "learning_rate": 1.8000246275089278e-05, "loss": 0.0118, "step": 41600 }, { "epoch": 1.02, "learning_rate": 1.7999753724910727e-05, "loss": 0.0115, "step": 41610 }, { "epoch": 1.02, "learning_rate": 1.7999261174732176e-05, "loss": 0.0152, "step": 41620 }, { "epoch": 1.02, "learning_rate": 1.799876862455363e-05, "loss": 0.0135, "step": 41630 }, { "epoch": 1.02, "learning_rate": 1.7998276074375078e-05, "loss": 0.02, "step": 41640 }, { "epoch": 1.02, "learning_rate": 1.7997783524196527e-05, "loss": 0.0159, "step": 41650 }, { "epoch": 1.02, "learning_rate": 1.799729097401798e-05, "loss": 0.0136, "step": 41660 }, { "epoch": 1.02, "learning_rate": 1.799679842383943e-05, "loss": 0.0167, "step": 41670 }, { "epoch": 1.02, "learning_rate": 1.799630587366088e-05, "loss": 0.0115, "step": 41680 }, { "epoch": 1.02, "learning_rate": 1.7995813323482333e-05, "loss": 0.0157, "step": 41690 }, { "epoch": 1.02, "learning_rate": 1.7995320773303783e-05, "loss": 0.0189, "step": 41700 }, { "epoch": 1.02, "learning_rate": 1.7994828223125232e-05, "loss": 0.0132, "step": 41710 }, { "epoch": 1.02, "learning_rate": 1.7994335672946684e-05, "loss": 0.0152, "step": 41720 }, { "epoch": 1.03, "learning_rate": 1.7993843122768133e-05, "loss": 0.0197, "step": 41730 }, { "epoch": 1.03, "learning_rate": 1.7993350572589583e-05, "loss": 0.0204, "step": 41740 }, { "epoch": 1.03, "learning_rate": 1.7992858022411035e-05, "loss": 0.0135, "step": 41750 }, { "epoch": 1.03, "learning_rate": 1.7992365472232484e-05, "loss": 0.0129, "step": 41760 }, { "epoch": 1.03, "learning_rate": 1.7991872922053937e-05, "loss": 0.0126, "step": 41770 }, { "epoch": 1.03, "learning_rate": 1.7991380371875386e-05, "loss": 0.0133, "step": 41780 }, { "epoch": 1.03, "learning_rate": 1.7990887821696835e-05, "loss": 0.0126, "step": 41790 }, { "epoch": 1.03, "learning_rate": 1.7990395271518288e-05, "loss": 0.0126, "step": 41800 }, { "epoch": 1.03, "learning_rate": 1.7989902721339737e-05, "loss": 0.0139, "step": 41810 }, { "epoch": 1.03, "learning_rate": 1.798941017116119e-05, "loss": 0.0214, "step": 41820 }, { "epoch": 1.03, "learning_rate": 1.798891762098264e-05, "loss": 0.013, "step": 41830 }, { "epoch": 1.03, "learning_rate": 1.798842507080409e-05, "loss": 0.0171, "step": 41840 }, { "epoch": 1.03, "learning_rate": 1.798793252062554e-05, "loss": 0.0176, "step": 41850 }, { "epoch": 1.03, "learning_rate": 1.7987439970446993e-05, "loss": 0.0136, "step": 41860 }, { "epoch": 1.03, "learning_rate": 1.7986947420268442e-05, "loss": 0.0128, "step": 41870 }, { "epoch": 1.03, "learning_rate": 1.798645487008989e-05, "loss": 0.0171, "step": 41880 }, { "epoch": 1.03, "learning_rate": 1.7985962319911344e-05, "loss": 0.0243, "step": 41890 }, { "epoch": 1.03, "learning_rate": 1.7985469769732793e-05, "loss": 0.0134, "step": 41900 }, { "epoch": 1.03, "learning_rate": 1.7984977219554242e-05, "loss": 0.0108, "step": 41910 }, { "epoch": 1.03, "learning_rate": 1.7984484669375694e-05, "loss": 0.0119, "step": 41920 }, { "epoch": 1.03, "learning_rate": 1.7983992119197144e-05, "loss": 0.0184, "step": 41930 }, { "epoch": 1.03, "learning_rate": 1.7983499569018593e-05, "loss": 0.0118, "step": 41940 }, { "epoch": 1.03, "learning_rate": 1.7983007018840045e-05, "loss": 0.0193, "step": 41950 }, { "epoch": 1.03, "learning_rate": 1.7982514468661498e-05, "loss": 0.0195, "step": 41960 }, { "epoch": 1.03, "learning_rate": 1.7982021918482947e-05, "loss": 0.0176, "step": 41970 }, { "epoch": 1.03, "learning_rate": 1.79815293683044e-05, "loss": 0.0122, "step": 41980 }, { "epoch": 1.03, "learning_rate": 1.798103681812585e-05, "loss": 0.0126, "step": 41990 }, { "epoch": 1.03, "learning_rate": 1.79805442679473e-05, "loss": 0.0109, "step": 42000 }, { "epoch": 1.03, "learning_rate": 1.798005171776875e-05, "loss": 0.0135, "step": 42010 }, { "epoch": 1.03, "learning_rate": 1.79795591675902e-05, "loss": 0.013, "step": 42020 }, { "epoch": 1.03, "learning_rate": 1.7979066617411652e-05, "loss": 0.0181, "step": 42030 }, { "epoch": 1.03, "learning_rate": 1.79785740672331e-05, "loss": 0.0122, "step": 42040 }, { "epoch": 1.03, "learning_rate": 1.797808151705455e-05, "loss": 0.0138, "step": 42050 }, { "epoch": 1.03, "learning_rate": 1.7977588966876003e-05, "loss": 0.0143, "step": 42060 }, { "epoch": 1.03, "learning_rate": 1.7977096416697452e-05, "loss": 0.0135, "step": 42070 }, { "epoch": 1.03, "learning_rate": 1.79766038665189e-05, "loss": 0.0116, "step": 42080 }, { "epoch": 1.03, "learning_rate": 1.7976111316340354e-05, "loss": 0.0121, "step": 42090 }, { "epoch": 1.03, "learning_rate": 1.7975618766161803e-05, "loss": 0.0152, "step": 42100 }, { "epoch": 1.03, "learning_rate": 1.7975126215983255e-05, "loss": 0.0133, "step": 42110 }, { "epoch": 1.03, "learning_rate": 1.7974633665804704e-05, "loss": 0.0195, "step": 42120 }, { "epoch": 1.04, "learning_rate": 1.7974141115626157e-05, "loss": 0.0132, "step": 42130 }, { "epoch": 1.04, "learning_rate": 1.7973648565447606e-05, "loss": 0.012, "step": 42140 }, { "epoch": 1.04, "learning_rate": 1.797315601526906e-05, "loss": 0.0119, "step": 42150 }, { "epoch": 1.04, "learning_rate": 1.7972663465090508e-05, "loss": 0.0155, "step": 42160 }, { "epoch": 1.04, "learning_rate": 1.7972170914911957e-05, "loss": 0.0125, "step": 42170 }, { "epoch": 1.04, "learning_rate": 1.797167836473341e-05, "loss": 0.0141, "step": 42180 }, { "epoch": 1.04, "learning_rate": 1.797118581455486e-05, "loss": 0.0124, "step": 42190 }, { "epoch": 1.04, "learning_rate": 1.7970693264376308e-05, "loss": 0.0208, "step": 42200 }, { "epoch": 1.04, "learning_rate": 1.797020071419776e-05, "loss": 0.0112, "step": 42210 }, { "epoch": 1.04, "learning_rate": 1.796970816401921e-05, "loss": 0.0203, "step": 42220 }, { "epoch": 1.04, "learning_rate": 1.7969215613840662e-05, "loss": 0.0156, "step": 42230 }, { "epoch": 1.04, "learning_rate": 1.796872306366211e-05, "loss": 0.0134, "step": 42240 }, { "epoch": 1.04, "learning_rate": 1.7968230513483564e-05, "loss": 0.0121, "step": 42250 }, { "epoch": 1.04, "learning_rate": 1.7967737963305013e-05, "loss": 0.0309, "step": 42260 }, { "epoch": 1.04, "learning_rate": 1.7967245413126465e-05, "loss": 0.014, "step": 42270 }, { "epoch": 1.04, "learning_rate": 1.7966752862947915e-05, "loss": 0.0134, "step": 42280 }, { "epoch": 1.04, "learning_rate": 1.7966260312769367e-05, "loss": 0.0131, "step": 42290 }, { "epoch": 1.04, "learning_rate": 1.7965767762590816e-05, "loss": 0.0151, "step": 42300 }, { "epoch": 1.04, "learning_rate": 1.7965275212412265e-05, "loss": 0.0163, "step": 42310 }, { "epoch": 1.04, "learning_rate": 1.7964782662233718e-05, "loss": 0.0107, "step": 42320 }, { "epoch": 1.04, "learning_rate": 1.7964290112055167e-05, "loss": 0.0116, "step": 42330 }, { "epoch": 1.04, "learning_rate": 1.7963797561876616e-05, "loss": 0.0103, "step": 42340 }, { "epoch": 1.04, "learning_rate": 1.796330501169807e-05, "loss": 0.0136, "step": 42350 }, { "epoch": 1.04, "learning_rate": 1.7962812461519518e-05, "loss": 0.0128, "step": 42360 }, { "epoch": 1.04, "learning_rate": 1.7962319911340967e-05, "loss": 0.0149, "step": 42370 }, { "epoch": 1.04, "learning_rate": 1.796182736116242e-05, "loss": 0.0139, "step": 42380 }, { "epoch": 1.04, "learning_rate": 1.796133481098387e-05, "loss": 0.0197, "step": 42390 }, { "epoch": 1.04, "learning_rate": 1.796084226080532e-05, "loss": 0.012, "step": 42400 }, { "epoch": 1.04, "learning_rate": 1.7960349710626774e-05, "loss": 0.0124, "step": 42410 }, { "epoch": 1.04, "learning_rate": 1.7959857160448223e-05, "loss": 0.013, "step": 42420 }, { "epoch": 1.04, "learning_rate": 1.7959364610269672e-05, "loss": 0.0132, "step": 42430 }, { "epoch": 1.04, "learning_rate": 1.7958872060091125e-05, "loss": 0.0139, "step": 42440 }, { "epoch": 1.04, "learning_rate": 1.7958379509912574e-05, "loss": 0.0126, "step": 42450 }, { "epoch": 1.04, "learning_rate": 1.7957886959734023e-05, "loss": 0.022, "step": 42460 }, { "epoch": 1.04, "learning_rate": 1.7957394409555475e-05, "loss": 0.0142, "step": 42470 }, { "epoch": 1.04, "learning_rate": 1.7956901859376925e-05, "loss": 0.0117, "step": 42480 }, { "epoch": 1.04, "learning_rate": 1.7956409309198377e-05, "loss": 0.0123, "step": 42490 }, { "epoch": 1.04, "learning_rate": 1.7955916759019826e-05, "loss": 0.0127, "step": 42500 }, { "epoch": 1.04, "learning_rate": 1.7955424208841275e-05, "loss": 0.0131, "step": 42510 }, { "epoch": 1.04, "learning_rate": 1.7954931658662728e-05, "loss": 0.0121, "step": 42520 }, { "epoch": 1.04, "learning_rate": 1.7954439108484177e-05, "loss": 0.0134, "step": 42530 }, { "epoch": 1.05, "learning_rate": 1.795394655830563e-05, "loss": 0.0153, "step": 42540 }, { "epoch": 1.05, "learning_rate": 1.795345400812708e-05, "loss": 0.0109, "step": 42550 }, { "epoch": 1.05, "learning_rate": 1.795296145794853e-05, "loss": 0.0131, "step": 42560 }, { "epoch": 1.05, "learning_rate": 1.795246890776998e-05, "loss": 0.0124, "step": 42570 }, { "epoch": 1.05, "learning_rate": 1.7951976357591433e-05, "loss": 0.0115, "step": 42580 }, { "epoch": 1.05, "learning_rate": 1.7951483807412882e-05, "loss": 0.0119, "step": 42590 }, { "epoch": 1.05, "learning_rate": 1.795099125723433e-05, "loss": 0.0153, "step": 42600 }, { "epoch": 1.05, "learning_rate": 1.7950498707055784e-05, "loss": 0.0117, "step": 42610 }, { "epoch": 1.05, "learning_rate": 1.7950006156877233e-05, "loss": 0.0267, "step": 42620 }, { "epoch": 1.05, "learning_rate": 1.7949513606698682e-05, "loss": 0.0122, "step": 42630 }, { "epoch": 1.05, "learning_rate": 1.7949021056520135e-05, "loss": 0.0108, "step": 42640 }, { "epoch": 1.05, "learning_rate": 1.7948528506341584e-05, "loss": 0.0142, "step": 42650 }, { "epoch": 1.05, "learning_rate": 1.7948035956163033e-05, "loss": 0.011, "step": 42660 }, { "epoch": 1.05, "learning_rate": 1.7947543405984486e-05, "loss": 0.0134, "step": 42670 }, { "epoch": 1.05, "learning_rate": 1.7947050855805938e-05, "loss": 0.0117, "step": 42680 }, { "epoch": 1.05, "learning_rate": 1.7946558305627387e-05, "loss": 0.0121, "step": 42690 }, { "epoch": 1.05, "learning_rate": 1.794606575544884e-05, "loss": 0.0227, "step": 42700 }, { "epoch": 1.05, "learning_rate": 1.794557320527029e-05, "loss": 0.014, "step": 42710 }, { "epoch": 1.05, "learning_rate": 1.794508065509174e-05, "loss": 0.0203, "step": 42720 }, { "epoch": 1.05, "learning_rate": 1.794458810491319e-05, "loss": 0.012, "step": 42730 }, { "epoch": 1.05, "learning_rate": 1.794409555473464e-05, "loss": 0.0116, "step": 42740 }, { "epoch": 1.05, "learning_rate": 1.7943603004556092e-05, "loss": 0.0115, "step": 42750 }, { "epoch": 1.05, "learning_rate": 1.794311045437754e-05, "loss": 0.0144, "step": 42760 }, { "epoch": 1.05, "learning_rate": 1.794261790419899e-05, "loss": 0.0112, "step": 42770 }, { "epoch": 1.05, "learning_rate": 1.7942125354020443e-05, "loss": 0.0124, "step": 42780 }, { "epoch": 1.05, "learning_rate": 1.7941632803841892e-05, "loss": 0.0143, "step": 42790 }, { "epoch": 1.05, "learning_rate": 1.794114025366334e-05, "loss": 0.0203, "step": 42800 }, { "epoch": 1.05, "learning_rate": 1.7940647703484794e-05, "loss": 0.0206, "step": 42810 }, { "epoch": 1.05, "learning_rate": 1.7940155153306243e-05, "loss": 0.0161, "step": 42820 }, { "epoch": 1.05, "learning_rate": 1.7939662603127696e-05, "loss": 0.0183, "step": 42830 }, { "epoch": 1.05, "learning_rate": 1.7939170052949145e-05, "loss": 0.0111, "step": 42840 }, { "epoch": 1.05, "learning_rate": 1.7938677502770597e-05, "loss": 0.0146, "step": 42850 }, { "epoch": 1.05, "learning_rate": 1.7938184952592046e-05, "loss": 0.0142, "step": 42860 }, { "epoch": 1.05, "learning_rate": 1.79376924024135e-05, "loss": 0.0207, "step": 42870 }, { "epoch": 1.05, "learning_rate": 1.7937199852234948e-05, "loss": 0.0121, "step": 42880 }, { "epoch": 1.05, "learning_rate": 1.7936707302056397e-05, "loss": 0.0224, "step": 42890 }, { "epoch": 1.05, "learning_rate": 1.793621475187785e-05, "loss": 0.0255, "step": 42900 }, { "epoch": 1.05, "learning_rate": 1.79357222016993e-05, "loss": 0.0129, "step": 42910 }, { "epoch": 1.05, "learning_rate": 1.7935229651520748e-05, "loss": 0.0136, "step": 42920 }, { "epoch": 1.05, "learning_rate": 1.79347371013422e-05, "loss": 0.0153, "step": 42930 }, { "epoch": 1.05, "learning_rate": 1.793424455116365e-05, "loss": 0.0137, "step": 42940 }, { "epoch": 1.06, "learning_rate": 1.7933752000985102e-05, "loss": 0.0127, "step": 42950 }, { "epoch": 1.06, "learning_rate": 1.793325945080655e-05, "loss": 0.0119, "step": 42960 }, { "epoch": 1.06, "learning_rate": 1.7932766900628004e-05, "loss": 0.0118, "step": 42970 }, { "epoch": 1.06, "learning_rate": 1.7932274350449453e-05, "loss": 0.0126, "step": 42980 }, { "epoch": 1.06, "learning_rate": 1.7931781800270906e-05, "loss": 0.0107, "step": 42990 }, { "epoch": 1.06, "learning_rate": 1.7931289250092355e-05, "loss": 0.0169, "step": 43000 }, { "epoch": 1.06, "learning_rate": 1.7930796699913807e-05, "loss": 0.0136, "step": 43010 }, { "epoch": 1.06, "learning_rate": 1.7930304149735257e-05, "loss": 0.0121, "step": 43020 }, { "epoch": 1.06, "learning_rate": 1.7929811599556706e-05, "loss": 0.0106, "step": 43030 }, { "epoch": 1.06, "learning_rate": 1.7929319049378158e-05, "loss": 0.0114, "step": 43040 }, { "epoch": 1.06, "learning_rate": 1.7928826499199607e-05, "loss": 0.0135, "step": 43050 }, { "epoch": 1.06, "learning_rate": 1.7928333949021057e-05, "loss": 0.0115, "step": 43060 }, { "epoch": 1.06, "learning_rate": 1.792784139884251e-05, "loss": 0.0125, "step": 43070 }, { "epoch": 1.06, "learning_rate": 1.7927348848663958e-05, "loss": 0.0154, "step": 43080 }, { "epoch": 1.06, "learning_rate": 1.7926856298485407e-05, "loss": 0.0112, "step": 43090 }, { "epoch": 1.06, "learning_rate": 1.792636374830686e-05, "loss": 0.0129, "step": 43100 }, { "epoch": 1.06, "learning_rate": 1.792587119812831e-05, "loss": 0.0141, "step": 43110 }, { "epoch": 1.06, "learning_rate": 1.792537864794976e-05, "loss": 0.0126, "step": 43120 }, { "epoch": 1.06, "learning_rate": 1.7924886097771214e-05, "loss": 0.0122, "step": 43130 }, { "epoch": 1.06, "learning_rate": 1.7924393547592663e-05, "loss": 0.0116, "step": 43140 }, { "epoch": 1.06, "learning_rate": 1.7923900997414112e-05, "loss": 0.0123, "step": 43150 }, { "epoch": 1.06, "learning_rate": 1.7923408447235565e-05, "loss": 0.0106, "step": 43160 }, { "epoch": 1.06, "learning_rate": 1.7922915897057014e-05, "loss": 0.0123, "step": 43170 }, { "epoch": 1.06, "learning_rate": 1.7922423346878463e-05, "loss": 0.0122, "step": 43180 }, { "epoch": 1.06, "learning_rate": 1.7921930796699916e-05, "loss": 0.011, "step": 43190 }, { "epoch": 1.06, "learning_rate": 1.7921438246521365e-05, "loss": 0.012, "step": 43200 }, { "epoch": 1.06, "learning_rate": 1.7920945696342818e-05, "loss": 0.0192, "step": 43210 }, { "epoch": 1.06, "learning_rate": 1.7920453146164267e-05, "loss": 0.0137, "step": 43220 }, { "epoch": 1.06, "learning_rate": 1.7919960595985716e-05, "loss": 0.0117, "step": 43230 }, { "epoch": 1.06, "learning_rate": 1.791946804580717e-05, "loss": 0.0125, "step": 43240 }, { "epoch": 1.06, "learning_rate": 1.7918975495628617e-05, "loss": 0.0125, "step": 43250 }, { "epoch": 1.06, "learning_rate": 1.791848294545007e-05, "loss": 0.0176, "step": 43260 }, { "epoch": 1.06, "learning_rate": 1.791799039527152e-05, "loss": 0.0178, "step": 43270 }, { "epoch": 1.06, "learning_rate": 1.7917497845092972e-05, "loss": 0.0136, "step": 43280 }, { "epoch": 1.06, "learning_rate": 1.791700529491442e-05, "loss": 0.0292, "step": 43290 }, { "epoch": 1.06, "learning_rate": 1.7916512744735873e-05, "loss": 0.0169, "step": 43300 }, { "epoch": 1.06, "learning_rate": 1.7916020194557323e-05, "loss": 0.0124, "step": 43310 }, { "epoch": 1.06, "learning_rate": 1.791552764437877e-05, "loss": 0.0138, "step": 43320 }, { "epoch": 1.06, "learning_rate": 1.7915035094200224e-05, "loss": 0.015, "step": 43330 }, { "epoch": 1.06, "learning_rate": 1.7914542544021673e-05, "loss": 0.0115, "step": 43340 }, { "epoch": 1.06, "learning_rate": 1.7914049993843123e-05, "loss": 0.0114, "step": 43350 }, { "epoch": 1.07, "learning_rate": 1.7913557443664575e-05, "loss": 0.0105, "step": 43360 }, { "epoch": 1.07, "learning_rate": 1.7913064893486024e-05, "loss": 0.0139, "step": 43370 }, { "epoch": 1.07, "learning_rate": 1.7912572343307473e-05, "loss": 0.0119, "step": 43380 }, { "epoch": 1.07, "learning_rate": 1.7912079793128926e-05, "loss": 0.0121, "step": 43390 }, { "epoch": 1.07, "learning_rate": 1.791158724295038e-05, "loss": 0.0165, "step": 43400 }, { "epoch": 1.07, "learning_rate": 1.7911094692771828e-05, "loss": 0.0131, "step": 43410 }, { "epoch": 1.07, "learning_rate": 1.791060214259328e-05, "loss": 0.0127, "step": 43420 }, { "epoch": 1.07, "learning_rate": 1.791010959241473e-05, "loss": 0.0127, "step": 43430 }, { "epoch": 1.07, "learning_rate": 1.7909617042236182e-05, "loss": 0.0243, "step": 43440 }, { "epoch": 1.07, "learning_rate": 1.790912449205763e-05, "loss": 0.0124, "step": 43450 }, { "epoch": 1.07, "learning_rate": 1.790863194187908e-05, "loss": 0.0137, "step": 43460 }, { "epoch": 1.07, "learning_rate": 1.7908139391700533e-05, "loss": 0.0118, "step": 43470 }, { "epoch": 1.07, "learning_rate": 1.7907646841521982e-05, "loss": 0.0132, "step": 43480 }, { "epoch": 1.07, "learning_rate": 1.790715429134343e-05, "loss": 0.0184, "step": 43490 }, { "epoch": 1.07, "learning_rate": 1.7906661741164883e-05, "loss": 0.015, "step": 43500 }, { "epoch": 1.07, "learning_rate": 1.7906169190986333e-05, "loss": 0.0132, "step": 43510 }, { "epoch": 1.07, "learning_rate": 1.7905676640807782e-05, "loss": 0.0196, "step": 43520 }, { "epoch": 1.07, "learning_rate": 1.7905184090629234e-05, "loss": 0.011, "step": 43530 }, { "epoch": 1.07, "learning_rate": 1.7904691540450683e-05, "loss": 0.0135, "step": 43540 }, { "epoch": 1.07, "learning_rate": 1.7904198990272136e-05, "loss": 0.0117, "step": 43550 }, { "epoch": 1.07, "learning_rate": 1.7903706440093585e-05, "loss": 0.0227, "step": 43560 }, { "epoch": 1.07, "learning_rate": 1.7903213889915038e-05, "loss": 0.0138, "step": 43570 }, { "epoch": 1.07, "learning_rate": 1.7902721339736487e-05, "loss": 0.0266, "step": 43580 }, { "epoch": 1.07, "learning_rate": 1.790222878955794e-05, "loss": 0.0124, "step": 43590 }, { "epoch": 1.07, "learning_rate": 1.790173623937939e-05, "loss": 0.015, "step": 43600 }, { "epoch": 1.07, "learning_rate": 1.7901243689200838e-05, "loss": 0.0147, "step": 43610 }, { "epoch": 1.07, "learning_rate": 1.790075113902229e-05, "loss": 0.0123, "step": 43620 }, { "epoch": 1.07, "learning_rate": 1.790025858884374e-05, "loss": 0.0129, "step": 43630 }, { "epoch": 1.07, "learning_rate": 1.789976603866519e-05, "loss": 0.0127, "step": 43640 }, { "epoch": 1.07, "learning_rate": 1.789927348848664e-05, "loss": 0.0115, "step": 43650 }, { "epoch": 1.07, "learning_rate": 1.789878093830809e-05, "loss": 0.0105, "step": 43660 }, { "epoch": 1.07, "learning_rate": 1.7898288388129543e-05, "loss": 0.0132, "step": 43670 }, { "epoch": 1.07, "learning_rate": 1.7897795837950992e-05, "loss": 0.0118, "step": 43680 }, { "epoch": 1.07, "learning_rate": 1.7897303287772444e-05, "loss": 0.0132, "step": 43690 }, { "epoch": 1.07, "learning_rate": 1.7896810737593894e-05, "loss": 0.0143, "step": 43700 }, { "epoch": 1.07, "learning_rate": 1.7896318187415346e-05, "loss": 0.0121, "step": 43710 }, { "epoch": 1.07, "learning_rate": 1.7895825637236795e-05, "loss": 0.0125, "step": 43720 }, { "epoch": 1.07, "learning_rate": 1.7895333087058248e-05, "loss": 0.0128, "step": 43730 }, { "epoch": 1.07, "learning_rate": 1.7894840536879697e-05, "loss": 0.0168, "step": 43740 }, { "epoch": 1.07, "learning_rate": 1.7894347986701146e-05, "loss": 0.0111, "step": 43750 }, { "epoch": 1.08, "learning_rate": 1.78938554365226e-05, "loss": 0.0208, "step": 43760 }, { "epoch": 1.08, "learning_rate": 1.7893362886344048e-05, "loss": 0.0112, "step": 43770 }, { "epoch": 1.08, "learning_rate": 1.7892870336165497e-05, "loss": 0.0112, "step": 43780 }, { "epoch": 1.08, "learning_rate": 1.789237778598695e-05, "loss": 0.0169, "step": 43790 }, { "epoch": 1.08, "learning_rate": 1.78918852358084e-05, "loss": 0.0135, "step": 43800 }, { "epoch": 1.08, "learning_rate": 1.7891392685629848e-05, "loss": 0.0131, "step": 43810 }, { "epoch": 1.08, "learning_rate": 1.78909001354513e-05, "loss": 0.0139, "step": 43820 }, { "epoch": 1.08, "learning_rate": 1.789040758527275e-05, "loss": 0.0129, "step": 43830 }, { "epoch": 1.08, "learning_rate": 1.7889915035094202e-05, "loss": 0.0159, "step": 43840 }, { "epoch": 1.08, "learning_rate": 1.7889422484915654e-05, "loss": 0.0194, "step": 43850 }, { "epoch": 1.08, "learning_rate": 1.7888929934737104e-05, "loss": 0.0187, "step": 43860 }, { "epoch": 1.08, "learning_rate": 1.7888437384558553e-05, "loss": 0.013, "step": 43870 }, { "epoch": 1.08, "learning_rate": 1.7887944834380005e-05, "loss": 0.0108, "step": 43880 }, { "epoch": 1.08, "learning_rate": 1.7887452284201454e-05, "loss": 0.013, "step": 43890 }, { "epoch": 1.08, "learning_rate": 1.7886959734022904e-05, "loss": 0.0136, "step": 43900 }, { "epoch": 1.08, "learning_rate": 1.7886467183844356e-05, "loss": 0.0129, "step": 43910 }, { "epoch": 1.08, "learning_rate": 1.7885974633665805e-05, "loss": 0.0156, "step": 43920 }, { "epoch": 1.08, "learning_rate": 1.7885482083487258e-05, "loss": 0.0204, "step": 43930 }, { "epoch": 1.08, "learning_rate": 1.7884989533308707e-05, "loss": 0.0128, "step": 43940 }, { "epoch": 1.08, "learning_rate": 1.7884496983130156e-05, "loss": 0.0333, "step": 43950 }, { "epoch": 1.08, "learning_rate": 1.788400443295161e-05, "loss": 0.0129, "step": 43960 }, { "epoch": 1.08, "learning_rate": 1.7883511882773058e-05, "loss": 0.0225, "step": 43970 }, { "epoch": 1.08, "learning_rate": 1.788301933259451e-05, "loss": 0.0123, "step": 43980 }, { "epoch": 1.08, "learning_rate": 1.788252678241596e-05, "loss": 0.013, "step": 43990 }, { "epoch": 1.08, "learning_rate": 1.7882034232237412e-05, "loss": 0.0109, "step": 44000 }, { "epoch": 1.08, "learning_rate": 1.788154168205886e-05, "loss": 0.0137, "step": 44010 }, { "epoch": 1.08, "learning_rate": 1.7881049131880314e-05, "loss": 0.0143, "step": 44020 }, { "epoch": 1.08, "learning_rate": 1.7880556581701763e-05, "loss": 0.0181, "step": 44030 }, { "epoch": 1.08, "learning_rate": 1.7880064031523212e-05, "loss": 0.0129, "step": 44040 }, { "epoch": 1.08, "learning_rate": 1.7879571481344665e-05, "loss": 0.0122, "step": 44050 }, { "epoch": 1.08, "learning_rate": 1.7879078931166114e-05, "loss": 0.0133, "step": 44060 }, { "epoch": 1.08, "learning_rate": 1.7878586380987563e-05, "loss": 0.0115, "step": 44070 }, { "epoch": 1.08, "learning_rate": 1.7878093830809015e-05, "loss": 0.0109, "step": 44080 }, { "epoch": 1.08, "learning_rate": 1.7877601280630465e-05, "loss": 0.0133, "step": 44090 }, { "epoch": 1.08, "learning_rate": 1.7877108730451914e-05, "loss": 0.0149, "step": 44100 }, { "epoch": 1.08, "learning_rate": 1.7876616180273366e-05, "loss": 0.0103, "step": 44110 }, { "epoch": 1.08, "learning_rate": 1.787612363009482e-05, "loss": 0.013, "step": 44120 }, { "epoch": 1.08, "learning_rate": 1.7875631079916268e-05, "loss": 0.019, "step": 44130 }, { "epoch": 1.08, "learning_rate": 1.787513852973772e-05, "loss": 0.0133, "step": 44140 }, { "epoch": 1.08, "learning_rate": 1.787464597955917e-05, "loss": 0.0179, "step": 44150 }, { "epoch": 1.08, "learning_rate": 1.7874153429380622e-05, "loss": 0.0113, "step": 44160 }, { "epoch": 1.09, "learning_rate": 1.787366087920207e-05, "loss": 0.0119, "step": 44170 }, { "epoch": 1.09, "learning_rate": 1.787316832902352e-05, "loss": 0.0125, "step": 44180 }, { "epoch": 1.09, "learning_rate": 1.7872675778844973e-05, "loss": 0.0115, "step": 44190 }, { "epoch": 1.09, "learning_rate": 1.7872183228666422e-05, "loss": 0.0099, "step": 44200 }, { "epoch": 1.09, "learning_rate": 1.787169067848787e-05, "loss": 0.0153, "step": 44210 }, { "epoch": 1.09, "learning_rate": 1.7871198128309324e-05, "loss": 0.0191, "step": 44220 }, { "epoch": 1.09, "learning_rate": 1.7870705578130773e-05, "loss": 0.0117, "step": 44230 }, { "epoch": 1.09, "learning_rate": 1.7870213027952222e-05, "loss": 0.0135, "step": 44240 }, { "epoch": 1.09, "learning_rate": 1.7869720477773675e-05, "loss": 0.0128, "step": 44250 }, { "epoch": 1.09, "learning_rate": 1.7869227927595124e-05, "loss": 0.0116, "step": 44260 }, { "epoch": 1.09, "learning_rate": 1.7868735377416576e-05, "loss": 0.0115, "step": 44270 }, { "epoch": 1.09, "learning_rate": 1.7868242827238025e-05, "loss": 0.0127, "step": 44280 }, { "epoch": 1.09, "learning_rate": 1.7867750277059478e-05, "loss": 0.011, "step": 44290 }, { "epoch": 1.09, "learning_rate": 1.7867257726880927e-05, "loss": 0.0135, "step": 44300 }, { "epoch": 1.09, "learning_rate": 1.786676517670238e-05, "loss": 0.0141, "step": 44310 }, { "epoch": 1.09, "learning_rate": 1.786627262652383e-05, "loss": 0.0149, "step": 44320 }, { "epoch": 1.09, "learning_rate": 1.7865780076345278e-05, "loss": 0.0107, "step": 44330 }, { "epoch": 1.09, "learning_rate": 1.786528752616673e-05, "loss": 0.0214, "step": 44340 }, { "epoch": 1.09, "learning_rate": 1.786479497598818e-05, "loss": 0.0119, "step": 44350 }, { "epoch": 1.09, "learning_rate": 1.786430242580963e-05, "loss": 0.0144, "step": 44360 }, { "epoch": 1.09, "learning_rate": 1.786380987563108e-05, "loss": 0.0129, "step": 44370 }, { "epoch": 1.09, "learning_rate": 1.786331732545253e-05, "loss": 0.0117, "step": 44380 }, { "epoch": 1.09, "learning_rate": 1.7862824775273983e-05, "loss": 0.0107, "step": 44390 }, { "epoch": 1.09, "learning_rate": 1.7862332225095432e-05, "loss": 0.0136, "step": 44400 }, { "epoch": 1.09, "learning_rate": 1.7861839674916885e-05, "loss": 0.0131, "step": 44410 }, { "epoch": 1.09, "learning_rate": 1.7861347124738334e-05, "loss": 0.0113, "step": 44420 }, { "epoch": 1.09, "learning_rate": 1.7860854574559786e-05, "loss": 0.0119, "step": 44430 }, { "epoch": 1.09, "learning_rate": 1.7860362024381236e-05, "loss": 0.0238, "step": 44440 }, { "epoch": 1.09, "learning_rate": 1.7859869474202688e-05, "loss": 0.0108, "step": 44450 }, { "epoch": 1.09, "learning_rate": 1.7859376924024137e-05, "loss": 0.0122, "step": 44460 }, { "epoch": 1.09, "learning_rate": 1.7858884373845586e-05, "loss": 0.011, "step": 44470 }, { "epoch": 1.09, "learning_rate": 1.785839182366704e-05, "loss": 0.0138, "step": 44480 }, { "epoch": 1.09, "learning_rate": 1.7857899273488488e-05, "loss": 0.0127, "step": 44490 }, { "epoch": 1.09, "learning_rate": 1.7857406723309937e-05, "loss": 0.0133, "step": 44500 }, { "epoch": 1.09, "learning_rate": 1.785691417313139e-05, "loss": 0.0215, "step": 44510 }, { "epoch": 1.09, "learning_rate": 1.785642162295284e-05, "loss": 0.017, "step": 44520 }, { "epoch": 1.09, "learning_rate": 1.7855929072774288e-05, "loss": 0.0107, "step": 44530 }, { "epoch": 1.09, "learning_rate": 1.785543652259574e-05, "loss": 0.0116, "step": 44540 }, { "epoch": 1.09, "learning_rate": 1.785494397241719e-05, "loss": 0.0122, "step": 44550 }, { "epoch": 1.09, "learning_rate": 1.7854451422238642e-05, "loss": 0.0128, "step": 44560 }, { "epoch": 1.09, "learning_rate": 1.7853958872060095e-05, "loss": 0.0143, "step": 44570 }, { "epoch": 1.1, "learning_rate": 1.7853466321881544e-05, "loss": 0.0146, "step": 44580 }, { "epoch": 1.1, "learning_rate": 1.7852973771702993e-05, "loss": 0.0105, "step": 44590 }, { "epoch": 1.1, "learning_rate": 1.7852481221524446e-05, "loss": 0.0123, "step": 44600 }, { "epoch": 1.1, "learning_rate": 1.7851988671345895e-05, "loss": 0.0147, "step": 44610 }, { "epoch": 1.1, "learning_rate": 1.7851496121167344e-05, "loss": 0.0119, "step": 44620 }, { "epoch": 1.1, "learning_rate": 1.7851003570988796e-05, "loss": 0.0138, "step": 44630 }, { "epoch": 1.1, "learning_rate": 1.7850511020810246e-05, "loss": 0.0117, "step": 44640 }, { "epoch": 1.1, "learning_rate": 1.7850018470631698e-05, "loss": 0.0132, "step": 44650 }, { "epoch": 1.1, "learning_rate": 1.7849525920453147e-05, "loss": 0.0102, "step": 44660 }, { "epoch": 1.1, "learning_rate": 1.7849033370274596e-05, "loss": 0.0145, "step": 44670 }, { "epoch": 1.1, "learning_rate": 1.784854082009605e-05, "loss": 0.0115, "step": 44680 }, { "epoch": 1.1, "learning_rate": 1.7848048269917498e-05, "loss": 0.013, "step": 44690 }, { "epoch": 1.1, "learning_rate": 1.784755571973895e-05, "loss": 0.0142, "step": 44700 }, { "epoch": 1.1, "learning_rate": 1.78470631695604e-05, "loss": 0.0109, "step": 44710 }, { "epoch": 1.1, "learning_rate": 1.7846570619381852e-05, "loss": 0.0118, "step": 44720 }, { "epoch": 1.1, "learning_rate": 1.78460780692033e-05, "loss": 0.0122, "step": 44730 }, { "epoch": 1.1, "learning_rate": 1.7845585519024754e-05, "loss": 0.0116, "step": 44740 }, { "epoch": 1.1, "learning_rate": 1.7845092968846203e-05, "loss": 0.0108, "step": 44750 }, { "epoch": 1.1, "learning_rate": 1.7844600418667652e-05, "loss": 0.0118, "step": 44760 }, { "epoch": 1.1, "learning_rate": 1.7844107868489105e-05, "loss": 0.0126, "step": 44770 }, { "epoch": 1.1, "learning_rate": 1.7843615318310554e-05, "loss": 0.012, "step": 44780 }, { "epoch": 1.1, "learning_rate": 1.7843122768132003e-05, "loss": 0.0134, "step": 44790 }, { "epoch": 1.1, "learning_rate": 1.7842630217953456e-05, "loss": 0.0178, "step": 44800 }, { "epoch": 1.1, "learning_rate": 1.7842137667774905e-05, "loss": 0.0132, "step": 44810 }, { "epoch": 1.1, "learning_rate": 1.7841645117596354e-05, "loss": 0.0105, "step": 44820 }, { "epoch": 1.1, "learning_rate": 1.7841152567417807e-05, "loss": 0.0213, "step": 44830 }, { "epoch": 1.1, "learning_rate": 1.784066001723926e-05, "loss": 0.0119, "step": 44840 }, { "epoch": 1.1, "learning_rate": 1.7840167467060708e-05, "loss": 0.0145, "step": 44850 }, { "epoch": 1.1, "learning_rate": 1.783967491688216e-05, "loss": 0.0117, "step": 44860 }, { "epoch": 1.1, "learning_rate": 1.783918236670361e-05, "loss": 0.0209, "step": 44870 }, { "epoch": 1.1, "learning_rate": 1.7838689816525062e-05, "loss": 0.0109, "step": 44880 }, { "epoch": 1.1, "learning_rate": 1.783819726634651e-05, "loss": 0.0176, "step": 44890 }, { "epoch": 1.1, "learning_rate": 1.783770471616796e-05, "loss": 0.0129, "step": 44900 }, { "epoch": 1.1, "learning_rate": 1.7837212165989413e-05, "loss": 0.0126, "step": 44910 }, { "epoch": 1.1, "learning_rate": 1.7836719615810862e-05, "loss": 0.0111, "step": 44920 }, { "epoch": 1.1, "learning_rate": 1.783622706563231e-05, "loss": 0.011, "step": 44930 }, { "epoch": 1.1, "learning_rate": 1.7835734515453764e-05, "loss": 0.0122, "step": 44940 }, { "epoch": 1.1, "learning_rate": 1.7835241965275213e-05, "loss": 0.0088, "step": 44950 }, { "epoch": 1.1, "learning_rate": 1.7834749415096662e-05, "loss": 0.0101, "step": 44960 }, { "epoch": 1.1, "learning_rate": 1.7834256864918115e-05, "loss": 0.0113, "step": 44970 }, { "epoch": 1.11, "learning_rate": 1.7833764314739564e-05, "loss": 0.0183, "step": 44980 }, { "epoch": 1.11, "learning_rate": 1.7833271764561017e-05, "loss": 0.011, "step": 44990 }, { "epoch": 1.11, "learning_rate": 1.7832779214382466e-05, "loss": 0.0122, "step": 45000 }, { "epoch": 1.11, "eval_bleu": 98.0067, "eval_gen_len_ratio": 0.9996, "eval_loss": 0.009897531941533089, "eval_runtime": 253.2632, "eval_samples_per_second": 7.368, "eval_steps_per_second": 0.233, "step": 45000 }, { "epoch": 1.11, "learning_rate": 1.783228666420392e-05, "loss": 0.0105, "step": 45010 }, { "epoch": 1.11, "learning_rate": 1.7831794114025368e-05, "loss": 0.015, "step": 45020 }, { "epoch": 1.11, "learning_rate": 1.783130156384682e-05, "loss": 0.0138, "step": 45030 }, { "epoch": 1.11, "learning_rate": 1.783080901366827e-05, "loss": 0.014, "step": 45040 }, { "epoch": 1.11, "learning_rate": 1.783031646348972e-05, "loss": 0.0107, "step": 45050 }, { "epoch": 1.11, "learning_rate": 1.782982391331117e-05, "loss": 0.0194, "step": 45060 }, { "epoch": 1.11, "learning_rate": 1.782933136313262e-05, "loss": 0.0114, "step": 45070 }, { "epoch": 1.11, "learning_rate": 1.782883881295407e-05, "loss": 0.0115, "step": 45080 }, { "epoch": 1.11, "learning_rate": 1.7828346262775522e-05, "loss": 0.0124, "step": 45090 }, { "epoch": 1.11, "learning_rate": 1.782785371259697e-05, "loss": 0.013, "step": 45100 }, { "epoch": 1.11, "learning_rate": 1.7827361162418423e-05, "loss": 0.0137, "step": 45110 }, { "epoch": 1.11, "learning_rate": 1.7826868612239873e-05, "loss": 0.0115, "step": 45120 }, { "epoch": 1.11, "learning_rate": 1.7826376062061325e-05, "loss": 0.0123, "step": 45130 }, { "epoch": 1.11, "learning_rate": 1.7825883511882774e-05, "loss": 0.0144, "step": 45140 }, { "epoch": 1.11, "learning_rate": 1.7825390961704227e-05, "loss": 0.0172, "step": 45150 }, { "epoch": 1.11, "learning_rate": 1.7824898411525676e-05, "loss": 0.0122, "step": 45160 }, { "epoch": 1.11, "learning_rate": 1.782440586134713e-05, "loss": 0.0215, "step": 45170 }, { "epoch": 1.11, "learning_rate": 1.7823913311168578e-05, "loss": 0.0122, "step": 45180 }, { "epoch": 1.11, "learning_rate": 1.7823420760990027e-05, "loss": 0.0123, "step": 45190 }, { "epoch": 1.11, "learning_rate": 1.782292821081148e-05, "loss": 0.011, "step": 45200 }, { "epoch": 1.11, "learning_rate": 1.782243566063293e-05, "loss": 0.012, "step": 45210 }, { "epoch": 1.11, "learning_rate": 1.7821943110454378e-05, "loss": 0.018, "step": 45220 }, { "epoch": 1.11, "learning_rate": 1.782145056027583e-05, "loss": 0.0142, "step": 45230 }, { "epoch": 1.11, "learning_rate": 1.782095801009728e-05, "loss": 0.012, "step": 45240 }, { "epoch": 1.11, "learning_rate": 1.782046545991873e-05, "loss": 0.0105, "step": 45250 }, { "epoch": 1.11, "learning_rate": 1.781997290974018e-05, "loss": 0.0142, "step": 45260 }, { "epoch": 1.11, "learning_rate": 1.781948035956163e-05, "loss": 0.0135, "step": 45270 }, { "epoch": 1.11, "learning_rate": 1.7818987809383083e-05, "loss": 0.014, "step": 45280 }, { "epoch": 1.11, "learning_rate": 1.7818495259204535e-05, "loss": 0.0117, "step": 45290 }, { "epoch": 1.11, "learning_rate": 1.7818002709025984e-05, "loss": 0.0119, "step": 45300 }, { "epoch": 1.11, "learning_rate": 1.7817510158847433e-05, "loss": 0.0148, "step": 45310 }, { "epoch": 1.11, "learning_rate": 1.7817017608668886e-05, "loss": 0.0124, "step": 45320 }, { "epoch": 1.11, "learning_rate": 1.7816525058490335e-05, "loss": 0.0119, "step": 45330 }, { "epoch": 1.11, "learning_rate": 1.7816032508311784e-05, "loss": 0.0132, "step": 45340 }, { "epoch": 1.11, "learning_rate": 1.7815539958133237e-05, "loss": 0.0122, "step": 45350 }, { "epoch": 1.11, "learning_rate": 1.7815047407954686e-05, "loss": 0.013, "step": 45360 }, { "epoch": 1.11, "learning_rate": 1.781455485777614e-05, "loss": 0.0134, "step": 45370 }, { "epoch": 1.11, "learning_rate": 1.7814062307597588e-05, "loss": 0.0121, "step": 45380 }, { "epoch": 1.12, "learning_rate": 1.7813569757419037e-05, "loss": 0.0137, "step": 45390 }, { "epoch": 1.12, "learning_rate": 1.781307720724049e-05, "loss": 0.0092, "step": 45400 }, { "epoch": 1.12, "learning_rate": 1.781258465706194e-05, "loss": 0.0175, "step": 45410 }, { "epoch": 1.12, "learning_rate": 1.781209210688339e-05, "loss": 0.0144, "step": 45420 }, { "epoch": 1.12, "learning_rate": 1.781159955670484e-05, "loss": 0.014, "step": 45430 }, { "epoch": 1.12, "learning_rate": 1.7811107006526293e-05, "loss": 0.0148, "step": 45440 }, { "epoch": 1.12, "learning_rate": 1.7810614456347742e-05, "loss": 0.0123, "step": 45450 }, { "epoch": 1.12, "learning_rate": 1.7810121906169194e-05, "loss": 0.0196, "step": 45460 }, { "epoch": 1.12, "learning_rate": 1.7809629355990644e-05, "loss": 0.0212, "step": 45470 }, { "epoch": 1.12, "learning_rate": 1.7809136805812093e-05, "loss": 0.0099, "step": 45480 }, { "epoch": 1.12, "learning_rate": 1.7808644255633545e-05, "loss": 0.0127, "step": 45490 }, { "epoch": 1.12, "learning_rate": 1.7808151705454994e-05, "loss": 0.0114, "step": 45500 }, { "epoch": 1.12, "learning_rate": 1.7807659155276444e-05, "loss": 0.0156, "step": 45510 }, { "epoch": 1.12, "learning_rate": 1.7807166605097896e-05, "loss": 0.0203, "step": 45520 }, { "epoch": 1.12, "learning_rate": 1.7806674054919345e-05, "loss": 0.01, "step": 45530 }, { "epoch": 1.12, "learning_rate": 1.7806181504740794e-05, "loss": 0.0115, "step": 45540 }, { "epoch": 1.12, "learning_rate": 1.7805688954562247e-05, "loss": 0.0139, "step": 45550 }, { "epoch": 1.12, "learning_rate": 1.78051964043837e-05, "loss": 0.0128, "step": 45560 }, { "epoch": 1.12, "learning_rate": 1.780470385420515e-05, "loss": 0.0126, "step": 45570 }, { "epoch": 1.12, "learning_rate": 1.78042113040266e-05, "loss": 0.0121, "step": 45580 }, { "epoch": 1.12, "learning_rate": 1.780371875384805e-05, "loss": 0.0126, "step": 45590 }, { "epoch": 1.12, "learning_rate": 1.7803226203669503e-05, "loss": 0.0113, "step": 45600 }, { "epoch": 1.12, "learning_rate": 1.7802733653490952e-05, "loss": 0.012, "step": 45610 }, { "epoch": 1.12, "learning_rate": 1.78022411033124e-05, "loss": 0.0139, "step": 45620 }, { "epoch": 1.12, "learning_rate": 1.7801748553133854e-05, "loss": 0.0169, "step": 45630 }, { "epoch": 1.12, "learning_rate": 1.7801256002955303e-05, "loss": 0.0108, "step": 45640 }, { "epoch": 1.12, "learning_rate": 1.7800763452776752e-05, "loss": 0.0201, "step": 45650 }, { "epoch": 1.12, "learning_rate": 1.7800270902598204e-05, "loss": 0.0198, "step": 45660 }, { "epoch": 1.12, "learning_rate": 1.7799778352419654e-05, "loss": 0.0115, "step": 45670 }, { "epoch": 1.12, "learning_rate": 1.7799285802241103e-05, "loss": 0.0149, "step": 45680 }, { "epoch": 1.12, "learning_rate": 1.7798793252062555e-05, "loss": 0.0141, "step": 45690 }, { "epoch": 1.12, "learning_rate": 1.7798300701884004e-05, "loss": 0.0117, "step": 45700 }, { "epoch": 1.12, "learning_rate": 1.7797808151705457e-05, "loss": 0.0149, "step": 45710 }, { "epoch": 1.12, "learning_rate": 1.7797315601526906e-05, "loss": 0.0129, "step": 45720 }, { "epoch": 1.12, "learning_rate": 1.779682305134836e-05, "loss": 0.0134, "step": 45730 }, { "epoch": 1.12, "learning_rate": 1.7796330501169808e-05, "loss": 0.0132, "step": 45740 }, { "epoch": 1.12, "learning_rate": 1.779583795099126e-05, "loss": 0.0104, "step": 45750 }, { "epoch": 1.12, "learning_rate": 1.779534540081271e-05, "loss": 0.0115, "step": 45760 }, { "epoch": 1.12, "learning_rate": 1.779485285063416e-05, "loss": 0.0174, "step": 45770 }, { "epoch": 1.12, "learning_rate": 1.779436030045561e-05, "loss": 0.0126, "step": 45780 }, { "epoch": 1.12, "learning_rate": 1.779386775027706e-05, "loss": 0.0153, "step": 45790 }, { "epoch": 1.13, "learning_rate": 1.779337520009851e-05, "loss": 0.0102, "step": 45800 }, { "epoch": 1.13, "learning_rate": 1.7792882649919962e-05, "loss": 0.0124, "step": 45810 }, { "epoch": 1.13, "learning_rate": 1.779239009974141e-05, "loss": 0.0109, "step": 45820 }, { "epoch": 1.13, "learning_rate": 1.7791897549562864e-05, "loss": 0.0232, "step": 45830 }, { "epoch": 1.13, "learning_rate": 1.7791404999384313e-05, "loss": 0.0298, "step": 45840 }, { "epoch": 1.13, "learning_rate": 1.7790912449205765e-05, "loss": 0.0192, "step": 45850 }, { "epoch": 1.13, "learning_rate": 1.7790419899027215e-05, "loss": 0.0147, "step": 45860 }, { "epoch": 1.13, "learning_rate": 1.7789927348848667e-05, "loss": 0.0197, "step": 45870 }, { "epoch": 1.13, "learning_rate": 1.7789434798670116e-05, "loss": 0.0224, "step": 45880 }, { "epoch": 1.13, "learning_rate": 1.778894224849157e-05, "loss": 0.0125, "step": 45890 }, { "epoch": 1.13, "learning_rate": 1.7788449698313018e-05, "loss": 0.0134, "step": 45900 }, { "epoch": 1.13, "learning_rate": 1.7787957148134467e-05, "loss": 0.0116, "step": 45910 }, { "epoch": 1.13, "learning_rate": 1.778746459795592e-05, "loss": 0.019, "step": 45920 }, { "epoch": 1.13, "learning_rate": 1.778697204777737e-05, "loss": 0.0181, "step": 45930 }, { "epoch": 1.13, "learning_rate": 1.7786479497598818e-05, "loss": 0.0222, "step": 45940 }, { "epoch": 1.13, "learning_rate": 1.778598694742027e-05, "loss": 0.0133, "step": 45950 }, { "epoch": 1.13, "learning_rate": 1.778549439724172e-05, "loss": 0.0205, "step": 45960 }, { "epoch": 1.13, "learning_rate": 1.778500184706317e-05, "loss": 0.0116, "step": 45970 }, { "epoch": 1.13, "learning_rate": 1.778450929688462e-05, "loss": 0.0149, "step": 45980 }, { "epoch": 1.13, "learning_rate": 1.778401674670607e-05, "loss": 0.0123, "step": 45990 }, { "epoch": 1.13, "learning_rate": 1.7783524196527523e-05, "loss": 0.019, "step": 46000 }, { "epoch": 1.13, "learning_rate": 1.7783031646348976e-05, "loss": 0.0133, "step": 46010 }, { "epoch": 1.13, "learning_rate": 1.7782539096170425e-05, "loss": 0.0128, "step": 46020 }, { "epoch": 1.13, "learning_rate": 1.7782046545991874e-05, "loss": 0.0132, "step": 46030 }, { "epoch": 1.13, "learning_rate": 1.7781553995813326e-05, "loss": 0.0145, "step": 46040 }, { "epoch": 1.13, "learning_rate": 1.7781061445634775e-05, "loss": 0.013, "step": 46050 }, { "epoch": 1.13, "learning_rate": 1.7780568895456225e-05, "loss": 0.0175, "step": 46060 }, { "epoch": 1.13, "learning_rate": 1.7780076345277677e-05, "loss": 0.0127, "step": 46070 }, { "epoch": 1.13, "learning_rate": 1.7779583795099126e-05, "loss": 0.0114, "step": 46080 }, { "epoch": 1.13, "learning_rate": 1.777909124492058e-05, "loss": 0.0116, "step": 46090 }, { "epoch": 1.13, "learning_rate": 1.7778598694742028e-05, "loss": 0.0184, "step": 46100 }, { "epoch": 1.13, "learning_rate": 1.7778106144563477e-05, "loss": 0.0119, "step": 46110 }, { "epoch": 1.13, "learning_rate": 1.777761359438493e-05, "loss": 0.0211, "step": 46120 }, { "epoch": 1.13, "learning_rate": 1.777712104420638e-05, "loss": 0.0106, "step": 46130 }, { "epoch": 1.13, "learning_rate": 1.777662849402783e-05, "loss": 0.0111, "step": 46140 }, { "epoch": 1.13, "learning_rate": 1.777613594384928e-05, "loss": 0.0128, "step": 46150 }, { "epoch": 1.13, "learning_rate": 1.7775643393670733e-05, "loss": 0.0123, "step": 46160 }, { "epoch": 1.13, "learning_rate": 1.7775150843492182e-05, "loss": 0.011, "step": 46170 }, { "epoch": 1.13, "learning_rate": 1.7774658293313635e-05, "loss": 0.0137, "step": 46180 }, { "epoch": 1.13, "learning_rate": 1.7774165743135084e-05, "loss": 0.0104, "step": 46190 }, { "epoch": 1.13, "learning_rate": 1.7773673192956533e-05, "loss": 0.0125, "step": 46200 }, { "epoch": 1.14, "learning_rate": 1.7773180642777986e-05, "loss": 0.0122, "step": 46210 }, { "epoch": 1.14, "learning_rate": 1.7772688092599435e-05, "loss": 0.0132, "step": 46220 }, { "epoch": 1.14, "learning_rate": 1.7772195542420884e-05, "loss": 0.0252, "step": 46230 }, { "epoch": 1.14, "learning_rate": 1.7771702992242336e-05, "loss": 0.0206, "step": 46240 }, { "epoch": 1.14, "learning_rate": 1.7771210442063786e-05, "loss": 0.0122, "step": 46250 }, { "epoch": 1.14, "learning_rate": 1.7770717891885235e-05, "loss": 0.0112, "step": 46260 }, { "epoch": 1.14, "learning_rate": 1.7770225341706687e-05, "loss": 0.0144, "step": 46270 }, { "epoch": 1.14, "learning_rate": 1.776973279152814e-05, "loss": 0.0146, "step": 46280 }, { "epoch": 1.14, "learning_rate": 1.776924024134959e-05, "loss": 0.0133, "step": 46290 }, { "epoch": 1.14, "learning_rate": 1.776874769117104e-05, "loss": 0.0219, "step": 46300 }, { "epoch": 1.14, "learning_rate": 1.776825514099249e-05, "loss": 0.0122, "step": 46310 }, { "epoch": 1.14, "learning_rate": 1.7767762590813943e-05, "loss": 0.0192, "step": 46320 }, { "epoch": 1.14, "learning_rate": 1.7767270040635392e-05, "loss": 0.0123, "step": 46330 }, { "epoch": 1.14, "learning_rate": 1.776677749045684e-05, "loss": 0.0109, "step": 46340 }, { "epoch": 1.14, "learning_rate": 1.7766284940278294e-05, "loss": 0.0113, "step": 46350 }, { "epoch": 1.14, "learning_rate": 1.7765792390099743e-05, "loss": 0.0146, "step": 46360 }, { "epoch": 1.14, "learning_rate": 1.7765299839921192e-05, "loss": 0.0139, "step": 46370 }, { "epoch": 1.14, "learning_rate": 1.7764807289742645e-05, "loss": 0.0112, "step": 46380 }, { "epoch": 1.14, "learning_rate": 1.7764314739564094e-05, "loss": 0.0124, "step": 46390 }, { "epoch": 1.14, "learning_rate": 1.7763822189385543e-05, "loss": 0.0204, "step": 46400 }, { "epoch": 1.14, "learning_rate": 1.7763329639206996e-05, "loss": 0.0176, "step": 46410 }, { "epoch": 1.14, "learning_rate": 1.7762837089028445e-05, "loss": 0.0144, "step": 46420 }, { "epoch": 1.14, "learning_rate": 1.7762344538849897e-05, "loss": 0.0122, "step": 46430 }, { "epoch": 1.14, "learning_rate": 1.7761851988671347e-05, "loss": 0.0113, "step": 46440 }, { "epoch": 1.14, "learning_rate": 1.77613594384928e-05, "loss": 0.0116, "step": 46450 }, { "epoch": 1.14, "learning_rate": 1.7760866888314248e-05, "loss": 0.012, "step": 46460 }, { "epoch": 1.14, "learning_rate": 1.77603743381357e-05, "loss": 0.0196, "step": 46470 }, { "epoch": 1.14, "learning_rate": 1.775988178795715e-05, "loss": 0.0117, "step": 46480 }, { "epoch": 1.14, "learning_rate": 1.77593892377786e-05, "loss": 0.0139, "step": 46490 }, { "epoch": 1.14, "learning_rate": 1.775889668760005e-05, "loss": 0.0116, "step": 46500 }, { "epoch": 1.14, "learning_rate": 1.77584041374215e-05, "loss": 0.0121, "step": 46510 }, { "epoch": 1.14, "learning_rate": 1.775791158724295e-05, "loss": 0.0113, "step": 46520 }, { "epoch": 1.14, "learning_rate": 1.7757419037064402e-05, "loss": 0.0128, "step": 46530 }, { "epoch": 1.14, "learning_rate": 1.775692648688585e-05, "loss": 0.0148, "step": 46540 }, { "epoch": 1.14, "learning_rate": 1.7756433936707304e-05, "loss": 0.0151, "step": 46550 }, { "epoch": 1.14, "learning_rate": 1.7755941386528753e-05, "loss": 0.0121, "step": 46560 }, { "epoch": 1.14, "learning_rate": 1.7755448836350206e-05, "loss": 0.0238, "step": 46570 }, { "epoch": 1.14, "learning_rate": 1.7754956286171655e-05, "loss": 0.0136, "step": 46580 }, { "epoch": 1.14, "learning_rate": 1.7754463735993107e-05, "loss": 0.0127, "step": 46590 }, { "epoch": 1.14, "learning_rate": 1.7753971185814557e-05, "loss": 0.012, "step": 46600 }, { "epoch": 1.15, "learning_rate": 1.775347863563601e-05, "loss": 0.011, "step": 46610 }, { "epoch": 1.15, "learning_rate": 1.7752986085457458e-05, "loss": 0.0181, "step": 46620 }, { "epoch": 1.15, "learning_rate": 1.7752493535278907e-05, "loss": 0.0182, "step": 46630 }, { "epoch": 1.15, "learning_rate": 1.775200098510036e-05, "loss": 0.0194, "step": 46640 }, { "epoch": 1.15, "learning_rate": 1.775150843492181e-05, "loss": 0.012, "step": 46650 }, { "epoch": 1.15, "learning_rate": 1.7751015884743258e-05, "loss": 0.016, "step": 46660 }, { "epoch": 1.15, "learning_rate": 1.775052333456471e-05, "loss": 0.0114, "step": 46670 }, { "epoch": 1.15, "learning_rate": 1.775003078438616e-05, "loss": 0.0163, "step": 46680 }, { "epoch": 1.15, "learning_rate": 1.774953823420761e-05, "loss": 0.0134, "step": 46690 }, { "epoch": 1.15, "learning_rate": 1.774904568402906e-05, "loss": 0.0123, "step": 46700 }, { "epoch": 1.15, "learning_rate": 1.774855313385051e-05, "loss": 0.0122, "step": 46710 }, { "epoch": 1.15, "learning_rate": 1.7748060583671963e-05, "loss": 0.0116, "step": 46720 }, { "epoch": 1.15, "learning_rate": 1.7747568033493416e-05, "loss": 0.0147, "step": 46730 }, { "epoch": 1.15, "learning_rate": 1.7747075483314865e-05, "loss": 0.0149, "step": 46740 }, { "epoch": 1.15, "learning_rate": 1.7746582933136314e-05, "loss": 0.0118, "step": 46750 }, { "epoch": 1.15, "learning_rate": 1.7746090382957767e-05, "loss": 0.0105, "step": 46760 }, { "epoch": 1.15, "learning_rate": 1.7745597832779216e-05, "loss": 0.0191, "step": 46770 }, { "epoch": 1.15, "learning_rate": 1.7745105282600665e-05, "loss": 0.0195, "step": 46780 }, { "epoch": 1.15, "learning_rate": 1.7744612732422118e-05, "loss": 0.0113, "step": 46790 }, { "epoch": 1.15, "learning_rate": 1.7744120182243567e-05, "loss": 0.011, "step": 46800 }, { "epoch": 1.15, "learning_rate": 1.774362763206502e-05, "loss": 0.0125, "step": 46810 }, { "epoch": 1.15, "learning_rate": 1.774313508188647e-05, "loss": 0.0106, "step": 46820 }, { "epoch": 1.15, "learning_rate": 1.7742642531707918e-05, "loss": 0.0093, "step": 46830 }, { "epoch": 1.15, "learning_rate": 1.774214998152937e-05, "loss": 0.0125, "step": 46840 }, { "epoch": 1.15, "learning_rate": 1.774165743135082e-05, "loss": 0.0166, "step": 46850 }, { "epoch": 1.15, "learning_rate": 1.7741164881172272e-05, "loss": 0.0114, "step": 46860 }, { "epoch": 1.15, "learning_rate": 1.774067233099372e-05, "loss": 0.0198, "step": 46870 }, { "epoch": 1.15, "learning_rate": 1.7740179780815173e-05, "loss": 0.0098, "step": 46880 }, { "epoch": 1.15, "learning_rate": 1.7739687230636623e-05, "loss": 0.0126, "step": 46890 }, { "epoch": 1.15, "learning_rate": 1.7739194680458075e-05, "loss": 0.0133, "step": 46900 }, { "epoch": 1.15, "learning_rate": 1.7738702130279524e-05, "loss": 0.0115, "step": 46910 }, { "epoch": 1.15, "learning_rate": 1.7738209580100973e-05, "loss": 0.0182, "step": 46920 }, { "epoch": 1.15, "learning_rate": 1.7737717029922426e-05, "loss": 0.0121, "step": 46930 }, { "epoch": 1.15, "learning_rate": 1.7737224479743875e-05, "loss": 0.0123, "step": 46940 }, { "epoch": 1.15, "learning_rate": 1.7736731929565324e-05, "loss": 0.0124, "step": 46950 }, { "epoch": 1.15, "learning_rate": 1.7736239379386777e-05, "loss": 0.0121, "step": 46960 }, { "epoch": 1.15, "learning_rate": 1.7735746829208226e-05, "loss": 0.0109, "step": 46970 }, { "epoch": 1.15, "learning_rate": 1.7735254279029675e-05, "loss": 0.0117, "step": 46980 }, { "epoch": 1.15, "learning_rate": 1.7734761728851128e-05, "loss": 0.0106, "step": 46990 }, { "epoch": 1.15, "learning_rate": 1.773426917867258e-05, "loss": 0.0138, "step": 47000 }, { "epoch": 1.15, "learning_rate": 1.773377662849403e-05, "loss": 0.0168, "step": 47010 }, { "epoch": 1.16, "learning_rate": 1.7733284078315482e-05, "loss": 0.0158, "step": 47020 }, { "epoch": 1.16, "learning_rate": 1.773279152813693e-05, "loss": 0.0179, "step": 47030 }, { "epoch": 1.16, "learning_rate": 1.7732298977958384e-05, "loss": 0.0133, "step": 47040 }, { "epoch": 1.16, "learning_rate": 1.7731806427779833e-05, "loss": 0.0131, "step": 47050 }, { "epoch": 1.16, "learning_rate": 1.7731313877601282e-05, "loss": 0.0175, "step": 47060 }, { "epoch": 1.16, "learning_rate": 1.7730821327422734e-05, "loss": 0.0197, "step": 47070 }, { "epoch": 1.16, "learning_rate": 1.7730328777244183e-05, "loss": 0.0127, "step": 47080 }, { "epoch": 1.16, "learning_rate": 1.7729836227065633e-05, "loss": 0.0105, "step": 47090 }, { "epoch": 1.16, "learning_rate": 1.7729343676887085e-05, "loss": 0.0124, "step": 47100 }, { "epoch": 1.16, "learning_rate": 1.7728851126708534e-05, "loss": 0.0113, "step": 47110 }, { "epoch": 1.16, "learning_rate": 1.7728358576529983e-05, "loss": 0.0116, "step": 47120 }, { "epoch": 1.16, "learning_rate": 1.7727866026351436e-05, "loss": 0.0138, "step": 47130 }, { "epoch": 1.16, "learning_rate": 1.7727373476172885e-05, "loss": 0.0106, "step": 47140 }, { "epoch": 1.16, "learning_rate": 1.7726880925994338e-05, "loss": 0.0239, "step": 47150 }, { "epoch": 1.16, "learning_rate": 1.7726388375815787e-05, "loss": 0.0105, "step": 47160 }, { "epoch": 1.16, "learning_rate": 1.772589582563724e-05, "loss": 0.0112, "step": 47170 }, { "epoch": 1.16, "learning_rate": 1.772540327545869e-05, "loss": 0.012, "step": 47180 }, { "epoch": 1.16, "learning_rate": 1.772491072528014e-05, "loss": 0.0132, "step": 47190 }, { "epoch": 1.16, "learning_rate": 1.772441817510159e-05, "loss": 0.0188, "step": 47200 }, { "epoch": 1.16, "learning_rate": 1.772392562492304e-05, "loss": 0.0323, "step": 47210 }, { "epoch": 1.16, "learning_rate": 1.7723433074744492e-05, "loss": 0.0125, "step": 47220 }, { "epoch": 1.16, "learning_rate": 1.772294052456594e-05, "loss": 0.0136, "step": 47230 }, { "epoch": 1.16, "learning_rate": 1.772244797438739e-05, "loss": 0.018, "step": 47240 }, { "epoch": 1.16, "learning_rate": 1.7721955424208843e-05, "loss": 0.015, "step": 47250 }, { "epoch": 1.16, "learning_rate": 1.7721462874030292e-05, "loss": 0.0102, "step": 47260 }, { "epoch": 1.16, "learning_rate": 1.7720970323851744e-05, "loss": 0.0194, "step": 47270 }, { "epoch": 1.16, "learning_rate": 1.7720477773673194e-05, "loss": 0.0214, "step": 47280 }, { "epoch": 1.16, "learning_rate": 1.7719985223494646e-05, "loss": 0.0117, "step": 47290 }, { "epoch": 1.16, "learning_rate": 1.7719492673316095e-05, "loss": 0.0154, "step": 47300 }, { "epoch": 1.16, "learning_rate": 1.7719000123137548e-05, "loss": 0.0221, "step": 47310 }, { "epoch": 1.16, "learning_rate": 1.7718507572958997e-05, "loss": 0.0109, "step": 47320 }, { "epoch": 1.16, "learning_rate": 1.771801502278045e-05, "loss": 0.0166, "step": 47330 }, { "epoch": 1.16, "learning_rate": 1.77175224726019e-05, "loss": 0.0147, "step": 47340 }, { "epoch": 1.16, "learning_rate": 1.7717029922423348e-05, "loss": 0.0149, "step": 47350 }, { "epoch": 1.16, "learning_rate": 1.77165373722448e-05, "loss": 0.0118, "step": 47360 }, { "epoch": 1.16, "learning_rate": 1.771604482206625e-05, "loss": 0.0118, "step": 47370 }, { "epoch": 1.16, "learning_rate": 1.77155522718877e-05, "loss": 0.0128, "step": 47380 }, { "epoch": 1.16, "learning_rate": 1.771505972170915e-05, "loss": 0.0129, "step": 47390 }, { "epoch": 1.16, "learning_rate": 1.77145671715306e-05, "loss": 0.0192, "step": 47400 }, { "epoch": 1.16, "learning_rate": 1.771407462135205e-05, "loss": 0.0111, "step": 47410 }, { "epoch": 1.16, "learning_rate": 1.7713582071173502e-05, "loss": 0.0112, "step": 47420 }, { "epoch": 1.17, "learning_rate": 1.771308952099495e-05, "loss": 0.0135, "step": 47430 }, { "epoch": 1.17, "learning_rate": 1.7712596970816404e-05, "loss": 0.0129, "step": 47440 }, { "epoch": 1.17, "learning_rate": 1.7712104420637856e-05, "loss": 0.0107, "step": 47450 }, { "epoch": 1.17, "learning_rate": 1.7711611870459305e-05, "loss": 0.0122, "step": 47460 }, { "epoch": 1.17, "learning_rate": 1.7711119320280754e-05, "loss": 0.0134, "step": 47470 }, { "epoch": 1.17, "learning_rate": 1.7710626770102207e-05, "loss": 0.0247, "step": 47480 }, { "epoch": 1.17, "learning_rate": 1.7710134219923656e-05, "loss": 0.0126, "step": 47490 }, { "epoch": 1.17, "learning_rate": 1.7709641669745105e-05, "loss": 0.0183, "step": 47500 }, { "epoch": 1.17, "learning_rate": 1.7709149119566558e-05, "loss": 0.012, "step": 47510 }, { "epoch": 1.17, "learning_rate": 1.7708656569388007e-05, "loss": 0.0133, "step": 47520 }, { "epoch": 1.17, "learning_rate": 1.770816401920946e-05, "loss": 0.0189, "step": 47530 }, { "epoch": 1.17, "learning_rate": 1.770767146903091e-05, "loss": 0.0114, "step": 47540 }, { "epoch": 1.17, "learning_rate": 1.7707178918852358e-05, "loss": 0.0117, "step": 47550 }, { "epoch": 1.17, "learning_rate": 1.770668636867381e-05, "loss": 0.0103, "step": 47560 }, { "epoch": 1.17, "learning_rate": 1.770619381849526e-05, "loss": 0.0129, "step": 47570 }, { "epoch": 1.17, "learning_rate": 1.7705701268316712e-05, "loss": 0.0152, "step": 47580 }, { "epoch": 1.17, "learning_rate": 1.770520871813816e-05, "loss": 0.0147, "step": 47590 }, { "epoch": 1.17, "learning_rate": 1.7704716167959614e-05, "loss": 0.0129, "step": 47600 }, { "epoch": 1.17, "learning_rate": 1.7704223617781063e-05, "loss": 0.012, "step": 47610 }, { "epoch": 1.17, "learning_rate": 1.7703731067602515e-05, "loss": 0.0115, "step": 47620 }, { "epoch": 1.17, "learning_rate": 1.7703238517423965e-05, "loss": 0.0105, "step": 47630 }, { "epoch": 1.17, "learning_rate": 1.7702745967245414e-05, "loss": 0.0105, "step": 47640 }, { "epoch": 1.17, "learning_rate": 1.7702253417066866e-05, "loss": 0.0153, "step": 47650 }, { "epoch": 1.17, "learning_rate": 1.7701760866888315e-05, "loss": 0.0132, "step": 47660 }, { "epoch": 1.17, "learning_rate": 1.7701268316709765e-05, "loss": 0.0125, "step": 47670 }, { "epoch": 1.17, "learning_rate": 1.7700775766531217e-05, "loss": 0.0132, "step": 47680 }, { "epoch": 1.17, "learning_rate": 1.7700283216352666e-05, "loss": 0.0122, "step": 47690 }, { "epoch": 1.17, "learning_rate": 1.7699790666174115e-05, "loss": 0.0132, "step": 47700 }, { "epoch": 1.17, "learning_rate": 1.7699298115995568e-05, "loss": 0.0108, "step": 47710 }, { "epoch": 1.17, "learning_rate": 1.769880556581702e-05, "loss": 0.025, "step": 47720 }, { "epoch": 1.17, "learning_rate": 1.769831301563847e-05, "loss": 0.0113, "step": 47730 }, { "epoch": 1.17, "learning_rate": 1.7697820465459922e-05, "loss": 0.0134, "step": 47740 }, { "epoch": 1.17, "learning_rate": 1.769732791528137e-05, "loss": 0.0114, "step": 47750 }, { "epoch": 1.17, "learning_rate": 1.7696835365102824e-05, "loss": 0.0142, "step": 47760 }, { "epoch": 1.17, "learning_rate": 1.7696342814924273e-05, "loss": 0.0166, "step": 47770 }, { "epoch": 1.17, "learning_rate": 1.7695850264745722e-05, "loss": 0.0119, "step": 47780 }, { "epoch": 1.17, "learning_rate": 1.7695357714567175e-05, "loss": 0.011, "step": 47790 }, { "epoch": 1.17, "learning_rate": 1.7694865164388624e-05, "loss": 0.0113, "step": 47800 }, { "epoch": 1.17, "learning_rate": 1.7694372614210073e-05, "loss": 0.0111, "step": 47810 }, { "epoch": 1.17, "learning_rate": 1.7693880064031526e-05, "loss": 0.0124, "step": 47820 }, { "epoch": 1.18, "learning_rate": 1.7693387513852975e-05, "loss": 0.0149, "step": 47830 }, { "epoch": 1.18, "learning_rate": 1.7692894963674424e-05, "loss": 0.0115, "step": 47840 }, { "epoch": 1.18, "learning_rate": 1.7692402413495876e-05, "loss": 0.0116, "step": 47850 }, { "epoch": 1.18, "learning_rate": 1.7691909863317325e-05, "loss": 0.0168, "step": 47860 }, { "epoch": 1.18, "learning_rate": 1.7691417313138778e-05, "loss": 0.012, "step": 47870 }, { "epoch": 1.18, "learning_rate": 1.7690924762960227e-05, "loss": 0.0114, "step": 47880 }, { "epoch": 1.18, "learning_rate": 1.769043221278168e-05, "loss": 0.0111, "step": 47890 }, { "epoch": 1.18, "learning_rate": 1.768993966260313e-05, "loss": 0.0104, "step": 47900 }, { "epoch": 1.18, "learning_rate": 1.768944711242458e-05, "loss": 0.0247, "step": 47910 }, { "epoch": 1.18, "learning_rate": 1.768895456224603e-05, "loss": 0.0126, "step": 47920 }, { "epoch": 1.18, "learning_rate": 1.768846201206748e-05, "loss": 0.0158, "step": 47930 }, { "epoch": 1.18, "learning_rate": 1.7687969461888932e-05, "loss": 0.0124, "step": 47940 }, { "epoch": 1.18, "learning_rate": 1.768747691171038e-05, "loss": 0.0193, "step": 47950 }, { "epoch": 1.18, "learning_rate": 1.768698436153183e-05, "loss": 0.021, "step": 47960 }, { "epoch": 1.18, "learning_rate": 1.7686491811353283e-05, "loss": 0.0135, "step": 47970 }, { "epoch": 1.18, "learning_rate": 1.7685999261174732e-05, "loss": 0.0135, "step": 47980 }, { "epoch": 1.18, "learning_rate": 1.7685506710996185e-05, "loss": 0.0121, "step": 47990 }, { "epoch": 1.18, "learning_rate": 1.7685014160817634e-05, "loss": 0.0116, "step": 48000 }, { "epoch": 1.18, "learning_rate": 1.7684521610639086e-05, "loss": 0.0159, "step": 48010 }, { "epoch": 1.18, "learning_rate": 1.7684029060460536e-05, "loss": 0.0129, "step": 48020 }, { "epoch": 1.18, "learning_rate": 1.7683536510281988e-05, "loss": 0.0121, "step": 48030 }, { "epoch": 1.18, "learning_rate": 1.7683043960103437e-05, "loss": 0.0114, "step": 48040 }, { "epoch": 1.18, "learning_rate": 1.768255140992489e-05, "loss": 0.0121, "step": 48050 }, { "epoch": 1.18, "learning_rate": 1.768205885974634e-05, "loss": 0.0194, "step": 48060 }, { "epoch": 1.18, "learning_rate": 1.7681566309567788e-05, "loss": 0.0115, "step": 48070 }, { "epoch": 1.18, "learning_rate": 1.768107375938924e-05, "loss": 0.0147, "step": 48080 }, { "epoch": 1.18, "learning_rate": 1.768058120921069e-05, "loss": 0.011, "step": 48090 }, { "epoch": 1.18, "learning_rate": 1.768008865903214e-05, "loss": 0.0114, "step": 48100 }, { "epoch": 1.18, "learning_rate": 1.767959610885359e-05, "loss": 0.0111, "step": 48110 }, { "epoch": 1.18, "learning_rate": 1.767910355867504e-05, "loss": 0.0121, "step": 48120 }, { "epoch": 1.18, "learning_rate": 1.767861100849649e-05, "loss": 0.0233, "step": 48130 }, { "epoch": 1.18, "learning_rate": 1.7678118458317942e-05, "loss": 0.0192, "step": 48140 }, { "epoch": 1.18, "learning_rate": 1.767762590813939e-05, "loss": 0.0175, "step": 48150 }, { "epoch": 1.18, "learning_rate": 1.7677133357960844e-05, "loss": 0.0205, "step": 48160 }, { "epoch": 1.18, "learning_rate": 1.7676640807782297e-05, "loss": 0.0126, "step": 48170 }, { "epoch": 1.18, "learning_rate": 1.7676148257603746e-05, "loss": 0.0119, "step": 48180 }, { "epoch": 1.18, "learning_rate": 1.7675655707425195e-05, "loss": 0.0166, "step": 48190 }, { "epoch": 1.18, "learning_rate": 1.7675163157246647e-05, "loss": 0.0127, "step": 48200 }, { "epoch": 1.18, "learning_rate": 1.7674670607068097e-05, "loss": 0.0128, "step": 48210 }, { "epoch": 1.18, "learning_rate": 1.7674178056889546e-05, "loss": 0.0129, "step": 48220 }, { "epoch": 1.18, "learning_rate": 1.7673685506710998e-05, "loss": 0.0159, "step": 48230 }, { "epoch": 1.19, "learning_rate": 1.7673192956532447e-05, "loss": 0.0127, "step": 48240 }, { "epoch": 1.19, "learning_rate": 1.76727004063539e-05, "loss": 0.0138, "step": 48250 }, { "epoch": 1.19, "learning_rate": 1.767220785617535e-05, "loss": 0.0101, "step": 48260 }, { "epoch": 1.19, "learning_rate": 1.7671715305996798e-05, "loss": 0.0117, "step": 48270 }, { "epoch": 1.19, "learning_rate": 1.767122275581825e-05, "loss": 0.0112, "step": 48280 }, { "epoch": 1.19, "learning_rate": 1.76707302056397e-05, "loss": 0.0117, "step": 48290 }, { "epoch": 1.19, "learning_rate": 1.7670237655461152e-05, "loss": 0.0136, "step": 48300 }, { "epoch": 1.19, "learning_rate": 1.76697451052826e-05, "loss": 0.013, "step": 48310 }, { "epoch": 1.19, "learning_rate": 1.7669252555104054e-05, "loss": 0.0116, "step": 48320 }, { "epoch": 1.19, "learning_rate": 1.7668760004925503e-05, "loss": 0.0128, "step": 48330 }, { "epoch": 1.19, "learning_rate": 1.7668267454746956e-05, "loss": 0.0131, "step": 48340 }, { "epoch": 1.19, "learning_rate": 1.7667774904568405e-05, "loss": 0.0133, "step": 48350 }, { "epoch": 1.19, "learning_rate": 1.7667282354389854e-05, "loss": 0.0131, "step": 48360 }, { "epoch": 1.19, "learning_rate": 1.7666789804211307e-05, "loss": 0.0144, "step": 48370 }, { "epoch": 1.19, "learning_rate": 1.7666297254032756e-05, "loss": 0.0121, "step": 48380 }, { "epoch": 1.19, "learning_rate": 1.7665804703854205e-05, "loss": 0.0136, "step": 48390 }, { "epoch": 1.19, "learning_rate": 1.7665312153675657e-05, "loss": 0.0135, "step": 48400 }, { "epoch": 1.19, "learning_rate": 1.7664819603497107e-05, "loss": 0.0131, "step": 48410 }, { "epoch": 1.19, "learning_rate": 1.7664327053318556e-05, "loss": 0.0105, "step": 48420 }, { "epoch": 1.19, "learning_rate": 1.7663834503140008e-05, "loss": 0.012, "step": 48430 }, { "epoch": 1.19, "learning_rate": 1.766334195296146e-05, "loss": 0.0128, "step": 48440 }, { "epoch": 1.19, "learning_rate": 1.766284940278291e-05, "loss": 0.0138, "step": 48450 }, { "epoch": 1.19, "learning_rate": 1.7662356852604363e-05, "loss": 0.0148, "step": 48460 }, { "epoch": 1.19, "learning_rate": 1.766186430242581e-05, "loss": 0.0147, "step": 48470 }, { "epoch": 1.19, "learning_rate": 1.766137175224726e-05, "loss": 0.0157, "step": 48480 }, { "epoch": 1.19, "learning_rate": 1.7660879202068713e-05, "loss": 0.0142, "step": 48490 }, { "epoch": 1.19, "learning_rate": 1.7660386651890162e-05, "loss": 0.0115, "step": 48500 }, { "epoch": 1.19, "learning_rate": 1.7659894101711615e-05, "loss": 0.0115, "step": 48510 }, { "epoch": 1.19, "learning_rate": 1.7659401551533064e-05, "loss": 0.0117, "step": 48520 }, { "epoch": 1.19, "learning_rate": 1.7658909001354513e-05, "loss": 0.0147, "step": 48530 }, { "epoch": 1.19, "learning_rate": 1.7658416451175966e-05, "loss": 0.0111, "step": 48540 }, { "epoch": 1.19, "learning_rate": 1.7657923900997415e-05, "loss": 0.0111, "step": 48550 }, { "epoch": 1.19, "learning_rate": 1.7657431350818864e-05, "loss": 0.0113, "step": 48560 }, { "epoch": 1.19, "learning_rate": 1.7656938800640317e-05, "loss": 0.0118, "step": 48570 }, { "epoch": 1.19, "learning_rate": 1.7656446250461766e-05, "loss": 0.0173, "step": 48580 }, { "epoch": 1.19, "learning_rate": 1.765595370028322e-05, "loss": 0.0137, "step": 48590 }, { "epoch": 1.19, "learning_rate": 1.7655461150104668e-05, "loss": 0.0204, "step": 48600 }, { "epoch": 1.19, "learning_rate": 1.765496859992612e-05, "loss": 0.0215, "step": 48610 }, { "epoch": 1.19, "learning_rate": 1.765447604974757e-05, "loss": 0.0113, "step": 48620 }, { "epoch": 1.19, "learning_rate": 1.7653983499569022e-05, "loss": 0.0116, "step": 48630 }, { "epoch": 1.19, "learning_rate": 1.765349094939047e-05, "loss": 0.0104, "step": 48640 }, { "epoch": 1.2, "learning_rate": 1.765299839921192e-05, "loss": 0.0155, "step": 48650 }, { "epoch": 1.2, "learning_rate": 1.7652505849033373e-05, "loss": 0.022, "step": 48660 }, { "epoch": 1.2, "learning_rate": 1.7652013298854822e-05, "loss": 0.0211, "step": 48670 }, { "epoch": 1.2, "learning_rate": 1.765152074867627e-05, "loss": 0.0106, "step": 48680 }, { "epoch": 1.2, "learning_rate": 1.7651028198497723e-05, "loss": 0.0158, "step": 48690 }, { "epoch": 1.2, "learning_rate": 1.7650535648319173e-05, "loss": 0.0125, "step": 48700 }, { "epoch": 1.2, "learning_rate": 1.7650043098140625e-05, "loss": 0.0132, "step": 48710 }, { "epoch": 1.2, "learning_rate": 1.7649550547962074e-05, "loss": 0.0208, "step": 48720 }, { "epoch": 1.2, "learning_rate": 1.7649057997783527e-05, "loss": 0.0145, "step": 48730 }, { "epoch": 1.2, "learning_rate": 1.7648565447604976e-05, "loss": 0.0242, "step": 48740 }, { "epoch": 1.2, "learning_rate": 1.764807289742643e-05, "loss": 0.0114, "step": 48750 }, { "epoch": 1.2, "learning_rate": 1.7647580347247878e-05, "loss": 0.0142, "step": 48760 }, { "epoch": 1.2, "learning_rate": 1.764708779706933e-05, "loss": 0.0112, "step": 48770 }, { "epoch": 1.2, "learning_rate": 1.764659524689078e-05, "loss": 0.0133, "step": 48780 }, { "epoch": 1.2, "learning_rate": 1.764610269671223e-05, "loss": 0.0163, "step": 48790 }, { "epoch": 1.2, "learning_rate": 1.764561014653368e-05, "loss": 0.012, "step": 48800 }, { "epoch": 1.2, "learning_rate": 1.764511759635513e-05, "loss": 0.0137, "step": 48810 }, { "epoch": 1.2, "learning_rate": 1.764462504617658e-05, "loss": 0.0125, "step": 48820 }, { "epoch": 1.2, "learning_rate": 1.7644132495998032e-05, "loss": 0.0105, "step": 48830 }, { "epoch": 1.2, "learning_rate": 1.764363994581948e-05, "loss": 0.0101, "step": 48840 }, { "epoch": 1.2, "learning_rate": 1.764314739564093e-05, "loss": 0.012, "step": 48850 }, { "epoch": 1.2, "learning_rate": 1.7642654845462383e-05, "loss": 0.0124, "step": 48860 }, { "epoch": 1.2, "learning_rate": 1.7642162295283832e-05, "loss": 0.0126, "step": 48870 }, { "epoch": 1.2, "learning_rate": 1.7641669745105284e-05, "loss": 0.0118, "step": 48880 }, { "epoch": 1.2, "learning_rate": 1.7641177194926737e-05, "loss": 0.012, "step": 48890 }, { "epoch": 1.2, "learning_rate": 1.7640684644748186e-05, "loss": 0.0116, "step": 48900 }, { "epoch": 1.2, "learning_rate": 1.7640192094569635e-05, "loss": 0.0112, "step": 48910 }, { "epoch": 1.2, "learning_rate": 1.7639699544391088e-05, "loss": 0.0129, "step": 48920 }, { "epoch": 1.2, "learning_rate": 1.7639206994212537e-05, "loss": 0.0135, "step": 48930 }, { "epoch": 1.2, "learning_rate": 1.7638714444033986e-05, "loss": 0.0207, "step": 48940 }, { "epoch": 1.2, "learning_rate": 1.763822189385544e-05, "loss": 0.0125, "step": 48950 }, { "epoch": 1.2, "learning_rate": 1.7637729343676888e-05, "loss": 0.0106, "step": 48960 }, { "epoch": 1.2, "learning_rate": 1.763723679349834e-05, "loss": 0.0125, "step": 48970 }, { "epoch": 1.2, "learning_rate": 1.763674424331979e-05, "loss": 0.0102, "step": 48980 }, { "epoch": 1.2, "learning_rate": 1.763625169314124e-05, "loss": 0.0155, "step": 48990 }, { "epoch": 1.2, "learning_rate": 1.763575914296269e-05, "loss": 0.0244, "step": 49000 }, { "epoch": 1.2, "learning_rate": 1.763526659278414e-05, "loss": 0.0125, "step": 49010 }, { "epoch": 1.2, "learning_rate": 1.7634774042605593e-05, "loss": 0.018, "step": 49020 }, { "epoch": 1.2, "learning_rate": 1.7634281492427042e-05, "loss": 0.017, "step": 49030 }, { "epoch": 1.2, "learning_rate": 1.7633788942248494e-05, "loss": 0.023, "step": 49040 }, { "epoch": 1.21, "learning_rate": 1.7633296392069944e-05, "loss": 0.0206, "step": 49050 }, { "epoch": 1.21, "learning_rate": 1.7632803841891396e-05, "loss": 0.0166, "step": 49060 }, { "epoch": 1.21, "learning_rate": 1.7632311291712845e-05, "loss": 0.0187, "step": 49070 }, { "epoch": 1.21, "learning_rate": 1.7631818741534294e-05, "loss": 0.0118, "step": 49080 }, { "epoch": 1.21, "learning_rate": 1.7631326191355747e-05, "loss": 0.0116, "step": 49090 }, { "epoch": 1.21, "learning_rate": 1.7630833641177196e-05, "loss": 0.0104, "step": 49100 }, { "epoch": 1.21, "learning_rate": 1.7630341090998645e-05, "loss": 0.0115, "step": 49110 }, { "epoch": 1.21, "learning_rate": 1.7629848540820098e-05, "loss": 0.0123, "step": 49120 }, { "epoch": 1.21, "learning_rate": 1.7629355990641547e-05, "loss": 0.0311, "step": 49130 }, { "epoch": 1.21, "learning_rate": 1.7628863440462996e-05, "loss": 0.0131, "step": 49140 }, { "epoch": 1.21, "learning_rate": 1.762837089028445e-05, "loss": 0.0123, "step": 49150 }, { "epoch": 1.21, "learning_rate": 1.76278783401059e-05, "loss": 0.0097, "step": 49160 }, { "epoch": 1.21, "learning_rate": 1.762738578992735e-05, "loss": 0.0119, "step": 49170 }, { "epoch": 1.21, "learning_rate": 1.7626893239748803e-05, "loss": 0.0109, "step": 49180 }, { "epoch": 1.21, "learning_rate": 1.7626400689570252e-05, "loss": 0.0122, "step": 49190 }, { "epoch": 1.21, "learning_rate": 1.76259081393917e-05, "loss": 0.0147, "step": 49200 }, { "epoch": 1.21, "learning_rate": 1.7625415589213154e-05, "loss": 0.0125, "step": 49210 }, { "epoch": 1.21, "learning_rate": 1.7624923039034603e-05, "loss": 0.0127, "step": 49220 }, { "epoch": 1.21, "learning_rate": 1.7624430488856055e-05, "loss": 0.0135, "step": 49230 }, { "epoch": 1.21, "learning_rate": 1.7623937938677505e-05, "loss": 0.0197, "step": 49240 }, { "epoch": 1.21, "learning_rate": 1.7623445388498954e-05, "loss": 0.0129, "step": 49250 }, { "epoch": 1.21, "learning_rate": 1.7622952838320406e-05, "loss": 0.0176, "step": 49260 }, { "epoch": 1.21, "learning_rate": 1.7622460288141855e-05, "loss": 0.0168, "step": 49270 }, { "epoch": 1.21, "learning_rate": 1.7621967737963304e-05, "loss": 0.0129, "step": 49280 }, { "epoch": 1.21, "learning_rate": 1.7621475187784757e-05, "loss": 0.011, "step": 49290 }, { "epoch": 1.21, "learning_rate": 1.7620982637606206e-05, "loss": 0.012, "step": 49300 }, { "epoch": 1.21, "learning_rate": 1.762049008742766e-05, "loss": 0.0124, "step": 49310 }, { "epoch": 1.21, "learning_rate": 1.7619997537249108e-05, "loss": 0.0219, "step": 49320 }, { "epoch": 1.21, "learning_rate": 1.761950498707056e-05, "loss": 0.0128, "step": 49330 }, { "epoch": 1.21, "learning_rate": 1.761901243689201e-05, "loss": 0.0144, "step": 49340 }, { "epoch": 1.21, "learning_rate": 1.7618519886713462e-05, "loss": 0.0183, "step": 49350 }, { "epoch": 1.21, "learning_rate": 1.761802733653491e-05, "loss": 0.0154, "step": 49360 }, { "epoch": 1.21, "learning_rate": 1.761753478635636e-05, "loss": 0.0217, "step": 49370 }, { "epoch": 1.21, "learning_rate": 1.7617042236177813e-05, "loss": 0.0121, "step": 49380 }, { "epoch": 1.21, "learning_rate": 1.7616549685999262e-05, "loss": 0.0204, "step": 49390 }, { "epoch": 1.21, "learning_rate": 1.761605713582071e-05, "loss": 0.0124, "step": 49400 }, { "epoch": 1.21, "learning_rate": 1.7615564585642164e-05, "loss": 0.0241, "step": 49410 }, { "epoch": 1.21, "learning_rate": 1.7615072035463613e-05, "loss": 0.0108, "step": 49420 }, { "epoch": 1.21, "learning_rate": 1.7614579485285065e-05, "loss": 0.0228, "step": 49430 }, { "epoch": 1.21, "learning_rate": 1.7614086935106515e-05, "loss": 0.0144, "step": 49440 }, { "epoch": 1.21, "learning_rate": 1.7613594384927967e-05, "loss": 0.0122, "step": 49450 }, { "epoch": 1.22, "learning_rate": 1.7613101834749416e-05, "loss": 0.0116, "step": 49460 }, { "epoch": 1.22, "learning_rate": 1.761260928457087e-05, "loss": 0.012, "step": 49470 }, { "epoch": 1.22, "learning_rate": 1.7612116734392318e-05, "loss": 0.0128, "step": 49480 }, { "epoch": 1.22, "learning_rate": 1.761162418421377e-05, "loss": 0.0183, "step": 49490 }, { "epoch": 1.22, "learning_rate": 1.761113163403522e-05, "loss": 0.0127, "step": 49500 }, { "epoch": 1.22, "learning_rate": 1.761063908385667e-05, "loss": 0.0108, "step": 49510 }, { "epoch": 1.22, "learning_rate": 1.761014653367812e-05, "loss": 0.0158, "step": 49520 }, { "epoch": 1.22, "learning_rate": 1.760965398349957e-05, "loss": 0.0103, "step": 49530 }, { "epoch": 1.22, "learning_rate": 1.760916143332102e-05, "loss": 0.0147, "step": 49540 }, { "epoch": 1.22, "learning_rate": 1.7608668883142472e-05, "loss": 0.0118, "step": 49550 }, { "epoch": 1.22, "learning_rate": 1.760817633296392e-05, "loss": 0.0201, "step": 49560 }, { "epoch": 1.22, "learning_rate": 1.760768378278537e-05, "loss": 0.0118, "step": 49570 }, { "epoch": 1.22, "learning_rate": 1.7607191232606823e-05, "loss": 0.011, "step": 49580 }, { "epoch": 1.22, "learning_rate": 1.7606698682428272e-05, "loss": 0.0153, "step": 49590 }, { "epoch": 1.22, "learning_rate": 1.7606206132249725e-05, "loss": 0.0275, "step": 49600 }, { "epoch": 1.22, "learning_rate": 1.7605713582071177e-05, "loss": 0.0127, "step": 49610 }, { "epoch": 1.22, "learning_rate": 1.7605221031892626e-05, "loss": 0.0178, "step": 49620 }, { "epoch": 1.22, "learning_rate": 1.7604728481714076e-05, "loss": 0.0163, "step": 49630 }, { "epoch": 1.22, "learning_rate": 1.7604235931535528e-05, "loss": 0.0122, "step": 49640 }, { "epoch": 1.22, "learning_rate": 1.7603743381356977e-05, "loss": 0.0106, "step": 49650 }, { "epoch": 1.22, "learning_rate": 1.7603250831178426e-05, "loss": 0.0119, "step": 49660 }, { "epoch": 1.22, "learning_rate": 1.760275828099988e-05, "loss": 0.0116, "step": 49670 }, { "epoch": 1.22, "learning_rate": 1.7602265730821328e-05, "loss": 0.0123, "step": 49680 }, { "epoch": 1.22, "learning_rate": 1.760177318064278e-05, "loss": 0.0201, "step": 49690 }, { "epoch": 1.22, "learning_rate": 1.760128063046423e-05, "loss": 0.0322, "step": 49700 }, { "epoch": 1.22, "learning_rate": 1.760078808028568e-05, "loss": 0.0116, "step": 49710 }, { "epoch": 1.22, "learning_rate": 1.760029553010713e-05, "loss": 0.0126, "step": 49720 }, { "epoch": 1.22, "learning_rate": 1.759980297992858e-05, "loss": 0.0208, "step": 49730 }, { "epoch": 1.22, "learning_rate": 1.7599310429750033e-05, "loss": 0.0131, "step": 49740 }, { "epoch": 1.22, "learning_rate": 1.7598817879571482e-05, "loss": 0.0121, "step": 49750 }, { "epoch": 1.22, "learning_rate": 1.7598325329392935e-05, "loss": 0.0118, "step": 49760 }, { "epoch": 1.22, "learning_rate": 1.7597832779214384e-05, "loss": 0.0114, "step": 49770 }, { "epoch": 1.22, "learning_rate": 1.7597340229035836e-05, "loss": 0.0115, "step": 49780 }, { "epoch": 1.22, "learning_rate": 1.7596847678857286e-05, "loss": 0.0115, "step": 49790 }, { "epoch": 1.22, "learning_rate": 1.7596355128678735e-05, "loss": 0.0161, "step": 49800 }, { "epoch": 1.22, "learning_rate": 1.7595862578500187e-05, "loss": 0.0187, "step": 49810 }, { "epoch": 1.22, "learning_rate": 1.7595370028321636e-05, "loss": 0.0159, "step": 49820 }, { "epoch": 1.22, "learning_rate": 1.7594877478143086e-05, "loss": 0.011, "step": 49830 }, { "epoch": 1.22, "learning_rate": 1.7594384927964538e-05, "loss": 0.011, "step": 49840 }, { "epoch": 1.22, "learning_rate": 1.7593892377785987e-05, "loss": 0.0125, "step": 49850 }, { "epoch": 1.22, "learning_rate": 1.7593399827607436e-05, "loss": 0.0129, "step": 49860 }, { "epoch": 1.23, "learning_rate": 1.759290727742889e-05, "loss": 0.0133, "step": 49870 }, { "epoch": 1.23, "learning_rate": 1.759241472725034e-05, "loss": 0.0127, "step": 49880 }, { "epoch": 1.23, "learning_rate": 1.759192217707179e-05, "loss": 0.0125, "step": 49890 }, { "epoch": 1.23, "learning_rate": 1.7591429626893243e-05, "loss": 0.0142, "step": 49900 }, { "epoch": 1.23, "learning_rate": 1.7590937076714692e-05, "loss": 0.0191, "step": 49910 }, { "epoch": 1.23, "learning_rate": 1.759044452653614e-05, "loss": 0.0184, "step": 49920 }, { "epoch": 1.23, "learning_rate": 1.7589951976357594e-05, "loss": 0.0103, "step": 49930 }, { "epoch": 1.23, "learning_rate": 1.7589459426179043e-05, "loss": 0.0256, "step": 49940 }, { "epoch": 1.23, "learning_rate": 1.7588966876000496e-05, "loss": 0.0103, "step": 49950 }, { "epoch": 1.23, "learning_rate": 1.7588474325821945e-05, "loss": 0.0165, "step": 49960 }, { "epoch": 1.23, "learning_rate": 1.7587981775643394e-05, "loss": 0.012, "step": 49970 }, { "epoch": 1.23, "learning_rate": 1.7587489225464847e-05, "loss": 0.0132, "step": 49980 }, { "epoch": 1.23, "learning_rate": 1.7586996675286296e-05, "loss": 0.0118, "step": 49990 }, { "epoch": 1.23, "learning_rate": 1.7586504125107745e-05, "loss": 0.0123, "step": 50000 }, { "epoch": 1.23, "eval_bleu": 97.8163, "eval_gen_len_ratio": 1.0016, "eval_loss": 0.009803852066397667, "eval_runtime": 276.443, "eval_samples_per_second": 6.75, "eval_steps_per_second": 0.213, "step": 50000 }, { "epoch": 1.23, "learning_rate": 1.7586011574929197e-05, "loss": 0.012, "step": 50010 }, { "epoch": 1.23, "learning_rate": 1.7585519024750647e-05, "loss": 0.014, "step": 50020 }, { "epoch": 1.23, "learning_rate": 1.75850264745721e-05, "loss": 0.0169, "step": 50030 }, { "epoch": 1.23, "learning_rate": 1.7584533924393548e-05, "loss": 0.0121, "step": 50040 }, { "epoch": 1.23, "learning_rate": 1.7584041374215e-05, "loss": 0.0104, "step": 50050 }, { "epoch": 1.23, "learning_rate": 1.758354882403645e-05, "loss": 0.0119, "step": 50060 }, { "epoch": 1.23, "learning_rate": 1.7583056273857902e-05, "loss": 0.0171, "step": 50070 }, { "epoch": 1.23, "learning_rate": 1.758256372367935e-05, "loss": 0.013, "step": 50080 }, { "epoch": 1.23, "learning_rate": 1.75820711735008e-05, "loss": 0.0126, "step": 50090 }, { "epoch": 1.23, "learning_rate": 1.7581578623322253e-05, "loss": 0.0135, "step": 50100 }, { "epoch": 1.23, "learning_rate": 1.7581086073143702e-05, "loss": 0.013, "step": 50110 }, { "epoch": 1.23, "learning_rate": 1.758059352296515e-05, "loss": 0.014, "step": 50120 }, { "epoch": 1.23, "learning_rate": 1.7580100972786604e-05, "loss": 0.0189, "step": 50130 }, { "epoch": 1.23, "learning_rate": 1.7579608422608053e-05, "loss": 0.0111, "step": 50140 }, { "epoch": 1.23, "learning_rate": 1.7579115872429506e-05, "loss": 0.0201, "step": 50150 }, { "epoch": 1.23, "learning_rate": 1.7578623322250955e-05, "loss": 0.0198, "step": 50160 }, { "epoch": 1.23, "learning_rate": 1.7578130772072407e-05, "loss": 0.0171, "step": 50170 }, { "epoch": 1.23, "learning_rate": 1.7577638221893857e-05, "loss": 0.0111, "step": 50180 }, { "epoch": 1.23, "learning_rate": 1.757714567171531e-05, "loss": 0.0123, "step": 50190 }, { "epoch": 1.23, "learning_rate": 1.7576653121536758e-05, "loss": 0.0128, "step": 50200 }, { "epoch": 1.23, "learning_rate": 1.757616057135821e-05, "loss": 0.0116, "step": 50210 }, { "epoch": 1.23, "learning_rate": 1.757566802117966e-05, "loss": 0.0101, "step": 50220 }, { "epoch": 1.23, "learning_rate": 1.757517547100111e-05, "loss": 0.014, "step": 50230 }, { "epoch": 1.23, "learning_rate": 1.757468292082256e-05, "loss": 0.0246, "step": 50240 }, { "epoch": 1.23, "learning_rate": 1.757419037064401e-05, "loss": 0.0181, "step": 50250 }, { "epoch": 1.23, "learning_rate": 1.757369782046546e-05, "loss": 0.0128, "step": 50260 }, { "epoch": 1.23, "learning_rate": 1.7573205270286913e-05, "loss": 0.011, "step": 50270 }, { "epoch": 1.24, "learning_rate": 1.757271272010836e-05, "loss": 0.013, "step": 50280 }, { "epoch": 1.24, "learning_rate": 1.757222016992981e-05, "loss": 0.0123, "step": 50290 }, { "epoch": 1.24, "learning_rate": 1.7571727619751263e-05, "loss": 0.0125, "step": 50300 }, { "epoch": 1.24, "learning_rate": 1.7571235069572712e-05, "loss": 0.0193, "step": 50310 }, { "epoch": 1.24, "learning_rate": 1.7570742519394165e-05, "loss": 0.0096, "step": 50320 }, { "epoch": 1.24, "learning_rate": 1.7570249969215618e-05, "loss": 0.0106, "step": 50330 }, { "epoch": 1.24, "learning_rate": 1.7569757419037067e-05, "loss": 0.0114, "step": 50340 }, { "epoch": 1.24, "learning_rate": 1.7569264868858516e-05, "loss": 0.0114, "step": 50350 }, { "epoch": 1.24, "learning_rate": 1.756877231867997e-05, "loss": 0.0132, "step": 50360 }, { "epoch": 1.24, "learning_rate": 1.7568279768501418e-05, "loss": 0.0198, "step": 50370 }, { "epoch": 1.24, "learning_rate": 1.7567787218322867e-05, "loss": 0.0165, "step": 50380 }, { "epoch": 1.24, "learning_rate": 1.756729466814432e-05, "loss": 0.0105, "step": 50390 }, { "epoch": 1.24, "learning_rate": 1.756680211796577e-05, "loss": 0.0102, "step": 50400 }, { "epoch": 1.24, "learning_rate": 1.756630956778722e-05, "loss": 0.0201, "step": 50410 }, { "epoch": 1.24, "learning_rate": 1.756581701760867e-05, "loss": 0.0197, "step": 50420 }, { "epoch": 1.24, "learning_rate": 1.756532446743012e-05, "loss": 0.0114, "step": 50430 }, { "epoch": 1.24, "learning_rate": 1.7564831917251572e-05, "loss": 0.0127, "step": 50440 }, { "epoch": 1.24, "learning_rate": 1.756433936707302e-05, "loss": 0.0127, "step": 50450 }, { "epoch": 1.24, "learning_rate": 1.7563846816894473e-05, "loss": 0.0107, "step": 50460 }, { "epoch": 1.24, "learning_rate": 1.7563354266715923e-05, "loss": 0.0105, "step": 50470 }, { "epoch": 1.24, "learning_rate": 1.7562861716537375e-05, "loss": 0.0187, "step": 50480 }, { "epoch": 1.24, "learning_rate": 1.7562369166358824e-05, "loss": 0.0102, "step": 50490 }, { "epoch": 1.24, "learning_rate": 1.7561876616180277e-05, "loss": 0.011, "step": 50500 }, { "epoch": 1.24, "learning_rate": 1.7561384066001726e-05, "loss": 0.0132, "step": 50510 }, { "epoch": 1.24, "learning_rate": 1.7560891515823175e-05, "loss": 0.0219, "step": 50520 }, { "epoch": 1.24, "learning_rate": 1.7560398965644628e-05, "loss": 0.0118, "step": 50530 }, { "epoch": 1.24, "learning_rate": 1.7559906415466077e-05, "loss": 0.0138, "step": 50540 }, { "epoch": 1.24, "learning_rate": 1.7559413865287526e-05, "loss": 0.0136, "step": 50550 }, { "epoch": 1.24, "learning_rate": 1.755892131510898e-05, "loss": 0.0115, "step": 50560 }, { "epoch": 1.24, "learning_rate": 1.7558428764930428e-05, "loss": 0.0126, "step": 50570 }, { "epoch": 1.24, "learning_rate": 1.7557936214751877e-05, "loss": 0.0106, "step": 50580 }, { "epoch": 1.24, "learning_rate": 1.755744366457333e-05, "loss": 0.0121, "step": 50590 }, { "epoch": 1.24, "learning_rate": 1.7556951114394782e-05, "loss": 0.0116, "step": 50600 }, { "epoch": 1.24, "learning_rate": 1.755645856421623e-05, "loss": 0.0113, "step": 50610 }, { "epoch": 1.24, "learning_rate": 1.7555966014037684e-05, "loss": 0.0119, "step": 50620 }, { "epoch": 1.24, "learning_rate": 1.7555473463859133e-05, "loss": 0.0108, "step": 50630 }, { "epoch": 1.24, "learning_rate": 1.7554980913680582e-05, "loss": 0.0111, "step": 50640 }, { "epoch": 1.24, "learning_rate": 1.7554488363502034e-05, "loss": 0.0155, "step": 50650 }, { "epoch": 1.24, "learning_rate": 1.7553995813323484e-05, "loss": 0.0136, "step": 50660 }, { "epoch": 1.24, "learning_rate": 1.7553503263144936e-05, "loss": 0.0107, "step": 50670 }, { "epoch": 1.25, "learning_rate": 1.7553010712966385e-05, "loss": 0.0129, "step": 50680 }, { "epoch": 1.25, "learning_rate": 1.7552518162787834e-05, "loss": 0.0165, "step": 50690 }, { "epoch": 1.25, "learning_rate": 1.7552025612609287e-05, "loss": 0.0125, "step": 50700 }, { "epoch": 1.25, "learning_rate": 1.7551533062430736e-05, "loss": 0.0137, "step": 50710 }, { "epoch": 1.25, "learning_rate": 1.7551040512252185e-05, "loss": 0.0201, "step": 50720 }, { "epoch": 1.25, "learning_rate": 1.7550547962073638e-05, "loss": 0.013, "step": 50730 }, { "epoch": 1.25, "learning_rate": 1.7550055411895087e-05, "loss": 0.0118, "step": 50740 }, { "epoch": 1.25, "learning_rate": 1.754956286171654e-05, "loss": 0.0175, "step": 50750 }, { "epoch": 1.25, "learning_rate": 1.754907031153799e-05, "loss": 0.0182, "step": 50760 }, { "epoch": 1.25, "learning_rate": 1.754857776135944e-05, "loss": 0.0108, "step": 50770 }, { "epoch": 1.25, "learning_rate": 1.754808521118089e-05, "loss": 0.013, "step": 50780 }, { "epoch": 1.25, "learning_rate": 1.7547592661002343e-05, "loss": 0.0128, "step": 50790 }, { "epoch": 1.25, "learning_rate": 1.7547100110823792e-05, "loss": 0.0136, "step": 50800 }, { "epoch": 1.25, "learning_rate": 1.754660756064524e-05, "loss": 0.0176, "step": 50810 }, { "epoch": 1.25, "learning_rate": 1.7546115010466694e-05, "loss": 0.0125, "step": 50820 }, { "epoch": 1.25, "learning_rate": 1.7545622460288143e-05, "loss": 0.0131, "step": 50830 }, { "epoch": 1.25, "learning_rate": 1.7545129910109592e-05, "loss": 0.0121, "step": 50840 }, { "epoch": 1.25, "learning_rate": 1.7544637359931044e-05, "loss": 0.0133, "step": 50850 }, { "epoch": 1.25, "learning_rate": 1.7544144809752494e-05, "loss": 0.0175, "step": 50860 }, { "epoch": 1.25, "learning_rate": 1.7543652259573946e-05, "loss": 0.0115, "step": 50870 }, { "epoch": 1.25, "learning_rate": 1.7543159709395395e-05, "loss": 0.0126, "step": 50880 }, { "epoch": 1.25, "learning_rate": 1.7542667159216848e-05, "loss": 0.012, "step": 50890 }, { "epoch": 1.25, "learning_rate": 1.7542174609038297e-05, "loss": 0.0122, "step": 50900 }, { "epoch": 1.25, "learning_rate": 1.754168205885975e-05, "loss": 0.0207, "step": 50910 }, { "epoch": 1.25, "learning_rate": 1.75411895086812e-05, "loss": 0.011, "step": 50920 }, { "epoch": 1.25, "learning_rate": 1.754069695850265e-05, "loss": 0.0231, "step": 50930 }, { "epoch": 1.25, "learning_rate": 1.75402044083241e-05, "loss": 0.0121, "step": 50940 }, { "epoch": 1.25, "learning_rate": 1.753971185814555e-05, "loss": 0.0113, "step": 50950 }, { "epoch": 1.25, "learning_rate": 1.7539219307967002e-05, "loss": 0.0135, "step": 50960 }, { "epoch": 1.25, "learning_rate": 1.753872675778845e-05, "loss": 0.0165, "step": 50970 }, { "epoch": 1.25, "learning_rate": 1.75382342076099e-05, "loss": 0.0213, "step": 50980 }, { "epoch": 1.25, "learning_rate": 1.7537741657431353e-05, "loss": 0.0131, "step": 50990 }, { "epoch": 1.25, "learning_rate": 1.7537249107252802e-05, "loss": 0.0126, "step": 51000 }, { "epoch": 1.25, "learning_rate": 1.753675655707425e-05, "loss": 0.0134, "step": 51010 }, { "epoch": 1.25, "learning_rate": 1.7536264006895704e-05, "loss": 0.0113, "step": 51020 }, { "epoch": 1.25, "learning_rate": 1.7535771456717153e-05, "loss": 0.0141, "step": 51030 }, { "epoch": 1.25, "learning_rate": 1.7535278906538605e-05, "loss": 0.0121, "step": 51040 }, { "epoch": 1.25, "learning_rate": 1.7534786356360058e-05, "loss": 0.0127, "step": 51050 }, { "epoch": 1.25, "learning_rate": 1.7534293806181507e-05, "loss": 0.0114, "step": 51060 }, { "epoch": 1.25, "learning_rate": 1.7533801256002956e-05, "loss": 0.0187, "step": 51070 }, { "epoch": 1.25, "learning_rate": 1.753330870582441e-05, "loss": 0.0117, "step": 51080 }, { "epoch": 1.26, "learning_rate": 1.7532816155645858e-05, "loss": 0.0101, "step": 51090 }, { "epoch": 1.26, "learning_rate": 1.7532323605467307e-05, "loss": 0.011, "step": 51100 }, { "epoch": 1.26, "learning_rate": 1.753183105528876e-05, "loss": 0.0201, "step": 51110 }, { "epoch": 1.26, "learning_rate": 1.753133850511021e-05, "loss": 0.0105, "step": 51120 }, { "epoch": 1.26, "learning_rate": 1.753084595493166e-05, "loss": 0.0117, "step": 51130 }, { "epoch": 1.26, "learning_rate": 1.753035340475311e-05, "loss": 0.0137, "step": 51140 }, { "epoch": 1.26, "learning_rate": 1.752986085457456e-05, "loss": 0.0124, "step": 51150 }, { "epoch": 1.26, "learning_rate": 1.7529368304396012e-05, "loss": 0.0135, "step": 51160 }, { "epoch": 1.26, "learning_rate": 1.752887575421746e-05, "loss": 0.0146, "step": 51170 }, { "epoch": 1.26, "learning_rate": 1.7528383204038914e-05, "loss": 0.0133, "step": 51180 }, { "epoch": 1.26, "learning_rate": 1.7527890653860363e-05, "loss": 0.0213, "step": 51190 }, { "epoch": 1.26, "learning_rate": 1.7527398103681815e-05, "loss": 0.0232, "step": 51200 }, { "epoch": 1.26, "learning_rate": 1.7526905553503265e-05, "loss": 0.0122, "step": 51210 }, { "epoch": 1.26, "learning_rate": 1.7526413003324717e-05, "loss": 0.0155, "step": 51220 }, { "epoch": 1.26, "learning_rate": 1.7525920453146166e-05, "loss": 0.0111, "step": 51230 }, { "epoch": 1.26, "learning_rate": 1.7525427902967615e-05, "loss": 0.0143, "step": 51240 }, { "epoch": 1.26, "learning_rate": 1.7524935352789068e-05, "loss": 0.0136, "step": 51250 }, { "epoch": 1.26, "learning_rate": 1.7524442802610517e-05, "loss": 0.0121, "step": 51260 }, { "epoch": 1.26, "learning_rate": 1.7523950252431966e-05, "loss": 0.0108, "step": 51270 }, { "epoch": 1.26, "learning_rate": 1.752345770225342e-05, "loss": 0.0184, "step": 51280 }, { "epoch": 1.26, "learning_rate": 1.7522965152074868e-05, "loss": 0.0173, "step": 51290 }, { "epoch": 1.26, "learning_rate": 1.7522472601896317e-05, "loss": 0.0108, "step": 51300 }, { "epoch": 1.26, "learning_rate": 1.752198005171777e-05, "loss": 0.0121, "step": 51310 }, { "epoch": 1.26, "learning_rate": 1.7521487501539222e-05, "loss": 0.0201, "step": 51320 }, { "epoch": 1.26, "learning_rate": 1.752099495136067e-05, "loss": 0.0106, "step": 51330 }, { "epoch": 1.26, "learning_rate": 1.7520502401182124e-05, "loss": 0.0104, "step": 51340 }, { "epoch": 1.26, "learning_rate": 1.7520009851003573e-05, "loss": 0.0204, "step": 51350 }, { "epoch": 1.26, "learning_rate": 1.7519517300825022e-05, "loss": 0.012, "step": 51360 }, { "epoch": 1.26, "learning_rate": 1.7519024750646475e-05, "loss": 0.0128, "step": 51370 }, { "epoch": 1.26, "learning_rate": 1.7518532200467924e-05, "loss": 0.0119, "step": 51380 }, { "epoch": 1.26, "learning_rate": 1.7518039650289376e-05, "loss": 0.0127, "step": 51390 }, { "epoch": 1.26, "learning_rate": 1.7517547100110826e-05, "loss": 0.0292, "step": 51400 }, { "epoch": 1.26, "learning_rate": 1.7517054549932275e-05, "loss": 0.0111, "step": 51410 }, { "epoch": 1.26, "learning_rate": 1.7516561999753727e-05, "loss": 0.0155, "step": 51420 }, { "epoch": 1.26, "learning_rate": 1.7516069449575176e-05, "loss": 0.0126, "step": 51430 }, { "epoch": 1.26, "learning_rate": 1.7515576899396626e-05, "loss": 0.0113, "step": 51440 }, { "epoch": 1.26, "learning_rate": 1.7515084349218078e-05, "loss": 0.0124, "step": 51450 }, { "epoch": 1.26, "learning_rate": 1.7514591799039527e-05, "loss": 0.0131, "step": 51460 }, { "epoch": 1.26, "learning_rate": 1.751409924886098e-05, "loss": 0.0136, "step": 51470 }, { "epoch": 1.26, "learning_rate": 1.751360669868243e-05, "loss": 0.0111, "step": 51480 }, { "epoch": 1.26, "learning_rate": 1.751311414850388e-05, "loss": 0.012, "step": 51490 }, { "epoch": 1.27, "learning_rate": 1.751262159832533e-05, "loss": 0.0135, "step": 51500 }, { "epoch": 1.27, "learning_rate": 1.7512129048146783e-05, "loss": 0.0127, "step": 51510 }, { "epoch": 1.27, "learning_rate": 1.7511636497968232e-05, "loss": 0.0123, "step": 51520 }, { "epoch": 1.27, "learning_rate": 1.751114394778968e-05, "loss": 0.0118, "step": 51530 }, { "epoch": 1.27, "learning_rate": 1.7510651397611134e-05, "loss": 0.0122, "step": 51540 }, { "epoch": 1.27, "learning_rate": 1.7510158847432583e-05, "loss": 0.0115, "step": 51550 }, { "epoch": 1.27, "learning_rate": 1.7509666297254032e-05, "loss": 0.0286, "step": 51560 }, { "epoch": 1.27, "learning_rate": 1.7509173747075485e-05, "loss": 0.0125, "step": 51570 }, { "epoch": 1.27, "learning_rate": 1.7508681196896934e-05, "loss": 0.0105, "step": 51580 }, { "epoch": 1.27, "learning_rate": 1.7508188646718386e-05, "loss": 0.0108, "step": 51590 }, { "epoch": 1.27, "learning_rate": 1.7507696096539836e-05, "loss": 0.0119, "step": 51600 }, { "epoch": 1.27, "learning_rate": 1.7507203546361288e-05, "loss": 0.0135, "step": 51610 }, { "epoch": 1.27, "learning_rate": 1.7506710996182737e-05, "loss": 0.0125, "step": 51620 }, { "epoch": 1.27, "learning_rate": 1.750621844600419e-05, "loss": 0.0103, "step": 51630 }, { "epoch": 1.27, "learning_rate": 1.750572589582564e-05, "loss": 0.0176, "step": 51640 }, { "epoch": 1.27, "learning_rate": 1.750523334564709e-05, "loss": 0.0117, "step": 51650 }, { "epoch": 1.27, "learning_rate": 1.750474079546854e-05, "loss": 0.012, "step": 51660 }, { "epoch": 1.27, "learning_rate": 1.750424824528999e-05, "loss": 0.0183, "step": 51670 }, { "epoch": 1.27, "learning_rate": 1.7503755695111442e-05, "loss": 0.012, "step": 51680 }, { "epoch": 1.27, "learning_rate": 1.750326314493289e-05, "loss": 0.013, "step": 51690 }, { "epoch": 1.27, "learning_rate": 1.750277059475434e-05, "loss": 0.0125, "step": 51700 }, { "epoch": 1.27, "learning_rate": 1.7502278044575793e-05, "loss": 0.0135, "step": 51710 }, { "epoch": 1.27, "learning_rate": 1.7501785494397242e-05, "loss": 0.0213, "step": 51720 }, { "epoch": 1.27, "learning_rate": 1.750129294421869e-05, "loss": 0.0115, "step": 51730 }, { "epoch": 1.27, "learning_rate": 1.7500800394040144e-05, "loss": 0.0114, "step": 51740 }, { "epoch": 1.27, "learning_rate": 1.7500307843861593e-05, "loss": 0.0158, "step": 51750 }, { "epoch": 1.27, "learning_rate": 1.7499815293683046e-05, "loss": 0.013, "step": 51760 }, { "epoch": 1.27, "learning_rate": 1.7499322743504498e-05, "loss": 0.0108, "step": 51770 }, { "epoch": 1.27, "learning_rate": 1.7498830193325947e-05, "loss": 0.0149, "step": 51780 }, { "epoch": 1.27, "learning_rate": 1.7498337643147397e-05, "loss": 0.0117, "step": 51790 }, { "epoch": 1.27, "learning_rate": 1.749784509296885e-05, "loss": 0.0124, "step": 51800 }, { "epoch": 1.27, "learning_rate": 1.7497352542790298e-05, "loss": 0.0121, "step": 51810 }, { "epoch": 1.27, "learning_rate": 1.7496859992611747e-05, "loss": 0.0113, "step": 51820 }, { "epoch": 1.27, "learning_rate": 1.74963674424332e-05, "loss": 0.0128, "step": 51830 }, { "epoch": 1.27, "learning_rate": 1.749587489225465e-05, "loss": 0.0113, "step": 51840 }, { "epoch": 1.27, "learning_rate": 1.74953823420761e-05, "loss": 0.0226, "step": 51850 }, { "epoch": 1.27, "learning_rate": 1.749488979189755e-05, "loss": 0.0099, "step": 51860 }, { "epoch": 1.27, "learning_rate": 1.7494397241719e-05, "loss": 0.0125, "step": 51870 }, { "epoch": 1.27, "learning_rate": 1.7493904691540452e-05, "loss": 0.0199, "step": 51880 }, { "epoch": 1.27, "learning_rate": 1.74934121413619e-05, "loss": 0.0183, "step": 51890 }, { "epoch": 1.28, "learning_rate": 1.7492919591183354e-05, "loss": 0.0151, "step": 51900 }, { "epoch": 1.28, "learning_rate": 1.7492427041004803e-05, "loss": 0.0112, "step": 51910 }, { "epoch": 1.28, "learning_rate": 1.7491934490826256e-05, "loss": 0.0254, "step": 51920 }, { "epoch": 1.28, "learning_rate": 1.7491441940647705e-05, "loss": 0.0104, "step": 51930 }, { "epoch": 1.28, "learning_rate": 1.7490949390469157e-05, "loss": 0.0132, "step": 51940 }, { "epoch": 1.28, "learning_rate": 1.7490456840290607e-05, "loss": 0.0209, "step": 51950 }, { "epoch": 1.28, "learning_rate": 1.7489964290112056e-05, "loss": 0.0122, "step": 51960 }, { "epoch": 1.28, "learning_rate": 1.748947173993351e-05, "loss": 0.0098, "step": 51970 }, { "epoch": 1.28, "learning_rate": 1.7488979189754957e-05, "loss": 0.0176, "step": 51980 }, { "epoch": 1.28, "learning_rate": 1.7488486639576407e-05, "loss": 0.0133, "step": 51990 }, { "epoch": 1.28, "learning_rate": 1.748799408939786e-05, "loss": 0.017, "step": 52000 }, { "epoch": 1.28, "learning_rate": 1.7487501539219308e-05, "loss": 0.0136, "step": 52010 }, { "epoch": 1.28, "learning_rate": 1.7487008989040757e-05, "loss": 0.0114, "step": 52020 }, { "epoch": 1.28, "learning_rate": 1.748651643886221e-05, "loss": 0.0108, "step": 52030 }, { "epoch": 1.28, "learning_rate": 1.7486023888683663e-05, "loss": 0.0124, "step": 52040 }, { "epoch": 1.28, "learning_rate": 1.748553133850511e-05, "loss": 0.0108, "step": 52050 }, { "epoch": 1.28, "learning_rate": 1.7485038788326564e-05, "loss": 0.0116, "step": 52060 }, { "epoch": 1.28, "learning_rate": 1.7484546238148013e-05, "loss": 0.0145, "step": 52070 }, { "epoch": 1.28, "learning_rate": 1.7484053687969463e-05, "loss": 0.0238, "step": 52080 }, { "epoch": 1.28, "learning_rate": 1.7483561137790915e-05, "loss": 0.0123, "step": 52090 }, { "epoch": 1.28, "learning_rate": 1.7483068587612364e-05, "loss": 0.0131, "step": 52100 }, { "epoch": 1.28, "learning_rate": 1.7482576037433817e-05, "loss": 0.0125, "step": 52110 }, { "epoch": 1.28, "learning_rate": 1.7482083487255266e-05, "loss": 0.0113, "step": 52120 }, { "epoch": 1.28, "learning_rate": 1.7481590937076715e-05, "loss": 0.0122, "step": 52130 }, { "epoch": 1.28, "learning_rate": 1.7481098386898168e-05, "loss": 0.0116, "step": 52140 }, { "epoch": 1.28, "learning_rate": 1.7480605836719617e-05, "loss": 0.0118, "step": 52150 }, { "epoch": 1.28, "learning_rate": 1.7480113286541066e-05, "loss": 0.0119, "step": 52160 }, { "epoch": 1.28, "learning_rate": 1.747962073636252e-05, "loss": 0.0117, "step": 52170 }, { "epoch": 1.28, "learning_rate": 1.7479128186183968e-05, "loss": 0.0126, "step": 52180 }, { "epoch": 1.28, "learning_rate": 1.747863563600542e-05, "loss": 0.0114, "step": 52190 }, { "epoch": 1.28, "learning_rate": 1.747814308582687e-05, "loss": 0.0174, "step": 52200 }, { "epoch": 1.28, "learning_rate": 1.7477650535648322e-05, "loss": 0.0123, "step": 52210 }, { "epoch": 1.28, "learning_rate": 1.747715798546977e-05, "loss": 0.0125, "step": 52220 }, { "epoch": 1.28, "learning_rate": 1.7476665435291223e-05, "loss": 0.0157, "step": 52230 }, { "epoch": 1.28, "learning_rate": 1.7476172885112673e-05, "loss": 0.0103, "step": 52240 }, { "epoch": 1.28, "learning_rate": 1.7475680334934122e-05, "loss": 0.0207, "step": 52250 }, { "epoch": 1.28, "learning_rate": 1.7475187784755574e-05, "loss": 0.0166, "step": 52260 }, { "epoch": 1.28, "learning_rate": 1.7474695234577023e-05, "loss": 0.0183, "step": 52270 }, { "epoch": 1.28, "learning_rate": 1.7474202684398473e-05, "loss": 0.0116, "step": 52280 }, { "epoch": 1.28, "learning_rate": 1.7473710134219925e-05, "loss": 0.0126, "step": 52290 }, { "epoch": 1.28, "learning_rate": 1.7473217584041374e-05, "loss": 0.0122, "step": 52300 }, { "epoch": 1.29, "learning_rate": 1.7472725033862827e-05, "loss": 0.0125, "step": 52310 }, { "epoch": 1.29, "learning_rate": 1.7472232483684276e-05, "loss": 0.0151, "step": 52320 }, { "epoch": 1.29, "learning_rate": 1.747173993350573e-05, "loss": 0.0149, "step": 52330 }, { "epoch": 1.29, "learning_rate": 1.7471247383327178e-05, "loss": 0.0204, "step": 52340 }, { "epoch": 1.29, "learning_rate": 1.747075483314863e-05, "loss": 0.0129, "step": 52350 }, { "epoch": 1.29, "learning_rate": 1.747026228297008e-05, "loss": 0.0111, "step": 52360 }, { "epoch": 1.29, "learning_rate": 1.7469769732791532e-05, "loss": 0.0133, "step": 52370 }, { "epoch": 1.29, "learning_rate": 1.746927718261298e-05, "loss": 0.0131, "step": 52380 }, { "epoch": 1.29, "learning_rate": 1.746878463243443e-05, "loss": 0.0118, "step": 52390 }, { "epoch": 1.29, "learning_rate": 1.7468292082255883e-05, "loss": 0.0117, "step": 52400 }, { "epoch": 1.29, "learning_rate": 1.7467799532077332e-05, "loss": 0.0217, "step": 52410 }, { "epoch": 1.29, "learning_rate": 1.746730698189878e-05, "loss": 0.0109, "step": 52420 }, { "epoch": 1.29, "learning_rate": 1.7466814431720234e-05, "loss": 0.012, "step": 52430 }, { "epoch": 1.29, "learning_rate": 1.7466321881541683e-05, "loss": 0.0113, "step": 52440 }, { "epoch": 1.29, "learning_rate": 1.7465829331363132e-05, "loss": 0.0101, "step": 52450 }, { "epoch": 1.29, "learning_rate": 1.7465336781184584e-05, "loss": 0.0114, "step": 52460 }, { "epoch": 1.29, "learning_rate": 1.7464844231006034e-05, "loss": 0.0122, "step": 52470 }, { "epoch": 1.29, "learning_rate": 1.7464351680827486e-05, "loss": 0.013, "step": 52480 }, { "epoch": 1.29, "learning_rate": 1.746385913064894e-05, "loss": 0.0201, "step": 52490 }, { "epoch": 1.29, "learning_rate": 1.7463366580470388e-05, "loss": 0.0182, "step": 52500 }, { "epoch": 1.29, "learning_rate": 1.7462874030291837e-05, "loss": 0.0179, "step": 52510 }, { "epoch": 1.29, "learning_rate": 1.746238148011329e-05, "loss": 0.0135, "step": 52520 }, { "epoch": 1.29, "learning_rate": 1.746188892993474e-05, "loss": 0.0225, "step": 52530 }, { "epoch": 1.29, "learning_rate": 1.7461396379756188e-05, "loss": 0.0183, "step": 52540 }, { "epoch": 1.29, "learning_rate": 1.746090382957764e-05, "loss": 0.0192, "step": 52550 }, { "epoch": 1.29, "learning_rate": 1.746041127939909e-05, "loss": 0.0114, "step": 52560 }, { "epoch": 1.29, "learning_rate": 1.7459918729220542e-05, "loss": 0.0137, "step": 52570 }, { "epoch": 1.29, "learning_rate": 1.745942617904199e-05, "loss": 0.0168, "step": 52580 }, { "epoch": 1.29, "learning_rate": 1.745893362886344e-05, "loss": 0.0166, "step": 52590 }, { "epoch": 1.29, "learning_rate": 1.7458441078684893e-05, "loss": 0.0133, "step": 52600 }, { "epoch": 1.29, "learning_rate": 1.7457948528506342e-05, "loss": 0.0145, "step": 52610 }, { "epoch": 1.29, "learning_rate": 1.7457455978327794e-05, "loss": 0.0206, "step": 52620 }, { "epoch": 1.29, "learning_rate": 1.7456963428149244e-05, "loss": 0.011, "step": 52630 }, { "epoch": 1.29, "learning_rate": 1.7456470877970696e-05, "loss": 0.0116, "step": 52640 }, { "epoch": 1.29, "learning_rate": 1.7455978327792145e-05, "loss": 0.0108, "step": 52650 }, { "epoch": 1.29, "learning_rate": 1.7455485777613598e-05, "loss": 0.0175, "step": 52660 }, { "epoch": 1.29, "learning_rate": 1.7454993227435047e-05, "loss": 0.0115, "step": 52670 }, { "epoch": 1.29, "learning_rate": 1.7454500677256496e-05, "loss": 0.0136, "step": 52680 }, { "epoch": 1.29, "learning_rate": 1.745400812707795e-05, "loss": 0.0178, "step": 52690 }, { "epoch": 1.29, "learning_rate": 1.7453515576899398e-05, "loss": 0.0108, "step": 52700 }, { "epoch": 1.29, "learning_rate": 1.7453023026720847e-05, "loss": 0.0122, "step": 52710 }, { "epoch": 1.3, "learning_rate": 1.74525304765423e-05, "loss": 0.0109, "step": 52720 }, { "epoch": 1.3, "learning_rate": 1.745203792636375e-05, "loss": 0.0163, "step": 52730 }, { "epoch": 1.3, "learning_rate": 1.7451545376185198e-05, "loss": 0.0116, "step": 52740 }, { "epoch": 1.3, "learning_rate": 1.745105282600665e-05, "loss": 0.0115, "step": 52750 }, { "epoch": 1.3, "learning_rate": 1.7450560275828103e-05, "loss": 0.0241, "step": 52760 }, { "epoch": 1.3, "learning_rate": 1.7450067725649552e-05, "loss": 0.0174, "step": 52770 }, { "epoch": 1.3, "learning_rate": 1.7449575175471005e-05, "loss": 0.0119, "step": 52780 }, { "epoch": 1.3, "learning_rate": 1.7449082625292454e-05, "loss": 0.0158, "step": 52790 }, { "epoch": 1.3, "learning_rate": 1.7448590075113903e-05, "loss": 0.0138, "step": 52800 }, { "epoch": 1.3, "learning_rate": 1.7448097524935355e-05, "loss": 0.0154, "step": 52810 }, { "epoch": 1.3, "learning_rate": 1.7447604974756805e-05, "loss": 0.0107, "step": 52820 }, { "epoch": 1.3, "learning_rate": 1.7447112424578257e-05, "loss": 0.0129, "step": 52830 }, { "epoch": 1.3, "learning_rate": 1.7446619874399706e-05, "loss": 0.0188, "step": 52840 }, { "epoch": 1.3, "learning_rate": 1.7446127324221155e-05, "loss": 0.0123, "step": 52850 }, { "epoch": 1.3, "learning_rate": 1.7445634774042608e-05, "loss": 0.011, "step": 52860 }, { "epoch": 1.3, "learning_rate": 1.7445142223864057e-05, "loss": 0.0135, "step": 52870 }, { "epoch": 1.3, "learning_rate": 1.7444649673685506e-05, "loss": 0.0118, "step": 52880 }, { "epoch": 1.3, "learning_rate": 1.744415712350696e-05, "loss": 0.0099, "step": 52890 }, { "epoch": 1.3, "learning_rate": 1.7443664573328408e-05, "loss": 0.0109, "step": 52900 }, { "epoch": 1.3, "learning_rate": 1.744317202314986e-05, "loss": 0.0204, "step": 52910 }, { "epoch": 1.3, "learning_rate": 1.744267947297131e-05, "loss": 0.0134, "step": 52920 }, { "epoch": 1.3, "learning_rate": 1.7442186922792762e-05, "loss": 0.014, "step": 52930 }, { "epoch": 1.3, "learning_rate": 1.744169437261421e-05, "loss": 0.0133, "step": 52940 }, { "epoch": 1.3, "learning_rate": 1.7441201822435664e-05, "loss": 0.0121, "step": 52950 }, { "epoch": 1.3, "learning_rate": 1.7440709272257113e-05, "loss": 0.0112, "step": 52960 }, { "epoch": 1.3, "learning_rate": 1.7440216722078562e-05, "loss": 0.016, "step": 52970 }, { "epoch": 1.3, "learning_rate": 1.7439724171900015e-05, "loss": 0.0113, "step": 52980 }, { "epoch": 1.3, "learning_rate": 1.7439231621721464e-05, "loss": 0.0124, "step": 52990 }, { "epoch": 1.3, "learning_rate": 1.7438739071542913e-05, "loss": 0.0177, "step": 53000 }, { "epoch": 1.3, "learning_rate": 1.7438246521364365e-05, "loss": 0.012, "step": 53010 }, { "epoch": 1.3, "learning_rate": 1.7437753971185815e-05, "loss": 0.0174, "step": 53020 }, { "epoch": 1.3, "learning_rate": 1.7437261421007267e-05, "loss": 0.0112, "step": 53030 }, { "epoch": 1.3, "learning_rate": 1.7436768870828716e-05, "loss": 0.0166, "step": 53040 }, { "epoch": 1.3, "learning_rate": 1.743627632065017e-05, "loss": 0.0232, "step": 53050 }, { "epoch": 1.3, "learning_rate": 1.7435783770471618e-05, "loss": 0.0183, "step": 53060 }, { "epoch": 1.3, "learning_rate": 1.743529122029307e-05, "loss": 0.0121, "step": 53070 }, { "epoch": 1.3, "learning_rate": 1.743479867011452e-05, "loss": 0.0231, "step": 53080 }, { "epoch": 1.3, "learning_rate": 1.7434306119935972e-05, "loss": 0.0113, "step": 53090 }, { "epoch": 1.3, "learning_rate": 1.743381356975742e-05, "loss": 0.0163, "step": 53100 }, { "epoch": 1.3, "learning_rate": 1.743332101957887e-05, "loss": 0.011, "step": 53110 }, { "epoch": 1.3, "learning_rate": 1.7432828469400323e-05, "loss": 0.0126, "step": 53120 }, { "epoch": 1.31, "learning_rate": 1.7432335919221772e-05, "loss": 0.0158, "step": 53130 }, { "epoch": 1.31, "learning_rate": 1.743184336904322e-05, "loss": 0.0143, "step": 53140 }, { "epoch": 1.31, "learning_rate": 1.7431350818864674e-05, "loss": 0.0135, "step": 53150 }, { "epoch": 1.31, "learning_rate": 1.7430858268686123e-05, "loss": 0.0137, "step": 53160 }, { "epoch": 1.31, "learning_rate": 1.7430365718507572e-05, "loss": 0.0228, "step": 53170 }, { "epoch": 1.31, "learning_rate": 1.7429873168329025e-05, "loss": 0.0111, "step": 53180 }, { "epoch": 1.31, "learning_rate": 1.7429380618150474e-05, "loss": 0.0142, "step": 53190 }, { "epoch": 1.31, "learning_rate": 1.7428888067971926e-05, "loss": 0.0208, "step": 53200 }, { "epoch": 1.31, "learning_rate": 1.742839551779338e-05, "loss": 0.012, "step": 53210 }, { "epoch": 1.31, "learning_rate": 1.7427902967614828e-05, "loss": 0.0111, "step": 53220 }, { "epoch": 1.31, "learning_rate": 1.7427410417436277e-05, "loss": 0.0125, "step": 53230 }, { "epoch": 1.31, "learning_rate": 1.742691786725773e-05, "loss": 0.0132, "step": 53240 }, { "epoch": 1.31, "learning_rate": 1.742642531707918e-05, "loss": 0.0101, "step": 53250 }, { "epoch": 1.31, "learning_rate": 1.7425932766900628e-05, "loss": 0.0112, "step": 53260 }, { "epoch": 1.31, "learning_rate": 1.742544021672208e-05, "loss": 0.0112, "step": 53270 }, { "epoch": 1.31, "learning_rate": 1.742494766654353e-05, "loss": 0.0207, "step": 53280 }, { "epoch": 1.31, "learning_rate": 1.7424455116364982e-05, "loss": 0.0107, "step": 53290 }, { "epoch": 1.31, "learning_rate": 1.742396256618643e-05, "loss": 0.0147, "step": 53300 }, { "epoch": 1.31, "learning_rate": 1.742347001600788e-05, "loss": 0.0121, "step": 53310 }, { "epoch": 1.31, "learning_rate": 1.7422977465829333e-05, "loss": 0.0108, "step": 53320 }, { "epoch": 1.31, "learning_rate": 1.7422484915650782e-05, "loss": 0.0131, "step": 53330 }, { "epoch": 1.31, "learning_rate": 1.7421992365472235e-05, "loss": 0.0112, "step": 53340 }, { "epoch": 1.31, "learning_rate": 1.7421499815293684e-05, "loss": 0.0137, "step": 53350 }, { "epoch": 1.31, "learning_rate": 1.7421007265115136e-05, "loss": 0.0248, "step": 53360 }, { "epoch": 1.31, "learning_rate": 1.7420514714936586e-05, "loss": 0.0108, "step": 53370 }, { "epoch": 1.31, "learning_rate": 1.7420022164758038e-05, "loss": 0.0246, "step": 53380 }, { "epoch": 1.31, "learning_rate": 1.7419529614579487e-05, "loss": 0.0125, "step": 53390 }, { "epoch": 1.31, "learning_rate": 1.7419037064400936e-05, "loss": 0.0111, "step": 53400 }, { "epoch": 1.31, "learning_rate": 1.741854451422239e-05, "loss": 0.0129, "step": 53410 }, { "epoch": 1.31, "learning_rate": 1.7418051964043838e-05, "loss": 0.0167, "step": 53420 }, { "epoch": 1.31, "learning_rate": 1.7417559413865287e-05, "loss": 0.0147, "step": 53430 }, { "epoch": 1.31, "learning_rate": 1.741706686368674e-05, "loss": 0.0146, "step": 53440 }, { "epoch": 1.31, "learning_rate": 1.741657431350819e-05, "loss": 0.014, "step": 53450 }, { "epoch": 1.31, "learning_rate": 1.7416081763329638e-05, "loss": 0.0139, "step": 53460 }, { "epoch": 1.31, "learning_rate": 1.741558921315109e-05, "loss": 0.0123, "step": 53470 }, { "epoch": 1.31, "learning_rate": 1.7415096662972543e-05, "loss": 0.0155, "step": 53480 }, { "epoch": 1.31, "learning_rate": 1.7414604112793992e-05, "loss": 0.0109, "step": 53490 }, { "epoch": 1.31, "learning_rate": 1.7414111562615445e-05, "loss": 0.0156, "step": 53500 }, { "epoch": 1.31, "learning_rate": 1.7413619012436894e-05, "loss": 0.0151, "step": 53510 }, { "epoch": 1.31, "learning_rate": 1.7413126462258343e-05, "loss": 0.011, "step": 53520 }, { "epoch": 1.32, "learning_rate": 1.7412633912079796e-05, "loss": 0.0147, "step": 53530 }, { "epoch": 1.32, "learning_rate": 1.7412141361901245e-05, "loss": 0.0092, "step": 53540 }, { "epoch": 1.32, "learning_rate": 1.7411648811722697e-05, "loss": 0.0136, "step": 53550 }, { "epoch": 1.32, "learning_rate": 1.7411156261544147e-05, "loss": 0.0342, "step": 53560 }, { "epoch": 1.32, "learning_rate": 1.7410663711365596e-05, "loss": 0.0152, "step": 53570 }, { "epoch": 1.32, "learning_rate": 1.7410171161187048e-05, "loss": 0.01, "step": 53580 }, { "epoch": 1.32, "learning_rate": 1.7409678611008497e-05, "loss": 0.0151, "step": 53590 }, { "epoch": 1.32, "learning_rate": 1.7409186060829947e-05, "loss": 0.0115, "step": 53600 }, { "epoch": 1.32, "learning_rate": 1.74086935106514e-05, "loss": 0.0201, "step": 53610 }, { "epoch": 1.32, "learning_rate": 1.7408200960472848e-05, "loss": 0.0117, "step": 53620 }, { "epoch": 1.32, "learning_rate": 1.74077084102943e-05, "loss": 0.0101, "step": 53630 }, { "epoch": 1.32, "learning_rate": 1.740721586011575e-05, "loss": 0.0119, "step": 53640 }, { "epoch": 1.32, "learning_rate": 1.7406723309937202e-05, "loss": 0.013, "step": 53650 }, { "epoch": 1.32, "learning_rate": 1.740623075975865e-05, "loss": 0.0127, "step": 53660 }, { "epoch": 1.32, "learning_rate": 1.7405738209580104e-05, "loss": 0.0114, "step": 53670 }, { "epoch": 1.32, "learning_rate": 1.7405245659401553e-05, "loss": 0.0128, "step": 53680 }, { "epoch": 1.32, "learning_rate": 1.7404753109223002e-05, "loss": 0.0107, "step": 53690 }, { "epoch": 1.32, "learning_rate": 1.7404260559044455e-05, "loss": 0.0107, "step": 53700 }, { "epoch": 1.32, "learning_rate": 1.7403768008865904e-05, "loss": 0.0135, "step": 53710 }, { "epoch": 1.32, "learning_rate": 1.7403275458687353e-05, "loss": 0.0101, "step": 53720 }, { "epoch": 1.32, "learning_rate": 1.7402782908508806e-05, "loss": 0.0199, "step": 53730 }, { "epoch": 1.32, "learning_rate": 1.7402290358330255e-05, "loss": 0.0124, "step": 53740 }, { "epoch": 1.32, "learning_rate": 1.7401797808151707e-05, "loss": 0.0106, "step": 53750 }, { "epoch": 1.32, "learning_rate": 1.7401305257973157e-05, "loss": 0.0144, "step": 53760 }, { "epoch": 1.32, "learning_rate": 1.740081270779461e-05, "loss": 0.0107, "step": 53770 }, { "epoch": 1.32, "learning_rate": 1.740032015761606e-05, "loss": 0.0114, "step": 53780 }, { "epoch": 1.32, "learning_rate": 1.739982760743751e-05, "loss": 0.0121, "step": 53790 }, { "epoch": 1.32, "learning_rate": 1.739933505725896e-05, "loss": 0.0128, "step": 53800 }, { "epoch": 1.32, "learning_rate": 1.7398842507080413e-05, "loss": 0.0119, "step": 53810 }, { "epoch": 1.32, "learning_rate": 1.7398349956901862e-05, "loss": 0.0106, "step": 53820 }, { "epoch": 1.32, "learning_rate": 1.739785740672331e-05, "loss": 0.0102, "step": 53830 }, { "epoch": 1.32, "learning_rate": 1.7397364856544763e-05, "loss": 0.0151, "step": 53840 }, { "epoch": 1.32, "learning_rate": 1.7396872306366213e-05, "loss": 0.0115, "step": 53850 }, { "epoch": 1.32, "learning_rate": 1.739637975618766e-05, "loss": 0.0152, "step": 53860 }, { "epoch": 1.32, "learning_rate": 1.7395887206009114e-05, "loss": 0.0186, "step": 53870 }, { "epoch": 1.32, "learning_rate": 1.7395394655830563e-05, "loss": 0.013, "step": 53880 }, { "epoch": 1.32, "learning_rate": 1.7394902105652013e-05, "loss": 0.0171, "step": 53890 }, { "epoch": 1.32, "learning_rate": 1.7394409555473465e-05, "loss": 0.013, "step": 53900 }, { "epoch": 1.32, "learning_rate": 1.7393917005294914e-05, "loss": 0.0126, "step": 53910 }, { "epoch": 1.32, "learning_rate": 1.7393424455116367e-05, "loss": 0.0141, "step": 53920 }, { "epoch": 1.32, "learning_rate": 1.739293190493782e-05, "loss": 0.0121, "step": 53930 }, { "epoch": 1.33, "learning_rate": 1.739243935475927e-05, "loss": 0.0109, "step": 53940 }, { "epoch": 1.33, "learning_rate": 1.7391946804580718e-05, "loss": 0.0094, "step": 53950 }, { "epoch": 1.33, "learning_rate": 1.739145425440217e-05, "loss": 0.0132, "step": 53960 }, { "epoch": 1.33, "learning_rate": 1.739096170422362e-05, "loss": 0.0148, "step": 53970 }, { "epoch": 1.33, "learning_rate": 1.739046915404507e-05, "loss": 0.0114, "step": 53980 }, { "epoch": 1.33, "learning_rate": 1.738997660386652e-05, "loss": 0.0103, "step": 53990 }, { "epoch": 1.33, "learning_rate": 1.738948405368797e-05, "loss": 0.0139, "step": 54000 }, { "epoch": 1.33, "learning_rate": 1.738899150350942e-05, "loss": 0.0236, "step": 54010 }, { "epoch": 1.33, "learning_rate": 1.7388498953330872e-05, "loss": 0.0138, "step": 54020 }, { "epoch": 1.33, "learning_rate": 1.738800640315232e-05, "loss": 0.0202, "step": 54030 }, { "epoch": 1.33, "learning_rate": 1.7387513852973773e-05, "loss": 0.0146, "step": 54040 }, { "epoch": 1.33, "learning_rate": 1.7387021302795223e-05, "loss": 0.014, "step": 54050 }, { "epoch": 1.33, "learning_rate": 1.7386528752616675e-05, "loss": 0.0131, "step": 54060 }, { "epoch": 1.33, "learning_rate": 1.7386036202438124e-05, "loss": 0.0142, "step": 54070 }, { "epoch": 1.33, "learning_rate": 1.7385543652259577e-05, "loss": 0.016, "step": 54080 }, { "epoch": 1.33, "learning_rate": 1.7385051102081026e-05, "loss": 0.0129, "step": 54090 }, { "epoch": 1.33, "learning_rate": 1.738455855190248e-05, "loss": 0.0112, "step": 54100 }, { "epoch": 1.33, "learning_rate": 1.7384066001723928e-05, "loss": 0.0153, "step": 54110 }, { "epoch": 1.33, "learning_rate": 1.7383573451545377e-05, "loss": 0.0131, "step": 54120 }, { "epoch": 1.33, "learning_rate": 1.738308090136683e-05, "loss": 0.0156, "step": 54130 }, { "epoch": 1.33, "learning_rate": 1.738258835118828e-05, "loss": 0.0205, "step": 54140 }, { "epoch": 1.33, "learning_rate": 1.7382095801009728e-05, "loss": 0.0159, "step": 54150 }, { "epoch": 1.33, "learning_rate": 1.738160325083118e-05, "loss": 0.0129, "step": 54160 }, { "epoch": 1.33, "learning_rate": 1.738111070065263e-05, "loss": 0.0117, "step": 54170 }, { "epoch": 1.33, "learning_rate": 1.738061815047408e-05, "loss": 0.0106, "step": 54180 }, { "epoch": 1.33, "learning_rate": 1.738012560029553e-05, "loss": 0.0114, "step": 54190 }, { "epoch": 1.33, "learning_rate": 1.7379633050116984e-05, "loss": 0.0108, "step": 54200 }, { "epoch": 1.33, "learning_rate": 1.7379140499938433e-05, "loss": 0.031, "step": 54210 }, { "epoch": 1.33, "learning_rate": 1.7378647949759885e-05, "loss": 0.0123, "step": 54220 }, { "epoch": 1.33, "learning_rate": 1.7378155399581334e-05, "loss": 0.0133, "step": 54230 }, { "epoch": 1.33, "learning_rate": 1.7377662849402784e-05, "loss": 0.0105, "step": 54240 }, { "epoch": 1.33, "learning_rate": 1.7377170299224236e-05, "loss": 0.0114, "step": 54250 }, { "epoch": 1.33, "learning_rate": 1.7376677749045685e-05, "loss": 0.0135, "step": 54260 }, { "epoch": 1.33, "learning_rate": 1.7376185198867138e-05, "loss": 0.0109, "step": 54270 }, { "epoch": 1.33, "learning_rate": 1.7375692648688587e-05, "loss": 0.0185, "step": 54280 }, { "epoch": 1.33, "learning_rate": 1.7375200098510036e-05, "loss": 0.0123, "step": 54290 }, { "epoch": 1.33, "learning_rate": 1.737470754833149e-05, "loss": 0.0122, "step": 54300 }, { "epoch": 1.33, "learning_rate": 1.7374214998152938e-05, "loss": 0.0128, "step": 54310 }, { "epoch": 1.33, "learning_rate": 1.7373722447974387e-05, "loss": 0.0105, "step": 54320 }, { "epoch": 1.33, "learning_rate": 1.737322989779584e-05, "loss": 0.0209, "step": 54330 }, { "epoch": 1.33, "learning_rate": 1.737273734761729e-05, "loss": 0.0121, "step": 54340 }, { "epoch": 1.34, "learning_rate": 1.737224479743874e-05, "loss": 0.0132, "step": 54350 }, { "epoch": 1.34, "learning_rate": 1.737175224726019e-05, "loss": 0.0223, "step": 54360 }, { "epoch": 1.34, "learning_rate": 1.7371259697081643e-05, "loss": 0.0131, "step": 54370 }, { "epoch": 1.34, "learning_rate": 1.7370767146903092e-05, "loss": 0.0125, "step": 54380 }, { "epoch": 1.34, "learning_rate": 1.7370274596724544e-05, "loss": 0.0127, "step": 54390 }, { "epoch": 1.34, "learning_rate": 1.7369782046545994e-05, "loss": 0.0171, "step": 54400 }, { "epoch": 1.34, "learning_rate": 1.7369289496367443e-05, "loss": 0.01, "step": 54410 }, { "epoch": 1.34, "learning_rate": 1.7368796946188895e-05, "loss": 0.011, "step": 54420 }, { "epoch": 1.34, "learning_rate": 1.7368304396010344e-05, "loss": 0.0236, "step": 54430 }, { "epoch": 1.34, "learning_rate": 1.7367811845831794e-05, "loss": 0.0106, "step": 54440 }, { "epoch": 1.34, "learning_rate": 1.7367319295653246e-05, "loss": 0.0107, "step": 54450 }, { "epoch": 1.34, "learning_rate": 1.7366826745474695e-05, "loss": 0.0103, "step": 54460 }, { "epoch": 1.34, "learning_rate": 1.7366334195296148e-05, "loss": 0.0104, "step": 54470 }, { "epoch": 1.34, "learning_rate": 1.7365841645117597e-05, "loss": 0.0184, "step": 54480 }, { "epoch": 1.34, "learning_rate": 1.736534909493905e-05, "loss": 0.013, "step": 54490 }, { "epoch": 1.34, "learning_rate": 1.73648565447605e-05, "loss": 0.0109, "step": 54500 }, { "epoch": 1.34, "learning_rate": 1.736436399458195e-05, "loss": 0.0122, "step": 54510 }, { "epoch": 1.34, "learning_rate": 1.73638714444034e-05, "loss": 0.0108, "step": 54520 }, { "epoch": 1.34, "learning_rate": 1.7363378894224853e-05, "loss": 0.0133, "step": 54530 }, { "epoch": 1.34, "learning_rate": 1.7362886344046302e-05, "loss": 0.0125, "step": 54540 }, { "epoch": 1.34, "learning_rate": 1.736239379386775e-05, "loss": 0.0154, "step": 54550 }, { "epoch": 1.34, "learning_rate": 1.7361901243689204e-05, "loss": 0.0141, "step": 54560 }, { "epoch": 1.34, "learning_rate": 1.7361408693510653e-05, "loss": 0.0111, "step": 54570 }, { "epoch": 1.34, "learning_rate": 1.7360916143332102e-05, "loss": 0.0252, "step": 54580 }, { "epoch": 1.34, "learning_rate": 1.7360423593153555e-05, "loss": 0.0136, "step": 54590 }, { "epoch": 1.34, "learning_rate": 1.7359931042975004e-05, "loss": 0.0123, "step": 54600 }, { "epoch": 1.34, "learning_rate": 1.7359438492796453e-05, "loss": 0.0114, "step": 54610 }, { "epoch": 1.34, "learning_rate": 1.7358945942617905e-05, "loss": 0.0122, "step": 54620 }, { "epoch": 1.34, "learning_rate": 1.7358453392439355e-05, "loss": 0.0117, "step": 54630 }, { "epoch": 1.34, "learning_rate": 1.7357960842260807e-05, "loss": 0.0112, "step": 54640 }, { "epoch": 1.34, "learning_rate": 1.735746829208226e-05, "loss": 0.0144, "step": 54650 }, { "epoch": 1.34, "learning_rate": 1.735697574190371e-05, "loss": 0.0122, "step": 54660 }, { "epoch": 1.34, "learning_rate": 1.7356483191725158e-05, "loss": 0.0098, "step": 54670 }, { "epoch": 1.34, "learning_rate": 1.735599064154661e-05, "loss": 0.011, "step": 54680 }, { "epoch": 1.34, "learning_rate": 1.735549809136806e-05, "loss": 0.023, "step": 54690 }, { "epoch": 1.34, "learning_rate": 1.735500554118951e-05, "loss": 0.0207, "step": 54700 }, { "epoch": 1.34, "learning_rate": 1.735451299101096e-05, "loss": 0.0163, "step": 54710 }, { "epoch": 1.34, "learning_rate": 1.735402044083241e-05, "loss": 0.0162, "step": 54720 }, { "epoch": 1.34, "learning_rate": 1.735352789065386e-05, "loss": 0.0162, "step": 54730 }, { "epoch": 1.34, "learning_rate": 1.7353035340475312e-05, "loss": 0.0105, "step": 54740 }, { "epoch": 1.35, "learning_rate": 1.735254279029676e-05, "loss": 0.0254, "step": 54750 }, { "epoch": 1.35, "learning_rate": 1.7352050240118214e-05, "loss": 0.0237, "step": 54760 }, { "epoch": 1.35, "learning_rate": 1.7351557689939663e-05, "loss": 0.015, "step": 54770 }, { "epoch": 1.35, "learning_rate": 1.7351065139761115e-05, "loss": 0.0128, "step": 54780 }, { "epoch": 1.35, "learning_rate": 1.7350572589582565e-05, "loss": 0.011, "step": 54790 }, { "epoch": 1.35, "learning_rate": 1.7350080039404017e-05, "loss": 0.0134, "step": 54800 }, { "epoch": 1.35, "learning_rate": 1.7349587489225466e-05, "loss": 0.0124, "step": 54810 }, { "epoch": 1.35, "learning_rate": 1.734909493904692e-05, "loss": 0.0117, "step": 54820 }, { "epoch": 1.35, "learning_rate": 1.7348602388868368e-05, "loss": 0.013, "step": 54830 }, { "epoch": 1.35, "learning_rate": 1.7348109838689817e-05, "loss": 0.0102, "step": 54840 }, { "epoch": 1.35, "learning_rate": 1.734761728851127e-05, "loss": 0.0121, "step": 54850 }, { "epoch": 1.35, "learning_rate": 1.734712473833272e-05, "loss": 0.0104, "step": 54860 }, { "epoch": 1.35, "learning_rate": 1.7346632188154168e-05, "loss": 0.0219, "step": 54870 }, { "epoch": 1.35, "learning_rate": 1.734613963797562e-05, "loss": 0.0148, "step": 54880 }, { "epoch": 1.35, "learning_rate": 1.734564708779707e-05, "loss": 0.0104, "step": 54890 }, { "epoch": 1.35, "learning_rate": 1.734515453761852e-05, "loss": 0.0112, "step": 54900 }, { "epoch": 1.35, "learning_rate": 1.734466198743997e-05, "loss": 0.0116, "step": 54910 }, { "epoch": 1.35, "learning_rate": 1.7344169437261424e-05, "loss": 0.0126, "step": 54920 }, { "epoch": 1.35, "learning_rate": 1.7343676887082873e-05, "loss": 0.0122, "step": 54930 }, { "epoch": 1.35, "learning_rate": 1.7343184336904326e-05, "loss": 0.011, "step": 54940 }, { "epoch": 1.35, "learning_rate": 1.7342691786725775e-05, "loss": 0.0129, "step": 54950 }, { "epoch": 1.35, "learning_rate": 1.7342199236547224e-05, "loss": 0.0157, "step": 54960 }, { "epoch": 1.35, "learning_rate": 1.7341706686368676e-05, "loss": 0.011, "step": 54970 }, { "epoch": 1.35, "learning_rate": 1.7341214136190126e-05, "loss": 0.0148, "step": 54980 }, { "epoch": 1.35, "learning_rate": 1.7340721586011578e-05, "loss": 0.014, "step": 54990 }, { "epoch": 1.35, "learning_rate": 1.7340229035833027e-05, "loss": 0.0279, "step": 55000 }, { "epoch": 1.35, "eval_bleu": 97.8806, "eval_gen_len_ratio": 1.0014, "eval_loss": 0.009714646264910698, "eval_runtime": 260.4421, "eval_samples_per_second": 7.165, "eval_steps_per_second": 0.227, "step": 55000 }, { "epoch": 1.35, "learning_rate": 1.7339736485654476e-05, "loss": 0.0127, "step": 55010 }, { "epoch": 1.35, "learning_rate": 1.733924393547593e-05, "loss": 0.0119, "step": 55020 }, { "epoch": 1.35, "learning_rate": 1.7338751385297378e-05, "loss": 0.0109, "step": 55030 }, { "epoch": 1.35, "learning_rate": 1.7338258835118827e-05, "loss": 0.0108, "step": 55040 }, { "epoch": 1.35, "learning_rate": 1.733776628494028e-05, "loss": 0.0117, "step": 55050 }, { "epoch": 1.35, "learning_rate": 1.733727373476173e-05, "loss": 0.0101, "step": 55060 }, { "epoch": 1.35, "learning_rate": 1.733678118458318e-05, "loss": 0.0109, "step": 55070 }, { "epoch": 1.35, "learning_rate": 1.733628863440463e-05, "loss": 0.0153, "step": 55080 }, { "epoch": 1.35, "learning_rate": 1.7335796084226083e-05, "loss": 0.0131, "step": 55090 }, { "epoch": 1.35, "learning_rate": 1.7335303534047532e-05, "loss": 0.0196, "step": 55100 }, { "epoch": 1.35, "learning_rate": 1.7334810983868985e-05, "loss": 0.0123, "step": 55110 }, { "epoch": 1.35, "learning_rate": 1.7334318433690434e-05, "loss": 0.011, "step": 55120 }, { "epoch": 1.35, "learning_rate": 1.7333825883511883e-05, "loss": 0.0123, "step": 55130 }, { "epoch": 1.35, "learning_rate": 1.7333333333333336e-05, "loss": 0.0119, "step": 55140 }, { "epoch": 1.35, "learning_rate": 1.7332840783154785e-05, "loss": 0.0139, "step": 55150 }, { "epoch": 1.36, "learning_rate": 1.7332348232976234e-05, "loss": 0.0106, "step": 55160 }, { "epoch": 1.36, "learning_rate": 1.7331855682797686e-05, "loss": 0.0117, "step": 55170 }, { "epoch": 1.36, "learning_rate": 1.7331363132619136e-05, "loss": 0.0104, "step": 55180 }, { "epoch": 1.36, "learning_rate": 1.7330870582440588e-05, "loss": 0.0125, "step": 55190 }, { "epoch": 1.36, "learning_rate": 1.7330378032262037e-05, "loss": 0.0169, "step": 55200 }, { "epoch": 1.36, "learning_rate": 1.732988548208349e-05, "loss": 0.0178, "step": 55210 }, { "epoch": 1.36, "learning_rate": 1.732939293190494e-05, "loss": 0.0165, "step": 55220 }, { "epoch": 1.36, "learning_rate": 1.732890038172639e-05, "loss": 0.01, "step": 55230 }, { "epoch": 1.36, "learning_rate": 1.732840783154784e-05, "loss": 0.0119, "step": 55240 }, { "epoch": 1.36, "learning_rate": 1.7327915281369293e-05, "loss": 0.0128, "step": 55250 }, { "epoch": 1.36, "learning_rate": 1.7327422731190742e-05, "loss": 0.0174, "step": 55260 }, { "epoch": 1.36, "learning_rate": 1.732693018101219e-05, "loss": 0.013, "step": 55270 }, { "epoch": 1.36, "learning_rate": 1.7326437630833644e-05, "loss": 0.012, "step": 55280 }, { "epoch": 1.36, "learning_rate": 1.7325945080655093e-05, "loss": 0.0165, "step": 55290 }, { "epoch": 1.36, "learning_rate": 1.7325452530476542e-05, "loss": 0.011, "step": 55300 }, { "epoch": 1.36, "learning_rate": 1.7324959980297995e-05, "loss": 0.0112, "step": 55310 }, { "epoch": 1.36, "learning_rate": 1.7324467430119444e-05, "loss": 0.0112, "step": 55320 }, { "epoch": 1.36, "learning_rate": 1.7323974879940893e-05, "loss": 0.0138, "step": 55330 }, { "epoch": 1.36, "learning_rate": 1.7323482329762346e-05, "loss": 0.0103, "step": 55340 }, { "epoch": 1.36, "learning_rate": 1.7322989779583795e-05, "loss": 0.0117, "step": 55350 }, { "epoch": 1.36, "learning_rate": 1.7322497229405247e-05, "loss": 0.0136, "step": 55360 }, { "epoch": 1.36, "learning_rate": 1.73220046792267e-05, "loss": 0.0101, "step": 55370 }, { "epoch": 1.36, "learning_rate": 1.732151212904815e-05, "loss": 0.0121, "step": 55380 }, { "epoch": 1.36, "learning_rate": 1.7321019578869598e-05, "loss": 0.0135, "step": 55390 }, { "epoch": 1.36, "learning_rate": 1.732052702869105e-05, "loss": 0.0116, "step": 55400 }, { "epoch": 1.36, "learning_rate": 1.73200344785125e-05, "loss": 0.0141, "step": 55410 }, { "epoch": 1.36, "learning_rate": 1.731954192833395e-05, "loss": 0.013, "step": 55420 }, { "epoch": 1.36, "learning_rate": 1.73190493781554e-05, "loss": 0.0125, "step": 55430 }, { "epoch": 1.36, "learning_rate": 1.731855682797685e-05, "loss": 0.0114, "step": 55440 }, { "epoch": 1.36, "learning_rate": 1.73180642777983e-05, "loss": 0.0144, "step": 55450 }, { "epoch": 1.36, "learning_rate": 1.7317571727619752e-05, "loss": 0.0129, "step": 55460 }, { "epoch": 1.36, "learning_rate": 1.73170791774412e-05, "loss": 0.0184, "step": 55470 }, { "epoch": 1.36, "learning_rate": 1.7316586627262654e-05, "loss": 0.0114, "step": 55480 }, { "epoch": 1.36, "learning_rate": 1.7316094077084103e-05, "loss": 0.0135, "step": 55490 }, { "epoch": 1.36, "learning_rate": 1.7315601526905556e-05, "loss": 0.012, "step": 55500 }, { "epoch": 1.36, "learning_rate": 1.7315108976727005e-05, "loss": 0.0311, "step": 55510 }, { "epoch": 1.36, "learning_rate": 1.7314616426548458e-05, "loss": 0.0123, "step": 55520 }, { "epoch": 1.36, "learning_rate": 1.7314123876369907e-05, "loss": 0.0125, "step": 55530 }, { "epoch": 1.36, "learning_rate": 1.731363132619136e-05, "loss": 0.0118, "step": 55540 }, { "epoch": 1.36, "learning_rate": 1.731313877601281e-05, "loss": 0.0121, "step": 55550 }, { "epoch": 1.36, "learning_rate": 1.7312646225834257e-05, "loss": 0.0132, "step": 55560 }, { "epoch": 1.37, "learning_rate": 1.731215367565571e-05, "loss": 0.0119, "step": 55570 }, { "epoch": 1.37, "learning_rate": 1.731166112547716e-05, "loss": 0.01, "step": 55580 }, { "epoch": 1.37, "learning_rate": 1.731116857529861e-05, "loss": 0.0131, "step": 55590 }, { "epoch": 1.37, "learning_rate": 1.731067602512006e-05, "loss": 0.0126, "step": 55600 }, { "epoch": 1.37, "learning_rate": 1.731018347494151e-05, "loss": 0.0114, "step": 55610 }, { "epoch": 1.37, "learning_rate": 1.730969092476296e-05, "loss": 0.0138, "step": 55620 }, { "epoch": 1.37, "learning_rate": 1.7309198374584412e-05, "loss": 0.0099, "step": 55630 }, { "epoch": 1.37, "learning_rate": 1.7308705824405864e-05, "loss": 0.0118, "step": 55640 }, { "epoch": 1.37, "learning_rate": 1.7308213274227313e-05, "loss": 0.0165, "step": 55650 }, { "epoch": 1.37, "learning_rate": 1.7307720724048766e-05, "loss": 0.0193, "step": 55660 }, { "epoch": 1.37, "learning_rate": 1.7307228173870215e-05, "loss": 0.0106, "step": 55670 }, { "epoch": 1.37, "learning_rate": 1.7306735623691664e-05, "loss": 0.0175, "step": 55680 }, { "epoch": 1.37, "learning_rate": 1.7306243073513117e-05, "loss": 0.0107, "step": 55690 }, { "epoch": 1.37, "learning_rate": 1.7305750523334566e-05, "loss": 0.0125, "step": 55700 }, { "epoch": 1.37, "learning_rate": 1.730525797315602e-05, "loss": 0.0126, "step": 55710 }, { "epoch": 1.37, "learning_rate": 1.7304765422977468e-05, "loss": 0.0135, "step": 55720 }, { "epoch": 1.37, "learning_rate": 1.7304272872798917e-05, "loss": 0.0198, "step": 55730 }, { "epoch": 1.37, "learning_rate": 1.730378032262037e-05, "loss": 0.0138, "step": 55740 }, { "epoch": 1.37, "learning_rate": 1.730328777244182e-05, "loss": 0.0115, "step": 55750 }, { "epoch": 1.37, "learning_rate": 1.7302795222263268e-05, "loss": 0.021, "step": 55760 }, { "epoch": 1.37, "learning_rate": 1.730230267208472e-05, "loss": 0.0145, "step": 55770 }, { "epoch": 1.37, "learning_rate": 1.730181012190617e-05, "loss": 0.0168, "step": 55780 }, { "epoch": 1.37, "learning_rate": 1.7301317571727622e-05, "loss": 0.0107, "step": 55790 }, { "epoch": 1.37, "learning_rate": 1.730082502154907e-05, "loss": 0.0123, "step": 55800 }, { "epoch": 1.37, "learning_rate": 1.7300332471370523e-05, "loss": 0.0411, "step": 55810 }, { "epoch": 1.37, "learning_rate": 1.7299839921191973e-05, "loss": 0.0118, "step": 55820 }, { "epoch": 1.37, "learning_rate": 1.7299347371013425e-05, "loss": 0.0114, "step": 55830 }, { "epoch": 1.37, "learning_rate": 1.7298854820834874e-05, "loss": 0.0121, "step": 55840 }, { "epoch": 1.37, "learning_rate": 1.7298362270656323e-05, "loss": 0.0263, "step": 55850 }, { "epoch": 1.37, "learning_rate": 1.7297869720477776e-05, "loss": 0.0117, "step": 55860 }, { "epoch": 1.37, "learning_rate": 1.7297377170299225e-05, "loss": 0.0115, "step": 55870 }, { "epoch": 1.37, "learning_rate": 1.7296884620120674e-05, "loss": 0.0142, "step": 55880 }, { "epoch": 1.37, "learning_rate": 1.7296392069942127e-05, "loss": 0.0118, "step": 55890 }, { "epoch": 1.37, "learning_rate": 1.7295899519763576e-05, "loss": 0.0187, "step": 55900 }, { "epoch": 1.37, "learning_rate": 1.729540696958503e-05, "loss": 0.0119, "step": 55910 }, { "epoch": 1.37, "learning_rate": 1.7294914419406478e-05, "loss": 0.0126, "step": 55920 }, { "epoch": 1.37, "learning_rate": 1.729442186922793e-05, "loss": 0.0132, "step": 55930 }, { "epoch": 1.37, "learning_rate": 1.729392931904938e-05, "loss": 0.0135, "step": 55940 }, { "epoch": 1.37, "learning_rate": 1.7293436768870832e-05, "loss": 0.0158, "step": 55950 }, { "epoch": 1.37, "learning_rate": 1.729294421869228e-05, "loss": 0.0099, "step": 55960 }, { "epoch": 1.38, "learning_rate": 1.7292451668513734e-05, "loss": 0.0108, "step": 55970 }, { "epoch": 1.38, "learning_rate": 1.7291959118335183e-05, "loss": 0.0128, "step": 55980 }, { "epoch": 1.38, "learning_rate": 1.7291466568156632e-05, "loss": 0.0116, "step": 55990 }, { "epoch": 1.38, "learning_rate": 1.7290974017978084e-05, "loss": 0.0104, "step": 56000 }, { "epoch": 1.38, "learning_rate": 1.7290481467799534e-05, "loss": 0.0106, "step": 56010 }, { "epoch": 1.38, "learning_rate": 1.7289988917620983e-05, "loss": 0.0155, "step": 56020 }, { "epoch": 1.38, "learning_rate": 1.7289496367442435e-05, "loss": 0.0119, "step": 56030 }, { "epoch": 1.38, "learning_rate": 1.7289003817263884e-05, "loss": 0.013, "step": 56040 }, { "epoch": 1.38, "learning_rate": 1.7288511267085334e-05, "loss": 0.0193, "step": 56050 }, { "epoch": 1.38, "learning_rate": 1.7288018716906786e-05, "loss": 0.0182, "step": 56060 }, { "epoch": 1.38, "learning_rate": 1.7287526166728235e-05, "loss": 0.0105, "step": 56070 }, { "epoch": 1.38, "learning_rate": 1.7287033616549688e-05, "loss": 0.0106, "step": 56080 }, { "epoch": 1.38, "learning_rate": 1.728654106637114e-05, "loss": 0.0107, "step": 56090 }, { "epoch": 1.38, "learning_rate": 1.728604851619259e-05, "loss": 0.0216, "step": 56100 }, { "epoch": 1.38, "learning_rate": 1.728555596601404e-05, "loss": 0.0108, "step": 56110 }, { "epoch": 1.38, "learning_rate": 1.728506341583549e-05, "loss": 0.012, "step": 56120 }, { "epoch": 1.38, "learning_rate": 1.728457086565694e-05, "loss": 0.0148, "step": 56130 }, { "epoch": 1.38, "learning_rate": 1.728407831547839e-05, "loss": 0.0137, "step": 56140 }, { "epoch": 1.38, "learning_rate": 1.7283585765299842e-05, "loss": 0.0124, "step": 56150 }, { "epoch": 1.38, "learning_rate": 1.728309321512129e-05, "loss": 0.0111, "step": 56160 }, { "epoch": 1.38, "learning_rate": 1.728260066494274e-05, "loss": 0.0114, "step": 56170 }, { "epoch": 1.38, "learning_rate": 1.7282108114764193e-05, "loss": 0.0316, "step": 56180 }, { "epoch": 1.38, "learning_rate": 1.7281615564585642e-05, "loss": 0.0225, "step": 56190 }, { "epoch": 1.38, "learning_rate": 1.7281123014407094e-05, "loss": 0.0145, "step": 56200 }, { "epoch": 1.38, "learning_rate": 1.7280630464228544e-05, "loss": 0.0207, "step": 56210 }, { "epoch": 1.38, "learning_rate": 1.7280137914049996e-05, "loss": 0.012, "step": 56220 }, { "epoch": 1.38, "learning_rate": 1.7279645363871445e-05, "loss": 0.0161, "step": 56230 }, { "epoch": 1.38, "learning_rate": 1.7279152813692898e-05, "loss": 0.0107, "step": 56240 }, { "epoch": 1.38, "learning_rate": 1.7278660263514347e-05, "loss": 0.0102, "step": 56250 }, { "epoch": 1.38, "learning_rate": 1.72781677133358e-05, "loss": 0.0144, "step": 56260 }, { "epoch": 1.38, "learning_rate": 1.727767516315725e-05, "loss": 0.0122, "step": 56270 }, { "epoch": 1.38, "learning_rate": 1.7277182612978698e-05, "loss": 0.011, "step": 56280 }, { "epoch": 1.38, "learning_rate": 1.727669006280015e-05, "loss": 0.012, "step": 56290 }, { "epoch": 1.38, "learning_rate": 1.72761975126216e-05, "loss": 0.0121, "step": 56300 }, { "epoch": 1.38, "learning_rate": 1.727570496244305e-05, "loss": 0.0225, "step": 56310 }, { "epoch": 1.38, "learning_rate": 1.72752124122645e-05, "loss": 0.0108, "step": 56320 }, { "epoch": 1.38, "learning_rate": 1.727471986208595e-05, "loss": 0.0108, "step": 56330 }, { "epoch": 1.38, "learning_rate": 1.72742273119074e-05, "loss": 0.0205, "step": 56340 }, { "epoch": 1.38, "learning_rate": 1.7273734761728852e-05, "loss": 0.0125, "step": 56350 }, { "epoch": 1.38, "learning_rate": 1.7273242211550305e-05, "loss": 0.0156, "step": 56360 }, { "epoch": 1.38, "learning_rate": 1.7272749661371754e-05, "loss": 0.0104, "step": 56370 }, { "epoch": 1.39, "learning_rate": 1.7272257111193206e-05, "loss": 0.0138, "step": 56380 }, { "epoch": 1.39, "learning_rate": 1.7271764561014655e-05, "loss": 0.019, "step": 56390 }, { "epoch": 1.39, "learning_rate": 1.7271272010836105e-05, "loss": 0.0169, "step": 56400 }, { "epoch": 1.39, "learning_rate": 1.7270779460657557e-05, "loss": 0.0127, "step": 56410 }, { "epoch": 1.39, "learning_rate": 1.7270286910479006e-05, "loss": 0.01, "step": 56420 }, { "epoch": 1.39, "learning_rate": 1.726979436030046e-05, "loss": 0.0118, "step": 56430 }, { "epoch": 1.39, "learning_rate": 1.7269301810121908e-05, "loss": 0.0122, "step": 56440 }, { "epoch": 1.39, "learning_rate": 1.7268809259943357e-05, "loss": 0.0107, "step": 56450 }, { "epoch": 1.39, "learning_rate": 1.726831670976481e-05, "loss": 0.0194, "step": 56460 }, { "epoch": 1.39, "learning_rate": 1.726782415958626e-05, "loss": 0.0136, "step": 56470 }, { "epoch": 1.39, "learning_rate": 1.7267331609407708e-05, "loss": 0.0119, "step": 56480 }, { "epoch": 1.39, "learning_rate": 1.726683905922916e-05, "loss": 0.0133, "step": 56490 }, { "epoch": 1.39, "learning_rate": 1.726634650905061e-05, "loss": 0.0109, "step": 56500 }, { "epoch": 1.39, "learning_rate": 1.7265853958872062e-05, "loss": 0.0208, "step": 56510 }, { "epoch": 1.39, "learning_rate": 1.726536140869351e-05, "loss": 0.0161, "step": 56520 }, { "epoch": 1.39, "learning_rate": 1.7264868858514964e-05, "loss": 0.0137, "step": 56530 }, { "epoch": 1.39, "learning_rate": 1.7264376308336413e-05, "loss": 0.0116, "step": 56540 }, { "epoch": 1.39, "learning_rate": 1.7263883758157866e-05, "loss": 0.012, "step": 56550 }, { "epoch": 1.39, "learning_rate": 1.7263391207979315e-05, "loss": 0.0113, "step": 56560 }, { "epoch": 1.39, "learning_rate": 1.7262898657800764e-05, "loss": 0.0134, "step": 56570 }, { "epoch": 1.39, "learning_rate": 1.7262406107622216e-05, "loss": 0.0143, "step": 56580 }, { "epoch": 1.39, "learning_rate": 1.7261913557443665e-05, "loss": 0.0102, "step": 56590 }, { "epoch": 1.39, "learning_rate": 1.7261421007265115e-05, "loss": 0.0189, "step": 56600 }, { "epoch": 1.39, "learning_rate": 1.7260928457086567e-05, "loss": 0.0122, "step": 56610 }, { "epoch": 1.39, "learning_rate": 1.7260435906908016e-05, "loss": 0.0146, "step": 56620 }, { "epoch": 1.39, "learning_rate": 1.725994335672947e-05, "loss": 0.0161, "step": 56630 }, { "epoch": 1.39, "learning_rate": 1.7259450806550918e-05, "loss": 0.0109, "step": 56640 }, { "epoch": 1.39, "learning_rate": 1.725895825637237e-05, "loss": 0.0195, "step": 56650 }, { "epoch": 1.39, "learning_rate": 1.725846570619382e-05, "loss": 0.026, "step": 56660 }, { "epoch": 1.39, "learning_rate": 1.7257973156015272e-05, "loss": 0.0102, "step": 56670 }, { "epoch": 1.39, "learning_rate": 1.725748060583672e-05, "loss": 0.0103, "step": 56680 }, { "epoch": 1.39, "learning_rate": 1.7256988055658174e-05, "loss": 0.02, "step": 56690 }, { "epoch": 1.39, "learning_rate": 1.7256495505479623e-05, "loss": 0.0121, "step": 56700 }, { "epoch": 1.39, "learning_rate": 1.7256002955301072e-05, "loss": 0.0125, "step": 56710 }, { "epoch": 1.39, "learning_rate": 1.7255510405122525e-05, "loss": 0.0111, "step": 56720 }, { "epoch": 1.39, "learning_rate": 1.7255017854943974e-05, "loss": 0.012, "step": 56730 }, { "epoch": 1.39, "learning_rate": 1.7254525304765423e-05, "loss": 0.0112, "step": 56740 }, { "epoch": 1.39, "learning_rate": 1.7254032754586876e-05, "loss": 0.0117, "step": 56750 }, { "epoch": 1.39, "learning_rate": 1.7253540204408325e-05, "loss": 0.0101, "step": 56760 }, { "epoch": 1.39, "learning_rate": 1.7253047654229774e-05, "loss": 0.0105, "step": 56770 }, { "epoch": 1.39, "learning_rate": 1.7252555104051226e-05, "loss": 0.0119, "step": 56780 }, { "epoch": 1.4, "learning_rate": 1.7252062553872676e-05, "loss": 0.0211, "step": 56790 }, { "epoch": 1.4, "learning_rate": 1.7251570003694128e-05, "loss": 0.0106, "step": 56800 }, { "epoch": 1.4, "learning_rate": 1.725107745351558e-05, "loss": 0.0123, "step": 56810 }, { "epoch": 1.4, "learning_rate": 1.725058490333703e-05, "loss": 0.013, "step": 56820 }, { "epoch": 1.4, "learning_rate": 1.725009235315848e-05, "loss": 0.0105, "step": 56830 }, { "epoch": 1.4, "learning_rate": 1.724959980297993e-05, "loss": 0.0105, "step": 56840 }, { "epoch": 1.4, "learning_rate": 1.724910725280138e-05, "loss": 0.0121, "step": 56850 }, { "epoch": 1.4, "learning_rate": 1.724861470262283e-05, "loss": 0.0166, "step": 56860 }, { "epoch": 1.4, "learning_rate": 1.7248122152444282e-05, "loss": 0.0104, "step": 56870 }, { "epoch": 1.4, "learning_rate": 1.724762960226573e-05, "loss": 0.0104, "step": 56880 }, { "epoch": 1.4, "learning_rate": 1.724713705208718e-05, "loss": 0.0113, "step": 56890 }, { "epoch": 1.4, "learning_rate": 1.7246644501908633e-05, "loss": 0.0127, "step": 56900 }, { "epoch": 1.4, "learning_rate": 1.7246151951730082e-05, "loss": 0.0109, "step": 56910 }, { "epoch": 1.4, "learning_rate": 1.7245659401551535e-05, "loss": 0.0144, "step": 56920 }, { "epoch": 1.4, "learning_rate": 1.7245166851372984e-05, "loss": 0.0128, "step": 56930 }, { "epoch": 1.4, "learning_rate": 1.7244674301194437e-05, "loss": 0.0152, "step": 56940 }, { "epoch": 1.4, "learning_rate": 1.7244181751015886e-05, "loss": 0.0133, "step": 56950 }, { "epoch": 1.4, "learning_rate": 1.7243689200837338e-05, "loss": 0.0119, "step": 56960 }, { "epoch": 1.4, "learning_rate": 1.7243196650658787e-05, "loss": 0.0105, "step": 56970 }, { "epoch": 1.4, "learning_rate": 1.724270410048024e-05, "loss": 0.0134, "step": 56980 }, { "epoch": 1.4, "learning_rate": 1.724221155030169e-05, "loss": 0.0105, "step": 56990 }, { "epoch": 1.4, "learning_rate": 1.7241719000123138e-05, "loss": 0.011, "step": 57000 }, { "epoch": 1.4, "learning_rate": 1.724122644994459e-05, "loss": 0.0144, "step": 57010 }, { "epoch": 1.4, "learning_rate": 1.724073389976604e-05, "loss": 0.0148, "step": 57020 }, { "epoch": 1.4, "learning_rate": 1.724024134958749e-05, "loss": 0.0178, "step": 57030 }, { "epoch": 1.4, "learning_rate": 1.723974879940894e-05, "loss": 0.0136, "step": 57040 }, { "epoch": 1.4, "learning_rate": 1.723925624923039e-05, "loss": 0.0173, "step": 57050 }, { "epoch": 1.4, "learning_rate": 1.723876369905184e-05, "loss": 0.0114, "step": 57060 }, { "epoch": 1.4, "learning_rate": 1.7238271148873292e-05, "loss": 0.01, "step": 57070 }, { "epoch": 1.4, "learning_rate": 1.7237778598694745e-05, "loss": 0.0119, "step": 57080 }, { "epoch": 1.4, "learning_rate": 1.7237286048516194e-05, "loss": 0.0124, "step": 57090 }, { "epoch": 1.4, "learning_rate": 1.7236793498337647e-05, "loss": 0.012, "step": 57100 }, { "epoch": 1.4, "learning_rate": 1.7236300948159096e-05, "loss": 0.0141, "step": 57110 }, { "epoch": 1.4, "learning_rate": 1.7235808397980545e-05, "loss": 0.0116, "step": 57120 }, { "epoch": 1.4, "learning_rate": 1.7235315847801997e-05, "loss": 0.0112, "step": 57130 }, { "epoch": 1.4, "learning_rate": 1.7234823297623447e-05, "loss": 0.0116, "step": 57140 }, { "epoch": 1.4, "learning_rate": 1.72343307474449e-05, "loss": 0.0121, "step": 57150 }, { "epoch": 1.4, "learning_rate": 1.7233838197266348e-05, "loss": 0.0124, "step": 57160 }, { "epoch": 1.4, "learning_rate": 1.7233345647087797e-05, "loss": 0.0102, "step": 57170 }, { "epoch": 1.4, "learning_rate": 1.723285309690925e-05, "loss": 0.0126, "step": 57180 }, { "epoch": 1.4, "learning_rate": 1.72323605467307e-05, "loss": 0.0123, "step": 57190 }, { "epoch": 1.41, "learning_rate": 1.7231867996552148e-05, "loss": 0.0124, "step": 57200 }, { "epoch": 1.41, "learning_rate": 1.72313754463736e-05, "loss": 0.0116, "step": 57210 }, { "epoch": 1.41, "learning_rate": 1.723088289619505e-05, "loss": 0.0102, "step": 57220 }, { "epoch": 1.41, "learning_rate": 1.7230390346016502e-05, "loss": 0.0116, "step": 57230 }, { "epoch": 1.41, "learning_rate": 1.722989779583795e-05, "loss": 0.0099, "step": 57240 }, { "epoch": 1.41, "learning_rate": 1.7229405245659404e-05, "loss": 0.0143, "step": 57250 }, { "epoch": 1.41, "learning_rate": 1.7228912695480853e-05, "loss": 0.0121, "step": 57260 }, { "epoch": 1.41, "learning_rate": 1.7228420145302306e-05, "loss": 0.0115, "step": 57270 }, { "epoch": 1.41, "learning_rate": 1.7227927595123755e-05, "loss": 0.0137, "step": 57280 }, { "epoch": 1.41, "learning_rate": 1.7227435044945204e-05, "loss": 0.0101, "step": 57290 }, { "epoch": 1.41, "learning_rate": 1.7226942494766657e-05, "loss": 0.0108, "step": 57300 }, { "epoch": 1.41, "learning_rate": 1.7226449944588106e-05, "loss": 0.013, "step": 57310 }, { "epoch": 1.41, "learning_rate": 1.7225957394409555e-05, "loss": 0.0122, "step": 57320 }, { "epoch": 1.41, "learning_rate": 1.7225464844231008e-05, "loss": 0.0104, "step": 57330 }, { "epoch": 1.41, "learning_rate": 1.7224972294052457e-05, "loss": 0.022, "step": 57340 }, { "epoch": 1.41, "learning_rate": 1.722447974387391e-05, "loss": 0.0126, "step": 57350 }, { "epoch": 1.41, "learning_rate": 1.722398719369536e-05, "loss": 0.0118, "step": 57360 }, { "epoch": 1.41, "learning_rate": 1.722349464351681e-05, "loss": 0.0121, "step": 57370 }, { "epoch": 1.41, "learning_rate": 1.722300209333826e-05, "loss": 0.011, "step": 57380 }, { "epoch": 1.41, "learning_rate": 1.7222509543159713e-05, "loss": 0.0123, "step": 57390 }, { "epoch": 1.41, "learning_rate": 1.7222016992981162e-05, "loss": 0.0112, "step": 57400 }, { "epoch": 1.41, "learning_rate": 1.7221524442802614e-05, "loss": 0.0217, "step": 57410 }, { "epoch": 1.41, "learning_rate": 1.7221031892624063e-05, "loss": 0.011, "step": 57420 }, { "epoch": 1.41, "learning_rate": 1.7220539342445513e-05, "loss": 0.0129, "step": 57430 }, { "epoch": 1.41, "learning_rate": 1.7220046792266965e-05, "loss": 0.0123, "step": 57440 }, { "epoch": 1.41, "learning_rate": 1.7219554242088414e-05, "loss": 0.0118, "step": 57450 }, { "epoch": 1.41, "learning_rate": 1.7219061691909863e-05, "loss": 0.0147, "step": 57460 }, { "epoch": 1.41, "learning_rate": 1.7218569141731316e-05, "loss": 0.0167, "step": 57470 }, { "epoch": 1.41, "learning_rate": 1.7218076591552765e-05, "loss": 0.0117, "step": 57480 }, { "epoch": 1.41, "learning_rate": 1.7217584041374214e-05, "loss": 0.0101, "step": 57490 }, { "epoch": 1.41, "learning_rate": 1.7217091491195667e-05, "loss": 0.0105, "step": 57500 }, { "epoch": 1.41, "learning_rate": 1.7216598941017116e-05, "loss": 0.0107, "step": 57510 }, { "epoch": 1.41, "learning_rate": 1.721610639083857e-05, "loss": 0.011, "step": 57520 }, { "epoch": 1.41, "learning_rate": 1.7215613840660018e-05, "loss": 0.0119, "step": 57530 }, { "epoch": 1.41, "learning_rate": 1.721512129048147e-05, "loss": 0.0118, "step": 57540 }, { "epoch": 1.41, "learning_rate": 1.721462874030292e-05, "loss": 0.0179, "step": 57550 }, { "epoch": 1.41, "learning_rate": 1.7214136190124372e-05, "loss": 0.0123, "step": 57560 }, { "epoch": 1.41, "learning_rate": 1.721364363994582e-05, "loss": 0.0127, "step": 57570 }, { "epoch": 1.41, "learning_rate": 1.721315108976727e-05, "loss": 0.0133, "step": 57580 }, { "epoch": 1.41, "learning_rate": 1.7212658539588723e-05, "loss": 0.0161, "step": 57590 }, { "epoch": 1.42, "learning_rate": 1.7212165989410172e-05, "loss": 0.0102, "step": 57600 }, { "epoch": 1.42, "learning_rate": 1.721167343923162e-05, "loss": 0.0219, "step": 57610 }, { "epoch": 1.42, "learning_rate": 1.7211180889053073e-05, "loss": 0.0101, "step": 57620 }, { "epoch": 1.42, "learning_rate": 1.7210688338874523e-05, "loss": 0.0184, "step": 57630 }, { "epoch": 1.42, "learning_rate": 1.7210195788695975e-05, "loss": 0.0117, "step": 57640 }, { "epoch": 1.42, "learning_rate": 1.7209703238517424e-05, "loss": 0.0149, "step": 57650 }, { "epoch": 1.42, "learning_rate": 1.7209210688338877e-05, "loss": 0.0093, "step": 57660 }, { "epoch": 1.42, "learning_rate": 1.7208718138160326e-05, "loss": 0.0102, "step": 57670 }, { "epoch": 1.42, "learning_rate": 1.720822558798178e-05, "loss": 0.0131, "step": 57680 }, { "epoch": 1.42, "learning_rate": 1.7207733037803228e-05, "loss": 0.0152, "step": 57690 }, { "epoch": 1.42, "learning_rate": 1.720724048762468e-05, "loss": 0.0142, "step": 57700 }, { "epoch": 1.42, "learning_rate": 1.720674793744613e-05, "loss": 0.0108, "step": 57710 }, { "epoch": 1.42, "learning_rate": 1.720625538726758e-05, "loss": 0.0116, "step": 57720 }, { "epoch": 1.42, "learning_rate": 1.720576283708903e-05, "loss": 0.0116, "step": 57730 }, { "epoch": 1.42, "learning_rate": 1.720527028691048e-05, "loss": 0.0115, "step": 57740 }, { "epoch": 1.42, "learning_rate": 1.720477773673193e-05, "loss": 0.0143, "step": 57750 }, { "epoch": 1.42, "learning_rate": 1.7204285186553382e-05, "loss": 0.0136, "step": 57760 }, { "epoch": 1.42, "learning_rate": 1.720379263637483e-05, "loss": 0.0204, "step": 57770 }, { "epoch": 1.42, "learning_rate": 1.720330008619628e-05, "loss": 0.0112, "step": 57780 }, { "epoch": 1.42, "learning_rate": 1.7202807536017733e-05, "loss": 0.0205, "step": 57790 }, { "epoch": 1.42, "learning_rate": 1.7202314985839185e-05, "loss": 0.0115, "step": 57800 }, { "epoch": 1.42, "learning_rate": 1.7201822435660634e-05, "loss": 0.0182, "step": 57810 }, { "epoch": 1.42, "learning_rate": 1.7201329885482087e-05, "loss": 0.0114, "step": 57820 }, { "epoch": 1.42, "learning_rate": 1.7200837335303536e-05, "loss": 0.012, "step": 57830 }, { "epoch": 1.42, "learning_rate": 1.7200344785124985e-05, "loss": 0.0147, "step": 57840 }, { "epoch": 1.42, "learning_rate": 1.7199852234946438e-05, "loss": 0.0115, "step": 57850 }, { "epoch": 1.42, "learning_rate": 1.7199359684767887e-05, "loss": 0.0117, "step": 57860 }, { "epoch": 1.42, "learning_rate": 1.719886713458934e-05, "loss": 0.0112, "step": 57870 }, { "epoch": 1.42, "learning_rate": 1.719837458441079e-05, "loss": 0.0125, "step": 57880 }, { "epoch": 1.42, "learning_rate": 1.7197882034232238e-05, "loss": 0.0131, "step": 57890 }, { "epoch": 1.42, "learning_rate": 1.719738948405369e-05, "loss": 0.0115, "step": 57900 }, { "epoch": 1.42, "learning_rate": 1.719689693387514e-05, "loss": 0.0116, "step": 57910 }, { "epoch": 1.42, "learning_rate": 1.719640438369659e-05, "loss": 0.0259, "step": 57920 }, { "epoch": 1.42, "learning_rate": 1.719591183351804e-05, "loss": 0.0105, "step": 57930 }, { "epoch": 1.42, "learning_rate": 1.719541928333949e-05, "loss": 0.0149, "step": 57940 }, { "epoch": 1.42, "learning_rate": 1.7194926733160943e-05, "loss": 0.0126, "step": 57950 }, { "epoch": 1.42, "learning_rate": 1.7194434182982392e-05, "loss": 0.0217, "step": 57960 }, { "epoch": 1.42, "learning_rate": 1.7193941632803845e-05, "loss": 0.0121, "step": 57970 }, { "epoch": 1.42, "learning_rate": 1.7193449082625294e-05, "loss": 0.0111, "step": 57980 }, { "epoch": 1.42, "learning_rate": 1.7192956532446746e-05, "loss": 0.0113, "step": 57990 }, { "epoch": 1.42, "learning_rate": 1.7192463982268195e-05, "loss": 0.015, "step": 58000 }, { "epoch": 1.43, "learning_rate": 1.7191971432089644e-05, "loss": 0.0132, "step": 58010 }, { "epoch": 1.43, "learning_rate": 1.7191478881911097e-05, "loss": 0.011, "step": 58020 }, { "epoch": 1.43, "learning_rate": 1.7190986331732546e-05, "loss": 0.0101, "step": 58030 }, { "epoch": 1.43, "learning_rate": 1.7190493781553995e-05, "loss": 0.014, "step": 58040 }, { "epoch": 1.43, "learning_rate": 1.7190001231375448e-05, "loss": 0.0249, "step": 58050 }, { "epoch": 1.43, "learning_rate": 1.7189508681196897e-05, "loss": 0.0111, "step": 58060 }, { "epoch": 1.43, "learning_rate": 1.718901613101835e-05, "loss": 0.0134, "step": 58070 }, { "epoch": 1.43, "learning_rate": 1.71885235808398e-05, "loss": 0.0131, "step": 58080 }, { "epoch": 1.43, "learning_rate": 1.718803103066125e-05, "loss": 0.015, "step": 58090 }, { "epoch": 1.43, "learning_rate": 1.71875384804827e-05, "loss": 0.0099, "step": 58100 }, { "epoch": 1.43, "learning_rate": 1.7187045930304153e-05, "loss": 0.0121, "step": 58110 }, { "epoch": 1.43, "learning_rate": 1.7186553380125602e-05, "loss": 0.0119, "step": 58120 }, { "epoch": 1.43, "learning_rate": 1.7186060829947055e-05, "loss": 0.0121, "step": 58130 }, { "epoch": 1.43, "learning_rate": 1.7185568279768504e-05, "loss": 0.0231, "step": 58140 }, { "epoch": 1.43, "learning_rate": 1.7185075729589953e-05, "loss": 0.0118, "step": 58150 }, { "epoch": 1.43, "learning_rate": 1.7184583179411405e-05, "loss": 0.0175, "step": 58160 }, { "epoch": 1.43, "learning_rate": 1.7184090629232855e-05, "loss": 0.0144, "step": 58170 }, { "epoch": 1.43, "learning_rate": 1.7183598079054304e-05, "loss": 0.0119, "step": 58180 }, { "epoch": 1.43, "learning_rate": 1.7183105528875756e-05, "loss": 0.0144, "step": 58190 }, { "epoch": 1.43, "learning_rate": 1.7182612978697205e-05, "loss": 0.0114, "step": 58200 }, { "epoch": 1.43, "learning_rate": 1.7182120428518655e-05, "loss": 0.0121, "step": 58210 }, { "epoch": 1.43, "learning_rate": 1.7181627878340107e-05, "loss": 0.0128, "step": 58220 }, { "epoch": 1.43, "learning_rate": 1.7181135328161556e-05, "loss": 0.0218, "step": 58230 }, { "epoch": 1.43, "learning_rate": 1.718064277798301e-05, "loss": 0.0109, "step": 58240 }, { "epoch": 1.43, "learning_rate": 1.7180150227804458e-05, "loss": 0.0102, "step": 58250 }, { "epoch": 1.43, "learning_rate": 1.717965767762591e-05, "loss": 0.0114, "step": 58260 }, { "epoch": 1.43, "learning_rate": 1.717916512744736e-05, "loss": 0.0132, "step": 58270 }, { "epoch": 1.43, "learning_rate": 1.7178672577268812e-05, "loss": 0.0173, "step": 58280 }, { "epoch": 1.43, "learning_rate": 1.717818002709026e-05, "loss": 0.0125, "step": 58290 }, { "epoch": 1.43, "learning_rate": 1.717768747691171e-05, "loss": 0.0247, "step": 58300 }, { "epoch": 1.43, "learning_rate": 1.7177194926733163e-05, "loss": 0.0126, "step": 58310 }, { "epoch": 1.43, "learning_rate": 1.7176702376554612e-05, "loss": 0.02, "step": 58320 }, { "epoch": 1.43, "learning_rate": 1.717620982637606e-05, "loss": 0.0114, "step": 58330 }, { "epoch": 1.43, "learning_rate": 1.7175717276197514e-05, "loss": 0.0155, "step": 58340 }, { "epoch": 1.43, "learning_rate": 1.7175224726018963e-05, "loss": 0.0104, "step": 58350 }, { "epoch": 1.43, "learning_rate": 1.7174732175840416e-05, "loss": 0.012, "step": 58360 }, { "epoch": 1.43, "learning_rate": 1.7174239625661865e-05, "loss": 0.0152, "step": 58370 }, { "epoch": 1.43, "learning_rate": 1.7173747075483317e-05, "loss": 0.013, "step": 58380 }, { "epoch": 1.43, "learning_rate": 1.7173254525304766e-05, "loss": 0.0185, "step": 58390 }, { "epoch": 1.43, "learning_rate": 1.717276197512622e-05, "loss": 0.0113, "step": 58400 }, { "epoch": 1.43, "learning_rate": 1.7172269424947668e-05, "loss": 0.0152, "step": 58410 }, { "epoch": 1.44, "learning_rate": 1.717177687476912e-05, "loss": 0.0104, "step": 58420 }, { "epoch": 1.44, "learning_rate": 1.717128432459057e-05, "loss": 0.0177, "step": 58430 }, { "epoch": 1.44, "learning_rate": 1.717079177441202e-05, "loss": 0.0144, "step": 58440 }, { "epoch": 1.44, "learning_rate": 1.717029922423347e-05, "loss": 0.0188, "step": 58450 }, { "epoch": 1.44, "learning_rate": 1.716980667405492e-05, "loss": 0.0112, "step": 58460 }, { "epoch": 1.44, "learning_rate": 1.716931412387637e-05, "loss": 0.0112, "step": 58470 }, { "epoch": 1.44, "learning_rate": 1.7168821573697822e-05, "loss": 0.011, "step": 58480 }, { "epoch": 1.44, "learning_rate": 1.716832902351927e-05, "loss": 0.0126, "step": 58490 }, { "epoch": 1.44, "learning_rate": 1.716783647334072e-05, "loss": 0.0186, "step": 58500 }, { "epoch": 1.44, "learning_rate": 1.7167343923162173e-05, "loss": 0.0146, "step": 58510 }, { "epoch": 1.44, "learning_rate": 1.7166851372983626e-05, "loss": 0.013, "step": 58520 }, { "epoch": 1.44, "learning_rate": 1.7166358822805075e-05, "loss": 0.0117, "step": 58530 }, { "epoch": 1.44, "learning_rate": 1.7165866272626527e-05, "loss": 0.0145, "step": 58540 }, { "epoch": 1.44, "learning_rate": 1.7165373722447976e-05, "loss": 0.0121, "step": 58550 }, { "epoch": 1.44, "learning_rate": 1.7164881172269426e-05, "loss": 0.0121, "step": 58560 }, { "epoch": 1.44, "learning_rate": 1.7164388622090878e-05, "loss": 0.0119, "step": 58570 }, { "epoch": 1.44, "learning_rate": 1.7163896071912327e-05, "loss": 0.0126, "step": 58580 }, { "epoch": 1.44, "learning_rate": 1.716340352173378e-05, "loss": 0.0127, "step": 58590 }, { "epoch": 1.44, "learning_rate": 1.716291097155523e-05, "loss": 0.0116, "step": 58600 }, { "epoch": 1.44, "learning_rate": 1.7162418421376678e-05, "loss": 0.0101, "step": 58610 }, { "epoch": 1.44, "learning_rate": 1.716192587119813e-05, "loss": 0.0104, "step": 58620 }, { "epoch": 1.44, "learning_rate": 1.716143332101958e-05, "loss": 0.0097, "step": 58630 }, { "epoch": 1.44, "learning_rate": 1.716094077084103e-05, "loss": 0.0119, "step": 58640 }, { "epoch": 1.44, "learning_rate": 1.716044822066248e-05, "loss": 0.0153, "step": 58650 }, { "epoch": 1.44, "learning_rate": 1.715995567048393e-05, "loss": 0.0105, "step": 58660 }, { "epoch": 1.44, "learning_rate": 1.7159463120305383e-05, "loss": 0.0135, "step": 58670 }, { "epoch": 1.44, "learning_rate": 1.7158970570126832e-05, "loss": 0.011, "step": 58680 }, { "epoch": 1.44, "learning_rate": 1.7158478019948285e-05, "loss": 0.0117, "step": 58690 }, { "epoch": 1.44, "learning_rate": 1.7157985469769734e-05, "loss": 0.0123, "step": 58700 }, { "epoch": 1.44, "learning_rate": 1.7157492919591187e-05, "loss": 0.0156, "step": 58710 }, { "epoch": 1.44, "learning_rate": 1.7157000369412636e-05, "loss": 0.013, "step": 58720 }, { "epoch": 1.44, "learning_rate": 1.7156507819234085e-05, "loss": 0.0116, "step": 58730 }, { "epoch": 1.44, "learning_rate": 1.7156015269055537e-05, "loss": 0.0179, "step": 58740 }, { "epoch": 1.44, "learning_rate": 1.7155522718876987e-05, "loss": 0.0156, "step": 58750 }, { "epoch": 1.44, "learning_rate": 1.7155030168698436e-05, "loss": 0.0109, "step": 58760 }, { "epoch": 1.44, "learning_rate": 1.7154537618519888e-05, "loss": 0.0108, "step": 58770 }, { "epoch": 1.44, "learning_rate": 1.7154045068341337e-05, "loss": 0.0121, "step": 58780 }, { "epoch": 1.44, "learning_rate": 1.715355251816279e-05, "loss": 0.016, "step": 58790 }, { "epoch": 1.44, "learning_rate": 1.715305996798424e-05, "loss": 0.0103, "step": 58800 }, { "epoch": 1.44, "learning_rate": 1.715256741780569e-05, "loss": 0.0124, "step": 58810 }, { "epoch": 1.45, "learning_rate": 1.715207486762714e-05, "loss": 0.0136, "step": 58820 }, { "epoch": 1.45, "learning_rate": 1.7151582317448593e-05, "loss": 0.0111, "step": 58830 }, { "epoch": 1.45, "learning_rate": 1.7151089767270042e-05, "loss": 0.0185, "step": 58840 }, { "epoch": 1.45, "learning_rate": 1.7150597217091495e-05, "loss": 0.0183, "step": 58850 }, { "epoch": 1.45, "learning_rate": 1.7150104666912944e-05, "loss": 0.0122, "step": 58860 }, { "epoch": 1.45, "learning_rate": 1.7149612116734393e-05, "loss": 0.0279, "step": 58870 }, { "epoch": 1.45, "learning_rate": 1.7149119566555846e-05, "loss": 0.0166, "step": 58880 }, { "epoch": 1.45, "learning_rate": 1.7148627016377295e-05, "loss": 0.0312, "step": 58890 }, { "epoch": 1.45, "learning_rate": 1.7148134466198744e-05, "loss": 0.0134, "step": 58900 }, { "epoch": 1.45, "learning_rate": 1.7147641916020197e-05, "loss": 0.0118, "step": 58910 }, { "epoch": 1.45, "learning_rate": 1.7147149365841646e-05, "loss": 0.0159, "step": 58920 }, { "epoch": 1.45, "learning_rate": 1.7146656815663095e-05, "loss": 0.0104, "step": 58930 }, { "epoch": 1.45, "learning_rate": 1.7146164265484547e-05, "loss": 0.0226, "step": 58940 }, { "epoch": 1.45, "learning_rate": 1.7145671715305997e-05, "loss": 0.0138, "step": 58950 }, { "epoch": 1.45, "learning_rate": 1.714517916512745e-05, "loss": 0.019, "step": 58960 }, { "epoch": 1.45, "learning_rate": 1.7144686614948898e-05, "loss": 0.0111, "step": 58970 }, { "epoch": 1.45, "learning_rate": 1.714419406477035e-05, "loss": 0.0187, "step": 58980 }, { "epoch": 1.45, "learning_rate": 1.71437015145918e-05, "loss": 0.0095, "step": 58990 }, { "epoch": 1.45, "learning_rate": 1.7143208964413252e-05, "loss": 0.0135, "step": 59000 }, { "epoch": 1.45, "learning_rate": 1.71427164142347e-05, "loss": 0.0157, "step": 59010 }, { "epoch": 1.45, "learning_rate": 1.714222386405615e-05, "loss": 0.0235, "step": 59020 }, { "epoch": 1.45, "learning_rate": 1.7141731313877603e-05, "loss": 0.0119, "step": 59030 }, { "epoch": 1.45, "learning_rate": 1.7141238763699052e-05, "loss": 0.0133, "step": 59040 }, { "epoch": 1.45, "learning_rate": 1.71407462135205e-05, "loss": 0.0108, "step": 59050 }, { "epoch": 1.45, "learning_rate": 1.7140253663341954e-05, "loss": 0.0117, "step": 59060 }, { "epoch": 1.45, "learning_rate": 1.7139761113163403e-05, "loss": 0.0125, "step": 59070 }, { "epoch": 1.45, "learning_rate": 1.7139268562984856e-05, "loss": 0.0115, "step": 59080 }, { "epoch": 1.45, "learning_rate": 1.7138776012806305e-05, "loss": 0.0122, "step": 59090 }, { "epoch": 1.45, "learning_rate": 1.7138283462627758e-05, "loss": 0.012, "step": 59100 }, { "epoch": 1.45, "learning_rate": 1.7137790912449207e-05, "loss": 0.0126, "step": 59110 }, { "epoch": 1.45, "learning_rate": 1.713729836227066e-05, "loss": 0.0119, "step": 59120 }, { "epoch": 1.45, "learning_rate": 1.713680581209211e-05, "loss": 0.0123, "step": 59130 }, { "epoch": 1.45, "learning_rate": 1.713631326191356e-05, "loss": 0.0209, "step": 59140 }, { "epoch": 1.45, "learning_rate": 1.713582071173501e-05, "loss": 0.0114, "step": 59150 }, { "epoch": 1.45, "learning_rate": 1.713532816155646e-05, "loss": 0.0113, "step": 59160 }, { "epoch": 1.45, "learning_rate": 1.7134835611377912e-05, "loss": 0.0109, "step": 59170 }, { "epoch": 1.45, "learning_rate": 1.713434306119936e-05, "loss": 0.0114, "step": 59180 }, { "epoch": 1.45, "learning_rate": 1.713385051102081e-05, "loss": 0.0124, "step": 59190 }, { "epoch": 1.45, "learning_rate": 1.7133357960842263e-05, "loss": 0.0209, "step": 59200 }, { "epoch": 1.45, "learning_rate": 1.7132865410663712e-05, "loss": 0.0125, "step": 59210 }, { "epoch": 1.45, "learning_rate": 1.713237286048516e-05, "loss": 0.0101, "step": 59220 }, { "epoch": 1.46, "learning_rate": 1.7131880310306613e-05, "loss": 0.0165, "step": 59230 }, { "epoch": 1.46, "learning_rate": 1.7131387760128066e-05, "loss": 0.0158, "step": 59240 }, { "epoch": 1.46, "learning_rate": 1.7130895209949515e-05, "loss": 0.0191, "step": 59250 }, { "epoch": 1.46, "learning_rate": 1.7130402659770968e-05, "loss": 0.013, "step": 59260 }, { "epoch": 1.46, "learning_rate": 1.7129910109592417e-05, "loss": 0.0124, "step": 59270 }, { "epoch": 1.46, "learning_rate": 1.7129417559413866e-05, "loss": 0.0115, "step": 59280 }, { "epoch": 1.46, "learning_rate": 1.712892500923532e-05, "loss": 0.0153, "step": 59290 }, { "epoch": 1.46, "learning_rate": 1.7128432459056768e-05, "loss": 0.0143, "step": 59300 }, { "epoch": 1.46, "learning_rate": 1.712793990887822e-05, "loss": 0.0196, "step": 59310 }, { "epoch": 1.46, "learning_rate": 1.712744735869967e-05, "loss": 0.0108, "step": 59320 }, { "epoch": 1.46, "learning_rate": 1.712695480852112e-05, "loss": 0.0117, "step": 59330 }, { "epoch": 1.46, "learning_rate": 1.712646225834257e-05, "loss": 0.0216, "step": 59340 }, { "epoch": 1.46, "learning_rate": 1.712596970816402e-05, "loss": 0.0109, "step": 59350 }, { "epoch": 1.46, "learning_rate": 1.712547715798547e-05, "loss": 0.0111, "step": 59360 }, { "epoch": 1.46, "learning_rate": 1.7124984607806922e-05, "loss": 0.0115, "step": 59370 }, { "epoch": 1.46, "learning_rate": 1.712449205762837e-05, "loss": 0.0125, "step": 59380 }, { "epoch": 1.46, "learning_rate": 1.7123999507449823e-05, "loss": 0.0121, "step": 59390 }, { "epoch": 1.46, "learning_rate": 1.7123506957271273e-05, "loss": 0.0139, "step": 59400 }, { "epoch": 1.46, "learning_rate": 1.7123014407092725e-05, "loss": 0.0134, "step": 59410 }, { "epoch": 1.46, "learning_rate": 1.7122521856914174e-05, "loss": 0.0103, "step": 59420 }, { "epoch": 1.46, "learning_rate": 1.7122029306735627e-05, "loss": 0.0116, "step": 59430 }, { "epoch": 1.46, "learning_rate": 1.7121536756557076e-05, "loss": 0.01, "step": 59440 }, { "epoch": 1.46, "learning_rate": 1.7121044206378525e-05, "loss": 0.0109, "step": 59450 }, { "epoch": 1.46, "learning_rate": 1.7120551656199978e-05, "loss": 0.0122, "step": 59460 }, { "epoch": 1.46, "learning_rate": 1.7120059106021427e-05, "loss": 0.0146, "step": 59470 }, { "epoch": 1.46, "learning_rate": 1.7119566555842876e-05, "loss": 0.009, "step": 59480 }, { "epoch": 1.46, "learning_rate": 1.711907400566433e-05, "loss": 0.011, "step": 59490 }, { "epoch": 1.46, "learning_rate": 1.7118581455485778e-05, "loss": 0.0197, "step": 59500 }, { "epoch": 1.46, "learning_rate": 1.711808890530723e-05, "loss": 0.0115, "step": 59510 }, { "epoch": 1.46, "learning_rate": 1.711759635512868e-05, "loss": 0.011, "step": 59520 }, { "epoch": 1.46, "learning_rate": 1.7117103804950132e-05, "loss": 0.0114, "step": 59530 }, { "epoch": 1.46, "learning_rate": 1.711661125477158e-05, "loss": 0.0103, "step": 59540 }, { "epoch": 1.46, "learning_rate": 1.7116118704593034e-05, "loss": 0.0122, "step": 59550 }, { "epoch": 1.46, "learning_rate": 1.7115626154414483e-05, "loss": 0.0122, "step": 59560 }, { "epoch": 1.46, "learning_rate": 1.7115133604235935e-05, "loss": 0.0114, "step": 59570 }, { "epoch": 1.46, "learning_rate": 1.7114641054057384e-05, "loss": 0.0109, "step": 59580 }, { "epoch": 1.46, "learning_rate": 1.7114148503878834e-05, "loss": 0.0112, "step": 59590 }, { "epoch": 1.46, "learning_rate": 1.7113655953700286e-05, "loss": 0.0107, "step": 59600 }, { "epoch": 1.46, "learning_rate": 1.7113163403521735e-05, "loss": 0.0117, "step": 59610 }, { "epoch": 1.46, "learning_rate": 1.7112670853343184e-05, "loss": 0.0124, "step": 59620 }, { "epoch": 1.46, "learning_rate": 1.7112178303164637e-05, "loss": 0.0118, "step": 59630 }, { "epoch": 1.47, "learning_rate": 1.7111685752986086e-05, "loss": 0.0301, "step": 59640 }, { "epoch": 1.47, "learning_rate": 1.7111193202807535e-05, "loss": 0.0104, "step": 59650 }, { "epoch": 1.47, "learning_rate": 1.7110700652628988e-05, "loss": 0.0143, "step": 59660 }, { "epoch": 1.47, "learning_rate": 1.7110208102450437e-05, "loss": 0.0143, "step": 59670 }, { "epoch": 1.47, "learning_rate": 1.710971555227189e-05, "loss": 0.0104, "step": 59680 }, { "epoch": 1.47, "learning_rate": 1.710922300209334e-05, "loss": 0.0111, "step": 59690 }, { "epoch": 1.47, "learning_rate": 1.710873045191479e-05, "loss": 0.0121, "step": 59700 }, { "epoch": 1.47, "learning_rate": 1.710823790173624e-05, "loss": 0.0111, "step": 59710 }, { "epoch": 1.47, "learning_rate": 1.7107745351557693e-05, "loss": 0.0101, "step": 59720 }, { "epoch": 1.47, "learning_rate": 1.7107252801379142e-05, "loss": 0.0105, "step": 59730 }, { "epoch": 1.47, "learning_rate": 1.710676025120059e-05, "loss": 0.0109, "step": 59740 }, { "epoch": 1.47, "learning_rate": 1.7106267701022044e-05, "loss": 0.0182, "step": 59750 }, { "epoch": 1.47, "learning_rate": 1.7105775150843493e-05, "loss": 0.0097, "step": 59760 }, { "epoch": 1.47, "learning_rate": 1.7105282600664942e-05, "loss": 0.0099, "step": 59770 }, { "epoch": 1.47, "learning_rate": 1.7104790050486395e-05, "loss": 0.0198, "step": 59780 }, { "epoch": 1.47, "learning_rate": 1.7104297500307844e-05, "loss": 0.0115, "step": 59790 }, { "epoch": 1.47, "learning_rate": 1.7103804950129296e-05, "loss": 0.0192, "step": 59800 }, { "epoch": 1.47, "learning_rate": 1.7103312399950745e-05, "loss": 0.0122, "step": 59810 }, { "epoch": 1.47, "learning_rate": 1.7102819849772198e-05, "loss": 0.0136, "step": 59820 }, { "epoch": 1.47, "learning_rate": 1.7102327299593647e-05, "loss": 0.0106, "step": 59830 }, { "epoch": 1.47, "learning_rate": 1.71018347494151e-05, "loss": 0.0118, "step": 59840 }, { "epoch": 1.47, "learning_rate": 1.710134219923655e-05, "loss": 0.0109, "step": 59850 }, { "epoch": 1.47, "learning_rate": 1.7100849649058e-05, "loss": 0.0181, "step": 59860 }, { "epoch": 1.47, "learning_rate": 1.710035709887945e-05, "loss": 0.0175, "step": 59870 }, { "epoch": 1.47, "learning_rate": 1.70998645487009e-05, "loss": 0.0126, "step": 59880 }, { "epoch": 1.47, "learning_rate": 1.7099371998522352e-05, "loss": 0.0198, "step": 59890 }, { "epoch": 1.47, "learning_rate": 1.70988794483438e-05, "loss": 0.0167, "step": 59900 }, { "epoch": 1.47, "learning_rate": 1.709838689816525e-05, "loss": 0.0145, "step": 59910 }, { "epoch": 1.47, "learning_rate": 1.7097894347986703e-05, "loss": 0.0123, "step": 59920 }, { "epoch": 1.47, "learning_rate": 1.7097401797808152e-05, "loss": 0.0127, "step": 59930 }, { "epoch": 1.47, "learning_rate": 1.70969092476296e-05, "loss": 0.014, "step": 59940 }, { "epoch": 1.47, "learning_rate": 1.7096416697451054e-05, "loss": 0.011, "step": 59950 }, { "epoch": 1.47, "learning_rate": 1.7095924147272506e-05, "loss": 0.0127, "step": 59960 }, { "epoch": 1.47, "learning_rate": 1.7095431597093955e-05, "loss": 0.0175, "step": 59970 }, { "epoch": 1.47, "learning_rate": 1.7094939046915408e-05, "loss": 0.02, "step": 59980 }, { "epoch": 1.47, "learning_rate": 1.7094446496736857e-05, "loss": 0.0122, "step": 59990 }, { "epoch": 1.47, "learning_rate": 1.7093953946558306e-05, "loss": 0.0112, "step": 60000 }, { "epoch": 1.47, "eval_bleu": 97.7792, "eval_gen_len_ratio": 1.0034, "eval_loss": 0.009601549245417118, "eval_runtime": 336.0117, "eval_samples_per_second": 5.553, "eval_steps_per_second": 0.176, "step": 60000 }, { "epoch": 1.47, "learning_rate": 1.709346139637976e-05, "loss": 0.0123, "step": 60010 }, { "epoch": 1.47, "learning_rate": 1.7092968846201208e-05, "loss": 0.0109, "step": 60020 }, { "epoch": 1.47, "learning_rate": 1.709247629602266e-05, "loss": 0.0178, "step": 60030 }, { "epoch": 1.47, "learning_rate": 1.709198374584411e-05, "loss": 0.0124, "step": 60040 }, { "epoch": 1.48, "learning_rate": 1.709149119566556e-05, "loss": 0.0119, "step": 60050 }, { "epoch": 1.48, "learning_rate": 1.709099864548701e-05, "loss": 0.0113, "step": 60060 }, { "epoch": 1.48, "learning_rate": 1.709050609530846e-05, "loss": 0.0171, "step": 60070 }, { "epoch": 1.48, "learning_rate": 1.709001354512991e-05, "loss": 0.0113, "step": 60080 }, { "epoch": 1.48, "learning_rate": 1.7089520994951362e-05, "loss": 0.0108, "step": 60090 }, { "epoch": 1.48, "learning_rate": 1.708902844477281e-05, "loss": 0.0127, "step": 60100 }, { "epoch": 1.48, "learning_rate": 1.7088535894594264e-05, "loss": 0.0121, "step": 60110 }, { "epoch": 1.48, "learning_rate": 1.7088043344415713e-05, "loss": 0.0215, "step": 60120 }, { "epoch": 1.48, "learning_rate": 1.7087550794237166e-05, "loss": 0.014, "step": 60130 }, { "epoch": 1.48, "learning_rate": 1.7087058244058615e-05, "loss": 0.0115, "step": 60140 }, { "epoch": 1.48, "learning_rate": 1.7086565693880067e-05, "loss": 0.011, "step": 60150 }, { "epoch": 1.48, "learning_rate": 1.7086073143701516e-05, "loss": 0.0117, "step": 60160 }, { "epoch": 1.48, "learning_rate": 1.7085580593522966e-05, "loss": 0.0106, "step": 60170 }, { "epoch": 1.48, "learning_rate": 1.7085088043344418e-05, "loss": 0.0104, "step": 60180 }, { "epoch": 1.48, "learning_rate": 1.7084595493165867e-05, "loss": 0.0122, "step": 60190 }, { "epoch": 1.48, "learning_rate": 1.7084102942987316e-05, "loss": 0.0101, "step": 60200 }, { "epoch": 1.48, "learning_rate": 1.708361039280877e-05, "loss": 0.0119, "step": 60210 }, { "epoch": 1.48, "learning_rate": 1.7083117842630218e-05, "loss": 0.0114, "step": 60220 }, { "epoch": 1.48, "learning_rate": 1.708262529245167e-05, "loss": 0.0123, "step": 60230 }, { "epoch": 1.48, "learning_rate": 1.708213274227312e-05, "loss": 0.0112, "step": 60240 }, { "epoch": 1.48, "learning_rate": 1.7081640192094572e-05, "loss": 0.0183, "step": 60250 }, { "epoch": 1.48, "learning_rate": 1.708114764191602e-05, "loss": 0.0103, "step": 60260 }, { "epoch": 1.48, "learning_rate": 1.7080655091737474e-05, "loss": 0.0117, "step": 60270 }, { "epoch": 1.48, "learning_rate": 1.7080162541558923e-05, "loss": 0.0116, "step": 60280 }, { "epoch": 1.48, "learning_rate": 1.7079669991380376e-05, "loss": 0.0114, "step": 60290 }, { "epoch": 1.48, "learning_rate": 1.7079177441201825e-05, "loss": 0.0133, "step": 60300 }, { "epoch": 1.48, "learning_rate": 1.7078684891023274e-05, "loss": 0.0117, "step": 60310 }, { "epoch": 1.48, "learning_rate": 1.7078192340844726e-05, "loss": 0.0101, "step": 60320 }, { "epoch": 1.48, "learning_rate": 1.7077699790666176e-05, "loss": 0.0156, "step": 60330 }, { "epoch": 1.48, "learning_rate": 1.7077207240487625e-05, "loss": 0.0122, "step": 60340 }, { "epoch": 1.48, "learning_rate": 1.7076714690309077e-05, "loss": 0.0119, "step": 60350 }, { "epoch": 1.48, "learning_rate": 1.7076222140130526e-05, "loss": 0.0122, "step": 60360 }, { "epoch": 1.48, "learning_rate": 1.7075729589951976e-05, "loss": 0.0123, "step": 60370 }, { "epoch": 1.48, "learning_rate": 1.7075237039773428e-05, "loss": 0.013, "step": 60380 }, { "epoch": 1.48, "learning_rate": 1.7074744489594877e-05, "loss": 0.0145, "step": 60390 }, { "epoch": 1.48, "learning_rate": 1.707425193941633e-05, "loss": 0.0148, "step": 60400 }, { "epoch": 1.48, "learning_rate": 1.707375938923778e-05, "loss": 0.0118, "step": 60410 }, { "epoch": 1.48, "learning_rate": 1.707326683905923e-05, "loss": 0.019, "step": 60420 }, { "epoch": 1.48, "learning_rate": 1.707277428888068e-05, "loss": 0.0123, "step": 60430 }, { "epoch": 1.48, "learning_rate": 1.7072281738702133e-05, "loss": 0.0165, "step": 60440 }, { "epoch": 1.49, "learning_rate": 1.7071789188523582e-05, "loss": 0.0161, "step": 60450 }, { "epoch": 1.49, "learning_rate": 1.707129663834503e-05, "loss": 0.0127, "step": 60460 }, { "epoch": 1.49, "learning_rate": 1.7070804088166484e-05, "loss": 0.0125, "step": 60470 }, { "epoch": 1.49, "learning_rate": 1.7070311537987933e-05, "loss": 0.0119, "step": 60480 }, { "epoch": 1.49, "learning_rate": 1.7069818987809382e-05, "loss": 0.0143, "step": 60490 }, { "epoch": 1.49, "learning_rate": 1.7069326437630835e-05, "loss": 0.0164, "step": 60500 }, { "epoch": 1.49, "learning_rate": 1.7068833887452284e-05, "loss": 0.0123, "step": 60510 }, { "epoch": 1.49, "learning_rate": 1.7068341337273737e-05, "loss": 0.0112, "step": 60520 }, { "epoch": 1.49, "learning_rate": 1.7067848787095186e-05, "loss": 0.0101, "step": 60530 }, { "epoch": 1.49, "learning_rate": 1.7067356236916638e-05, "loss": 0.0117, "step": 60540 }, { "epoch": 1.49, "learning_rate": 1.7066863686738087e-05, "loss": 0.0126, "step": 60550 }, { "epoch": 1.49, "learning_rate": 1.706637113655954e-05, "loss": 0.0117, "step": 60560 }, { "epoch": 1.49, "learning_rate": 1.706587858638099e-05, "loss": 0.0119, "step": 60570 }, { "epoch": 1.49, "learning_rate": 1.706538603620244e-05, "loss": 0.0107, "step": 60580 }, { "epoch": 1.49, "learning_rate": 1.706489348602389e-05, "loss": 0.0231, "step": 60590 }, { "epoch": 1.49, "learning_rate": 1.706440093584534e-05, "loss": 0.0129, "step": 60600 }, { "epoch": 1.49, "learning_rate": 1.7063908385666792e-05, "loss": 0.0108, "step": 60610 }, { "epoch": 1.49, "learning_rate": 1.706341583548824e-05, "loss": 0.0113, "step": 60620 }, { "epoch": 1.49, "learning_rate": 1.706292328530969e-05, "loss": 0.0181, "step": 60630 }, { "epoch": 1.49, "learning_rate": 1.7062430735131143e-05, "loss": 0.0121, "step": 60640 }, { "epoch": 1.49, "learning_rate": 1.7061938184952592e-05, "loss": 0.0128, "step": 60650 }, { "epoch": 1.49, "learning_rate": 1.706144563477404e-05, "loss": 0.0124, "step": 60660 }, { "epoch": 1.49, "learning_rate": 1.7060953084595494e-05, "loss": 0.0156, "step": 60670 }, { "epoch": 1.49, "learning_rate": 1.7060460534416947e-05, "loss": 0.0104, "step": 60680 }, { "epoch": 1.49, "learning_rate": 1.7059967984238396e-05, "loss": 0.0123, "step": 60690 }, { "epoch": 1.49, "learning_rate": 1.705947543405985e-05, "loss": 0.0117, "step": 60700 }, { "epoch": 1.49, "learning_rate": 1.7058982883881297e-05, "loss": 0.0133, "step": 60710 }, { "epoch": 1.49, "learning_rate": 1.7058490333702747e-05, "loss": 0.0117, "step": 60720 }, { "epoch": 1.49, "learning_rate": 1.70579977835242e-05, "loss": 0.0114, "step": 60730 }, { "epoch": 1.49, "learning_rate": 1.7057505233345648e-05, "loss": 0.0126, "step": 60740 }, { "epoch": 1.49, "learning_rate": 1.70570126831671e-05, "loss": 0.0108, "step": 60750 }, { "epoch": 1.49, "learning_rate": 1.705652013298855e-05, "loss": 0.0196, "step": 60760 }, { "epoch": 1.49, "learning_rate": 1.705602758281e-05, "loss": 0.0103, "step": 60770 }, { "epoch": 1.49, "learning_rate": 1.705553503263145e-05, "loss": 0.0106, "step": 60780 }, { "epoch": 1.49, "learning_rate": 1.70550424824529e-05, "loss": 0.0121, "step": 60790 }, { "epoch": 1.49, "learning_rate": 1.705454993227435e-05, "loss": 0.01, "step": 60800 }, { "epoch": 1.49, "learning_rate": 1.7054057382095802e-05, "loss": 0.0148, "step": 60810 }, { "epoch": 1.49, "learning_rate": 1.705356483191725e-05, "loss": 0.0208, "step": 60820 }, { "epoch": 1.49, "learning_rate": 1.7053072281738704e-05, "loss": 0.0145, "step": 60830 }, { "epoch": 1.49, "learning_rate": 1.7052579731560153e-05, "loss": 0.0231, "step": 60840 }, { "epoch": 1.49, "learning_rate": 1.7052087181381606e-05, "loss": 0.013, "step": 60850 }, { "epoch": 1.5, "learning_rate": 1.7051594631203055e-05, "loss": 0.0107, "step": 60860 }, { "epoch": 1.5, "learning_rate": 1.7051102081024508e-05, "loss": 0.0158, "step": 60870 }, { "epoch": 1.5, "learning_rate": 1.7050609530845957e-05, "loss": 0.0122, "step": 60880 }, { "epoch": 1.5, "learning_rate": 1.7050116980667406e-05, "loss": 0.0177, "step": 60890 }, { "epoch": 1.5, "learning_rate": 1.704962443048886e-05, "loss": 0.0248, "step": 60900 }, { "epoch": 1.5, "learning_rate": 1.7049131880310308e-05, "loss": 0.0119, "step": 60910 }, { "epoch": 1.5, "learning_rate": 1.7048639330131757e-05, "loss": 0.0117, "step": 60920 }, { "epoch": 1.5, "learning_rate": 1.704814677995321e-05, "loss": 0.0095, "step": 60930 }, { "epoch": 1.5, "learning_rate": 1.704765422977466e-05, "loss": 0.0085, "step": 60940 }, { "epoch": 1.5, "learning_rate": 1.704716167959611e-05, "loss": 0.0092, "step": 60950 }, { "epoch": 1.5, "learning_rate": 1.704666912941756e-05, "loss": 0.0164, "step": 60960 }, { "epoch": 1.5, "learning_rate": 1.7046176579239013e-05, "loss": 0.0118, "step": 60970 }, { "epoch": 1.5, "learning_rate": 1.7045684029060462e-05, "loss": 0.0166, "step": 60980 }, { "epoch": 1.5, "learning_rate": 1.7045191478881914e-05, "loss": 0.0193, "step": 60990 }, { "epoch": 1.5, "learning_rate": 1.7044698928703363e-05, "loss": 0.0097, "step": 61000 }, { "epoch": 1.5, "learning_rate": 1.7044206378524816e-05, "loss": 0.0124, "step": 61010 }, { "epoch": 1.5, "learning_rate": 1.7043713828346265e-05, "loss": 0.019, "step": 61020 }, { "epoch": 1.5, "learning_rate": 1.7043221278167714e-05, "loss": 0.0109, "step": 61030 }, { "epoch": 1.5, "learning_rate": 1.7042728727989167e-05, "loss": 0.0123, "step": 61040 }, { "epoch": 1.5, "learning_rate": 1.7042236177810616e-05, "loss": 0.0156, "step": 61050 }, { "epoch": 1.5, "learning_rate": 1.7041743627632065e-05, "loss": 0.013, "step": 61060 }, { "epoch": 1.5, "learning_rate": 1.7041251077453518e-05, "loss": 0.0112, "step": 61070 }, { "epoch": 1.5, "learning_rate": 1.7040758527274967e-05, "loss": 0.0122, "step": 61080 }, { "epoch": 1.5, "learning_rate": 1.7040265977096416e-05, "loss": 0.0131, "step": 61090 }, { "epoch": 1.5, "learning_rate": 1.703977342691787e-05, "loss": 0.0126, "step": 61100 }, { "epoch": 1.5, "learning_rate": 1.7039280876739318e-05, "loss": 0.0121, "step": 61110 }, { "epoch": 1.5, "learning_rate": 1.703878832656077e-05, "loss": 0.0112, "step": 61120 }, { "epoch": 1.5, "learning_rate": 1.703829577638222e-05, "loss": 0.0173, "step": 61130 }, { "epoch": 1.5, "learning_rate": 1.7037803226203672e-05, "loss": 0.0121, "step": 61140 }, { "epoch": 1.5, "learning_rate": 1.703731067602512e-05, "loss": 0.0114, "step": 61150 }, { "epoch": 1.5, "learning_rate": 1.7036818125846574e-05, "loss": 0.0108, "step": 61160 }, { "epoch": 1.5, "learning_rate": 1.7036325575668023e-05, "loss": 0.0109, "step": 61170 }, { "epoch": 1.5, "learning_rate": 1.7035833025489472e-05, "loss": 0.0118, "step": 61180 }, { "epoch": 1.5, "learning_rate": 1.7035340475310924e-05, "loss": 0.0121, "step": 61190 }, { "epoch": 1.5, "learning_rate": 1.7034847925132374e-05, "loss": 0.0124, "step": 61200 }, { "epoch": 1.5, "learning_rate": 1.7034355374953823e-05, "loss": 0.0116, "step": 61210 }, { "epoch": 1.5, "learning_rate": 1.7033862824775275e-05, "loss": 0.0101, "step": 61220 }, { "epoch": 1.5, "learning_rate": 1.7033370274596724e-05, "loss": 0.0118, "step": 61230 }, { "epoch": 1.5, "learning_rate": 1.7032877724418177e-05, "loss": 0.0149, "step": 61240 }, { "epoch": 1.5, "learning_rate": 1.7032385174239626e-05, "loss": 0.0133, "step": 61250 }, { "epoch": 1.5, "learning_rate": 1.703189262406108e-05, "loss": 0.0112, "step": 61260 }, { "epoch": 1.51, "learning_rate": 1.7031400073882528e-05, "loss": 0.0113, "step": 61270 }, { "epoch": 1.51, "learning_rate": 1.703090752370398e-05, "loss": 0.0127, "step": 61280 }, { "epoch": 1.51, "learning_rate": 1.703041497352543e-05, "loss": 0.0132, "step": 61290 }, { "epoch": 1.51, "learning_rate": 1.7029922423346882e-05, "loss": 0.0135, "step": 61300 }, { "epoch": 1.51, "learning_rate": 1.702942987316833e-05, "loss": 0.0146, "step": 61310 }, { "epoch": 1.51, "learning_rate": 1.702893732298978e-05, "loss": 0.0129, "step": 61320 }, { "epoch": 1.51, "learning_rate": 1.7028444772811233e-05, "loss": 0.0122, "step": 61330 }, { "epoch": 1.51, "learning_rate": 1.7027952222632682e-05, "loss": 0.0133, "step": 61340 }, { "epoch": 1.51, "learning_rate": 1.702745967245413e-05, "loss": 0.0128, "step": 61350 }, { "epoch": 1.51, "learning_rate": 1.7026967122275584e-05, "loss": 0.0136, "step": 61360 }, { "epoch": 1.51, "learning_rate": 1.7026474572097033e-05, "loss": 0.0196, "step": 61370 }, { "epoch": 1.51, "learning_rate": 1.7025982021918482e-05, "loss": 0.012, "step": 61380 }, { "epoch": 1.51, "learning_rate": 1.7025489471739934e-05, "loss": 0.0119, "step": 61390 }, { "epoch": 1.51, "learning_rate": 1.7024996921561387e-05, "loss": 0.0145, "step": 61400 }, { "epoch": 1.51, "learning_rate": 1.7024504371382836e-05, "loss": 0.0114, "step": 61410 }, { "epoch": 1.51, "learning_rate": 1.702401182120429e-05, "loss": 0.0265, "step": 61420 }, { "epoch": 1.51, "learning_rate": 1.7023519271025738e-05, "loss": 0.0099, "step": 61430 }, { "epoch": 1.51, "learning_rate": 1.7023026720847187e-05, "loss": 0.0118, "step": 61440 }, { "epoch": 1.51, "learning_rate": 1.702253417066864e-05, "loss": 0.0111, "step": 61450 }, { "epoch": 1.51, "learning_rate": 1.702204162049009e-05, "loss": 0.0152, "step": 61460 }, { "epoch": 1.51, "learning_rate": 1.702154907031154e-05, "loss": 0.0123, "step": 61470 }, { "epoch": 1.51, "learning_rate": 1.702105652013299e-05, "loss": 0.0117, "step": 61480 }, { "epoch": 1.51, "learning_rate": 1.702056396995444e-05, "loss": 0.0111, "step": 61490 }, { "epoch": 1.51, "learning_rate": 1.7020071419775892e-05, "loss": 0.0254, "step": 61500 }, { "epoch": 1.51, "learning_rate": 1.701957886959734e-05, "loss": 0.011, "step": 61510 }, { "epoch": 1.51, "learning_rate": 1.701908631941879e-05, "loss": 0.0098, "step": 61520 }, { "epoch": 1.51, "learning_rate": 1.7018593769240243e-05, "loss": 0.027, "step": 61530 }, { "epoch": 1.51, "learning_rate": 1.7018101219061692e-05, "loss": 0.0133, "step": 61540 }, { "epoch": 1.51, "learning_rate": 1.7017608668883145e-05, "loss": 0.0126, "step": 61550 }, { "epoch": 1.51, "learning_rate": 1.7017116118704594e-05, "loss": 0.0127, "step": 61560 }, { "epoch": 1.51, "learning_rate": 1.7016623568526046e-05, "loss": 0.0266, "step": 61570 }, { "epoch": 1.51, "learning_rate": 1.7016131018347495e-05, "loss": 0.0275, "step": 61580 }, { "epoch": 1.51, "learning_rate": 1.7015638468168948e-05, "loss": 0.0112, "step": 61590 }, { "epoch": 1.51, "learning_rate": 1.7015145917990397e-05, "loss": 0.0146, "step": 61600 }, { "epoch": 1.51, "learning_rate": 1.7014653367811846e-05, "loss": 0.0131, "step": 61610 }, { "epoch": 1.51, "learning_rate": 1.70141608176333e-05, "loss": 0.0115, "step": 61620 }, { "epoch": 1.51, "learning_rate": 1.7013668267454748e-05, "loss": 0.0104, "step": 61630 }, { "epoch": 1.51, "learning_rate": 1.7013175717276197e-05, "loss": 0.0118, "step": 61640 }, { "epoch": 1.51, "learning_rate": 1.701268316709765e-05, "loss": 0.0111, "step": 61650 }, { "epoch": 1.51, "learning_rate": 1.70121906169191e-05, "loss": 0.0104, "step": 61660 }, { "epoch": 1.52, "learning_rate": 1.701169806674055e-05, "loss": 0.0121, "step": 61670 }, { "epoch": 1.52, "learning_rate": 1.7011205516562e-05, "loss": 0.0112, "step": 61680 }, { "epoch": 1.52, "learning_rate": 1.7010712966383453e-05, "loss": 0.009, "step": 61690 }, { "epoch": 1.52, "learning_rate": 1.7010220416204902e-05, "loss": 0.0104, "step": 61700 }, { "epoch": 1.52, "learning_rate": 1.7009727866026355e-05, "loss": 0.0106, "step": 61710 }, { "epoch": 1.52, "learning_rate": 1.7009235315847804e-05, "loss": 0.0118, "step": 61720 }, { "epoch": 1.52, "learning_rate": 1.7008742765669256e-05, "loss": 0.018, "step": 61730 }, { "epoch": 1.52, "learning_rate": 1.7008250215490705e-05, "loss": 0.0116, "step": 61740 }, { "epoch": 1.52, "learning_rate": 1.7007757665312155e-05, "loss": 0.0123, "step": 61750 }, { "epoch": 1.52, "learning_rate": 1.7007265115133607e-05, "loss": 0.0109, "step": 61760 }, { "epoch": 1.52, "learning_rate": 1.7006772564955056e-05, "loss": 0.0114, "step": 61770 }, { "epoch": 1.52, "learning_rate": 1.7006280014776505e-05, "loss": 0.0099, "step": 61780 }, { "epoch": 1.52, "learning_rate": 1.7005787464597958e-05, "loss": 0.0125, "step": 61790 }, { "epoch": 1.52, "learning_rate": 1.7005294914419407e-05, "loss": 0.0137, "step": 61800 }, { "epoch": 1.52, "learning_rate": 1.7004802364240856e-05, "loss": 0.0119, "step": 61810 }, { "epoch": 1.52, "learning_rate": 1.700430981406231e-05, "loss": 0.0164, "step": 61820 }, { "epoch": 1.52, "learning_rate": 1.7003817263883758e-05, "loss": 0.0092, "step": 61830 }, { "epoch": 1.52, "learning_rate": 1.700332471370521e-05, "loss": 0.0123, "step": 61840 }, { "epoch": 1.52, "learning_rate": 1.700283216352666e-05, "loss": 0.0143, "step": 61850 }, { "epoch": 1.52, "learning_rate": 1.7002339613348112e-05, "loss": 0.0115, "step": 61860 }, { "epoch": 1.52, "learning_rate": 1.700184706316956e-05, "loss": 0.0107, "step": 61870 }, { "epoch": 1.52, "learning_rate": 1.7001354512991014e-05, "loss": 0.0114, "step": 61880 }, { "epoch": 1.52, "learning_rate": 1.7000861962812463e-05, "loss": 0.0164, "step": 61890 }, { "epoch": 1.52, "learning_rate": 1.7000369412633912e-05, "loss": 0.0157, "step": 61900 }, { "epoch": 1.52, "learning_rate": 1.6999876862455365e-05, "loss": 0.0111, "step": 61910 }, { "epoch": 1.52, "learning_rate": 1.6999384312276814e-05, "loss": 0.0109, "step": 61920 }, { "epoch": 1.52, "learning_rate": 1.6998891762098263e-05, "loss": 0.0116, "step": 61930 }, { "epoch": 1.52, "learning_rate": 1.6998399211919716e-05, "loss": 0.0137, "step": 61940 }, { "epoch": 1.52, "learning_rate": 1.6997906661741165e-05, "loss": 0.013, "step": 61950 }, { "epoch": 1.52, "learning_rate": 1.6997414111562617e-05, "loss": 0.0152, "step": 61960 }, { "epoch": 1.52, "learning_rate": 1.6996921561384066e-05, "loss": 0.0158, "step": 61970 }, { "epoch": 1.52, "learning_rate": 1.699642901120552e-05, "loss": 0.0147, "step": 61980 }, { "epoch": 1.52, "learning_rate": 1.6995936461026968e-05, "loss": 0.0119, "step": 61990 }, { "epoch": 1.52, "learning_rate": 1.699544391084842e-05, "loss": 0.0121, "step": 62000 }, { "epoch": 1.52, "learning_rate": 1.699495136066987e-05, "loss": 0.0127, "step": 62010 }, { "epoch": 1.52, "learning_rate": 1.6994458810491322e-05, "loss": 0.0111, "step": 62020 }, { "epoch": 1.52, "learning_rate": 1.699396626031277e-05, "loss": 0.0123, "step": 62030 }, { "epoch": 1.52, "learning_rate": 1.699347371013422e-05, "loss": 0.0113, "step": 62040 }, { "epoch": 1.52, "learning_rate": 1.6992981159955673e-05, "loss": 0.0134, "step": 62050 }, { "epoch": 1.52, "learning_rate": 1.6992488609777122e-05, "loss": 0.0135, "step": 62060 }, { "epoch": 1.52, "learning_rate": 1.699199605959857e-05, "loss": 0.012, "step": 62070 }, { "epoch": 1.53, "learning_rate": 1.6991503509420024e-05, "loss": 0.0113, "step": 62080 }, { "epoch": 1.53, "learning_rate": 1.6991010959241473e-05, "loss": 0.0103, "step": 62090 }, { "epoch": 1.53, "learning_rate": 1.6990518409062922e-05, "loss": 0.0146, "step": 62100 }, { "epoch": 1.53, "learning_rate": 1.6990025858884375e-05, "loss": 0.0097, "step": 62110 }, { "epoch": 1.53, "learning_rate": 1.6989533308705827e-05, "loss": 0.0202, "step": 62120 }, { "epoch": 1.53, "learning_rate": 1.6989040758527276e-05, "loss": 0.0155, "step": 62130 }, { "epoch": 1.53, "learning_rate": 1.698854820834873e-05, "loss": 0.0112, "step": 62140 }, { "epoch": 1.53, "learning_rate": 1.6988055658170178e-05, "loss": 0.0171, "step": 62150 }, { "epoch": 1.53, "learning_rate": 1.6987563107991627e-05, "loss": 0.0145, "step": 62160 }, { "epoch": 1.53, "learning_rate": 1.698707055781308e-05, "loss": 0.0107, "step": 62170 }, { "epoch": 1.53, "learning_rate": 1.698657800763453e-05, "loss": 0.0126, "step": 62180 }, { "epoch": 1.53, "learning_rate": 1.698608545745598e-05, "loss": 0.0138, "step": 62190 }, { "epoch": 1.53, "learning_rate": 1.698559290727743e-05, "loss": 0.0117, "step": 62200 }, { "epoch": 1.53, "learning_rate": 1.698510035709888e-05, "loss": 0.0119, "step": 62210 }, { "epoch": 1.53, "learning_rate": 1.6984607806920332e-05, "loss": 0.0147, "step": 62220 }, { "epoch": 1.53, "learning_rate": 1.698411525674178e-05, "loss": 0.0116, "step": 62230 }, { "epoch": 1.53, "learning_rate": 1.698362270656323e-05, "loss": 0.0117, "step": 62240 }, { "epoch": 1.53, "learning_rate": 1.6983130156384683e-05, "loss": 0.011, "step": 62250 }, { "epoch": 1.53, "learning_rate": 1.6982637606206132e-05, "loss": 0.013, "step": 62260 }, { "epoch": 1.53, "learning_rate": 1.6982145056027585e-05, "loss": 0.011, "step": 62270 }, { "epoch": 1.53, "learning_rate": 1.6981652505849034e-05, "loss": 0.0116, "step": 62280 }, { "epoch": 1.53, "learning_rate": 1.6981159955670487e-05, "loss": 0.0105, "step": 62290 }, { "epoch": 1.53, "learning_rate": 1.6980667405491936e-05, "loss": 0.0197, "step": 62300 }, { "epoch": 1.53, "learning_rate": 1.6980174855313388e-05, "loss": 0.0231, "step": 62310 }, { "epoch": 1.53, "learning_rate": 1.6979682305134837e-05, "loss": 0.0115, "step": 62320 }, { "epoch": 1.53, "learning_rate": 1.6979189754956287e-05, "loss": 0.0129, "step": 62330 }, { "epoch": 1.53, "learning_rate": 1.697869720477774e-05, "loss": 0.0091, "step": 62340 }, { "epoch": 1.53, "learning_rate": 1.6978204654599188e-05, "loss": 0.0126, "step": 62350 }, { "epoch": 1.53, "learning_rate": 1.6977712104420637e-05, "loss": 0.0293, "step": 62360 }, { "epoch": 1.53, "learning_rate": 1.697721955424209e-05, "loss": 0.0133, "step": 62370 }, { "epoch": 1.53, "learning_rate": 1.697672700406354e-05, "loss": 0.0137, "step": 62380 }, { "epoch": 1.53, "learning_rate": 1.697623445388499e-05, "loss": 0.0122, "step": 62390 }, { "epoch": 1.53, "learning_rate": 1.697574190370644e-05, "loss": 0.012, "step": 62400 }, { "epoch": 1.53, "learning_rate": 1.6975249353527893e-05, "loss": 0.0156, "step": 62410 }, { "epoch": 1.53, "learning_rate": 1.6974756803349342e-05, "loss": 0.0133, "step": 62420 }, { "epoch": 1.53, "learning_rate": 1.6974264253170795e-05, "loss": 0.024, "step": 62430 }, { "epoch": 1.53, "learning_rate": 1.6973771702992244e-05, "loss": 0.0136, "step": 62440 }, { "epoch": 1.53, "learning_rate": 1.6973279152813697e-05, "loss": 0.0099, "step": 62450 }, { "epoch": 1.53, "learning_rate": 1.6972786602635146e-05, "loss": 0.0123, "step": 62460 }, { "epoch": 1.53, "learning_rate": 1.6972294052456595e-05, "loss": 0.0107, "step": 62470 }, { "epoch": 1.53, "learning_rate": 1.6971801502278047e-05, "loss": 0.0155, "step": 62480 }, { "epoch": 1.54, "learning_rate": 1.6971308952099497e-05, "loss": 0.0187, "step": 62490 }, { "epoch": 1.54, "learning_rate": 1.6970816401920946e-05, "loss": 0.0231, "step": 62500 }, { "epoch": 1.54, "learning_rate": 1.69703238517424e-05, "loss": 0.012, "step": 62510 }, { "epoch": 1.54, "learning_rate": 1.6969831301563847e-05, "loss": 0.0119, "step": 62520 }, { "epoch": 1.54, "learning_rate": 1.6969338751385297e-05, "loss": 0.0111, "step": 62530 }, { "epoch": 1.54, "learning_rate": 1.696884620120675e-05, "loss": 0.0192, "step": 62540 }, { "epoch": 1.54, "learning_rate": 1.6968353651028198e-05, "loss": 0.0116, "step": 62550 }, { "epoch": 1.54, "learning_rate": 1.696786110084965e-05, "loss": 0.0179, "step": 62560 }, { "epoch": 1.54, "learning_rate": 1.69673685506711e-05, "loss": 0.0107, "step": 62570 }, { "epoch": 1.54, "learning_rate": 1.6966876000492553e-05, "loss": 0.0119, "step": 62580 }, { "epoch": 1.54, "learning_rate": 1.6966383450314e-05, "loss": 0.0155, "step": 62590 }, { "epoch": 1.54, "learning_rate": 1.6965890900135454e-05, "loss": 0.0175, "step": 62600 }, { "epoch": 1.54, "learning_rate": 1.6965398349956903e-05, "loss": 0.0235, "step": 62610 }, { "epoch": 1.54, "learning_rate": 1.6964905799778352e-05, "loss": 0.0179, "step": 62620 }, { "epoch": 1.54, "learning_rate": 1.6964413249599805e-05, "loss": 0.0116, "step": 62630 }, { "epoch": 1.54, "learning_rate": 1.6963920699421254e-05, "loss": 0.0098, "step": 62640 }, { "epoch": 1.54, "learning_rate": 1.6963428149242703e-05, "loss": 0.0203, "step": 62650 }, { "epoch": 1.54, "learning_rate": 1.6962935599064156e-05, "loss": 0.0119, "step": 62660 }, { "epoch": 1.54, "learning_rate": 1.6962443048885605e-05, "loss": 0.0135, "step": 62670 }, { "epoch": 1.54, "learning_rate": 1.6961950498707058e-05, "loss": 0.0142, "step": 62680 }, { "epoch": 1.54, "learning_rate": 1.6961457948528507e-05, "loss": 0.0117, "step": 62690 }, { "epoch": 1.54, "learning_rate": 1.696096539834996e-05, "loss": 0.018, "step": 62700 }, { "epoch": 1.54, "learning_rate": 1.696047284817141e-05, "loss": 0.0147, "step": 62710 }, { "epoch": 1.54, "learning_rate": 1.695998029799286e-05, "loss": 0.0115, "step": 62720 }, { "epoch": 1.54, "learning_rate": 1.695948774781431e-05, "loss": 0.0175, "step": 62730 }, { "epoch": 1.54, "learning_rate": 1.6958995197635763e-05, "loss": 0.0193, "step": 62740 }, { "epoch": 1.54, "learning_rate": 1.6958502647457212e-05, "loss": 0.0213, "step": 62750 }, { "epoch": 1.54, "learning_rate": 1.695801009727866e-05, "loss": 0.0134, "step": 62760 }, { "epoch": 1.54, "learning_rate": 1.6957517547100113e-05, "loss": 0.01, "step": 62770 }, { "epoch": 1.54, "learning_rate": 1.6957024996921563e-05, "loss": 0.0109, "step": 62780 }, { "epoch": 1.54, "learning_rate": 1.6956532446743012e-05, "loss": 0.0108, "step": 62790 }, { "epoch": 1.54, "learning_rate": 1.6956039896564464e-05, "loss": 0.0102, "step": 62800 }, { "epoch": 1.54, "learning_rate": 1.6955547346385913e-05, "loss": 0.0167, "step": 62810 }, { "epoch": 1.54, "learning_rate": 1.6955054796207363e-05, "loss": 0.0173, "step": 62820 }, { "epoch": 1.54, "learning_rate": 1.6954562246028815e-05, "loss": 0.0123, "step": 62830 }, { "epoch": 1.54, "learning_rate": 1.6954069695850268e-05, "loss": 0.0107, "step": 62840 }, { "epoch": 1.54, "learning_rate": 1.6953577145671717e-05, "loss": 0.0132, "step": 62850 }, { "epoch": 1.54, "learning_rate": 1.695308459549317e-05, "loss": 0.0105, "step": 62860 }, { "epoch": 1.54, "learning_rate": 1.695259204531462e-05, "loss": 0.0137, "step": 62870 }, { "epoch": 1.54, "learning_rate": 1.6952099495136068e-05, "loss": 0.0122, "step": 62880 }, { "epoch": 1.55, "learning_rate": 1.695160694495752e-05, "loss": 0.0109, "step": 62890 }, { "epoch": 1.55, "learning_rate": 1.695111439477897e-05, "loss": 0.0128, "step": 62900 }, { "epoch": 1.55, "learning_rate": 1.6950621844600422e-05, "loss": 0.0124, "step": 62910 }, { "epoch": 1.55, "learning_rate": 1.695012929442187e-05, "loss": 0.0131, "step": 62920 }, { "epoch": 1.55, "learning_rate": 1.694963674424332e-05, "loss": 0.0103, "step": 62930 }, { "epoch": 1.55, "learning_rate": 1.6949144194064773e-05, "loss": 0.0116, "step": 62940 }, { "epoch": 1.55, "learning_rate": 1.6948651643886222e-05, "loss": 0.0106, "step": 62950 }, { "epoch": 1.55, "learning_rate": 1.694815909370767e-05, "loss": 0.0112, "step": 62960 }, { "epoch": 1.55, "learning_rate": 1.6947666543529124e-05, "loss": 0.0099, "step": 62970 }, { "epoch": 1.55, "learning_rate": 1.6947173993350573e-05, "loss": 0.0096, "step": 62980 }, { "epoch": 1.55, "learning_rate": 1.6946681443172025e-05, "loss": 0.0173, "step": 62990 }, { "epoch": 1.55, "learning_rate": 1.6946188892993474e-05, "loss": 0.0257, "step": 63000 }, { "epoch": 1.55, "learning_rate": 1.6945696342814927e-05, "loss": 0.021, "step": 63010 }, { "epoch": 1.55, "learning_rate": 1.6945203792636376e-05, "loss": 0.0118, "step": 63020 }, { "epoch": 1.55, "learning_rate": 1.694471124245783e-05, "loss": 0.0128, "step": 63030 }, { "epoch": 1.55, "learning_rate": 1.6944218692279278e-05, "loss": 0.0096, "step": 63040 }, { "epoch": 1.55, "learning_rate": 1.6943726142100727e-05, "loss": 0.0121, "step": 63050 }, { "epoch": 1.55, "learning_rate": 1.694323359192218e-05, "loss": 0.0126, "step": 63060 }, { "epoch": 1.55, "learning_rate": 1.694274104174363e-05, "loss": 0.0165, "step": 63070 }, { "epoch": 1.55, "learning_rate": 1.6942248491565078e-05, "loss": 0.0133, "step": 63080 }, { "epoch": 1.55, "learning_rate": 1.694175594138653e-05, "loss": 0.0181, "step": 63090 }, { "epoch": 1.55, "learning_rate": 1.694126339120798e-05, "loss": 0.0112, "step": 63100 }, { "epoch": 1.55, "learning_rate": 1.6940770841029432e-05, "loss": 0.011, "step": 63110 }, { "epoch": 1.55, "learning_rate": 1.694027829085088e-05, "loss": 0.0105, "step": 63120 }, { "epoch": 1.55, "learning_rate": 1.6939785740672334e-05, "loss": 0.0107, "step": 63130 }, { "epoch": 1.55, "learning_rate": 1.6939293190493783e-05, "loss": 0.0127, "step": 63140 }, { "epoch": 1.55, "learning_rate": 1.6938800640315235e-05, "loss": 0.0097, "step": 63150 }, { "epoch": 1.55, "learning_rate": 1.6938308090136684e-05, "loss": 0.0213, "step": 63160 }, { "epoch": 1.55, "learning_rate": 1.6937815539958137e-05, "loss": 0.0222, "step": 63170 }, { "epoch": 1.55, "learning_rate": 1.6937322989779586e-05, "loss": 0.0305, "step": 63180 }, { "epoch": 1.55, "learning_rate": 1.6936830439601035e-05, "loss": 0.0159, "step": 63190 }, { "epoch": 1.55, "learning_rate": 1.6936337889422488e-05, "loss": 0.0102, "step": 63200 }, { "epoch": 1.55, "learning_rate": 1.6935845339243937e-05, "loss": 0.0107, "step": 63210 }, { "epoch": 1.55, "learning_rate": 1.6935352789065386e-05, "loss": 0.013, "step": 63220 }, { "epoch": 1.55, "learning_rate": 1.693486023888684e-05, "loss": 0.0125, "step": 63230 }, { "epoch": 1.55, "learning_rate": 1.6934367688708288e-05, "loss": 0.0151, "step": 63240 }, { "epoch": 1.55, "learning_rate": 1.6933875138529737e-05, "loss": 0.0123, "step": 63250 }, { "epoch": 1.55, "learning_rate": 1.693338258835119e-05, "loss": 0.0123, "step": 63260 }, { "epoch": 1.55, "learning_rate": 1.693289003817264e-05, "loss": 0.0159, "step": 63270 }, { "epoch": 1.55, "learning_rate": 1.693239748799409e-05, "loss": 0.0136, "step": 63280 }, { "epoch": 1.55, "learning_rate": 1.693190493781554e-05, "loss": 0.0107, "step": 63290 }, { "epoch": 1.56, "learning_rate": 1.6931412387636993e-05, "loss": 0.0121, "step": 63300 }, { "epoch": 1.56, "learning_rate": 1.6930919837458442e-05, "loss": 0.011, "step": 63310 }, { "epoch": 1.56, "learning_rate": 1.6930427287279895e-05, "loss": 0.0097, "step": 63320 }, { "epoch": 1.56, "learning_rate": 1.6929934737101344e-05, "loss": 0.0116, "step": 63330 }, { "epoch": 1.56, "learning_rate": 1.6929442186922793e-05, "loss": 0.013, "step": 63340 }, { "epoch": 1.56, "learning_rate": 1.6928949636744245e-05, "loss": 0.0109, "step": 63350 }, { "epoch": 1.56, "learning_rate": 1.6928457086565695e-05, "loss": 0.012, "step": 63360 }, { "epoch": 1.56, "learning_rate": 1.6927964536387144e-05, "loss": 0.0095, "step": 63370 }, { "epoch": 1.56, "learning_rate": 1.6927471986208596e-05, "loss": 0.0103, "step": 63380 }, { "epoch": 1.56, "learning_rate": 1.6926979436030045e-05, "loss": 0.0149, "step": 63390 }, { "epoch": 1.56, "learning_rate": 1.6926486885851498e-05, "loss": 0.0117, "step": 63400 }, { "epoch": 1.56, "learning_rate": 1.6925994335672947e-05, "loss": 0.0107, "step": 63410 }, { "epoch": 1.56, "learning_rate": 1.69255017854944e-05, "loss": 0.011, "step": 63420 }, { "epoch": 1.56, "learning_rate": 1.692500923531585e-05, "loss": 0.0109, "step": 63430 }, { "epoch": 1.56, "learning_rate": 1.69245166851373e-05, "loss": 0.0112, "step": 63440 }, { "epoch": 1.56, "learning_rate": 1.692402413495875e-05, "loss": 0.0164, "step": 63450 }, { "epoch": 1.56, "learning_rate": 1.6923531584780203e-05, "loss": 0.0119, "step": 63460 }, { "epoch": 1.56, "learning_rate": 1.6923039034601652e-05, "loss": 0.0111, "step": 63470 }, { "epoch": 1.56, "learning_rate": 1.69225464844231e-05, "loss": 0.0217, "step": 63480 }, { "epoch": 1.56, "learning_rate": 1.6922053934244554e-05, "loss": 0.0156, "step": 63490 }, { "epoch": 1.56, "learning_rate": 1.6921561384066003e-05, "loss": 0.0106, "step": 63500 }, { "epoch": 1.56, "learning_rate": 1.6921068833887452e-05, "loss": 0.0201, "step": 63510 }, { "epoch": 1.56, "learning_rate": 1.6920576283708905e-05, "loss": 0.0115, "step": 63520 }, { "epoch": 1.56, "learning_rate": 1.6920083733530354e-05, "loss": 0.0124, "step": 63530 }, { "epoch": 1.56, "learning_rate": 1.6919591183351803e-05, "loss": 0.0109, "step": 63540 }, { "epoch": 1.56, "learning_rate": 1.6919098633173255e-05, "loss": 0.0102, "step": 63550 }, { "epoch": 1.56, "learning_rate": 1.6918606082994708e-05, "loss": 0.0126, "step": 63560 }, { "epoch": 1.56, "learning_rate": 1.6918113532816157e-05, "loss": 0.0125, "step": 63570 }, { "epoch": 1.56, "learning_rate": 1.691762098263761e-05, "loss": 0.0118, "step": 63580 }, { "epoch": 1.56, "learning_rate": 1.691712843245906e-05, "loss": 0.01, "step": 63590 }, { "epoch": 1.56, "learning_rate": 1.6916635882280508e-05, "loss": 0.0157, "step": 63600 }, { "epoch": 1.56, "learning_rate": 1.691614333210196e-05, "loss": 0.0127, "step": 63610 }, { "epoch": 1.56, "learning_rate": 1.691565078192341e-05, "loss": 0.0099, "step": 63620 }, { "epoch": 1.56, "learning_rate": 1.6915158231744862e-05, "loss": 0.0125, "step": 63630 }, { "epoch": 1.56, "learning_rate": 1.691466568156631e-05, "loss": 0.0103, "step": 63640 }, { "epoch": 1.56, "learning_rate": 1.691417313138776e-05, "loss": 0.0148, "step": 63650 }, { "epoch": 1.56, "learning_rate": 1.6913680581209213e-05, "loss": 0.0141, "step": 63660 }, { "epoch": 1.56, "learning_rate": 1.6913188031030662e-05, "loss": 0.0113, "step": 63670 }, { "epoch": 1.56, "learning_rate": 1.691269548085211e-05, "loss": 0.0118, "step": 63680 }, { "epoch": 1.56, "learning_rate": 1.6912202930673564e-05, "loss": 0.0121, "step": 63690 }, { "epoch": 1.56, "learning_rate": 1.6911710380495013e-05, "loss": 0.0116, "step": 63700 }, { "epoch": 1.57, "learning_rate": 1.6911217830316466e-05, "loss": 0.011, "step": 63710 }, { "epoch": 1.57, "learning_rate": 1.6910725280137915e-05, "loss": 0.0182, "step": 63720 }, { "epoch": 1.57, "learning_rate": 1.6910232729959367e-05, "loss": 0.011, "step": 63730 }, { "epoch": 1.57, "learning_rate": 1.6909740179780816e-05, "loss": 0.0273, "step": 63740 }, { "epoch": 1.57, "learning_rate": 1.690924762960227e-05, "loss": 0.0178, "step": 63750 }, { "epoch": 1.57, "learning_rate": 1.6908755079423718e-05, "loss": 0.0162, "step": 63760 }, { "epoch": 1.57, "learning_rate": 1.6908262529245167e-05, "loss": 0.0112, "step": 63770 }, { "epoch": 1.57, "learning_rate": 1.690776997906662e-05, "loss": 0.0108, "step": 63780 }, { "epoch": 1.57, "learning_rate": 1.690727742888807e-05, "loss": 0.0122, "step": 63790 }, { "epoch": 1.57, "learning_rate": 1.6906784878709518e-05, "loss": 0.0124, "step": 63800 }, { "epoch": 1.57, "learning_rate": 1.690629232853097e-05, "loss": 0.0104, "step": 63810 }, { "epoch": 1.57, "learning_rate": 1.690579977835242e-05, "loss": 0.0123, "step": 63820 }, { "epoch": 1.57, "learning_rate": 1.6905307228173872e-05, "loss": 0.011, "step": 63830 }, { "epoch": 1.57, "learning_rate": 1.690481467799532e-05, "loss": 0.0179, "step": 63840 }, { "epoch": 1.57, "learning_rate": 1.6904322127816774e-05, "loss": 0.0164, "step": 63850 }, { "epoch": 1.57, "learning_rate": 1.6903829577638223e-05, "loss": 0.0108, "step": 63860 }, { "epoch": 1.57, "learning_rate": 1.6903337027459676e-05, "loss": 0.0151, "step": 63870 }, { "epoch": 1.57, "learning_rate": 1.6902844477281125e-05, "loss": 0.0112, "step": 63880 }, { "epoch": 1.57, "learning_rate": 1.6902351927102577e-05, "loss": 0.0238, "step": 63890 }, { "epoch": 1.57, "learning_rate": 1.6901859376924026e-05, "loss": 0.0237, "step": 63900 }, { "epoch": 1.57, "learning_rate": 1.6901366826745476e-05, "loss": 0.0109, "step": 63910 }, { "epoch": 1.57, "learning_rate": 1.6900874276566928e-05, "loss": 0.0099, "step": 63920 }, { "epoch": 1.57, "learning_rate": 1.6900381726388377e-05, "loss": 0.012, "step": 63930 }, { "epoch": 1.57, "learning_rate": 1.6899889176209826e-05, "loss": 0.0105, "step": 63940 }, { "epoch": 1.57, "learning_rate": 1.689939662603128e-05, "loss": 0.0111, "step": 63950 }, { "epoch": 1.57, "learning_rate": 1.6898904075852728e-05, "loss": 0.0148, "step": 63960 }, { "epoch": 1.57, "learning_rate": 1.6898411525674177e-05, "loss": 0.0198, "step": 63970 }, { "epoch": 1.57, "learning_rate": 1.689791897549563e-05, "loss": 0.011, "step": 63980 }, { "epoch": 1.57, "learning_rate": 1.689742642531708e-05, "loss": 0.0116, "step": 63990 }, { "epoch": 1.57, "learning_rate": 1.689693387513853e-05, "loss": 0.0104, "step": 64000 }, { "epoch": 1.57, "learning_rate": 1.689644132495998e-05, "loss": 0.0136, "step": 64010 }, { "epoch": 1.57, "learning_rate": 1.6895948774781433e-05, "loss": 0.0101, "step": 64020 }, { "epoch": 1.57, "learning_rate": 1.6895456224602882e-05, "loss": 0.0145, "step": 64030 }, { "epoch": 1.57, "learning_rate": 1.6894963674424335e-05, "loss": 0.0206, "step": 64040 }, { "epoch": 1.57, "learning_rate": 1.6894471124245784e-05, "loss": 0.0106, "step": 64050 }, { "epoch": 1.57, "learning_rate": 1.6893978574067233e-05, "loss": 0.0107, "step": 64060 }, { "epoch": 1.57, "learning_rate": 1.6893486023888686e-05, "loss": 0.012, "step": 64070 }, { "epoch": 1.57, "learning_rate": 1.6892993473710135e-05, "loss": 0.012, "step": 64080 }, { "epoch": 1.57, "learning_rate": 1.6892500923531584e-05, "loss": 0.012, "step": 64090 }, { "epoch": 1.57, "learning_rate": 1.6892008373353037e-05, "loss": 0.0097, "step": 64100 }, { "epoch": 1.57, "learning_rate": 1.6891515823174486e-05, "loss": 0.012, "step": 64110 }, { "epoch": 1.58, "learning_rate": 1.6891023272995938e-05, "loss": 0.0124, "step": 64120 }, { "epoch": 1.58, "learning_rate": 1.6890530722817387e-05, "loss": 0.0111, "step": 64130 }, { "epoch": 1.58, "learning_rate": 1.689003817263884e-05, "loss": 0.0119, "step": 64140 }, { "epoch": 1.58, "learning_rate": 1.688954562246029e-05, "loss": 0.0099, "step": 64150 }, { "epoch": 1.58, "learning_rate": 1.688905307228174e-05, "loss": 0.0112, "step": 64160 }, { "epoch": 1.58, "learning_rate": 1.688856052210319e-05, "loss": 0.0129, "step": 64170 }, { "epoch": 1.58, "learning_rate": 1.6888067971924643e-05, "loss": 0.0164, "step": 64180 }, { "epoch": 1.58, "learning_rate": 1.6887575421746092e-05, "loss": 0.0184, "step": 64190 }, { "epoch": 1.58, "learning_rate": 1.688708287156754e-05, "loss": 0.0129, "step": 64200 }, { "epoch": 1.58, "learning_rate": 1.6886590321388994e-05, "loss": 0.014, "step": 64210 }, { "epoch": 1.58, "learning_rate": 1.6886097771210443e-05, "loss": 0.0101, "step": 64220 }, { "epoch": 1.58, "learning_rate": 1.6885605221031892e-05, "loss": 0.0223, "step": 64230 }, { "epoch": 1.58, "learning_rate": 1.6885112670853345e-05, "loss": 0.0108, "step": 64240 }, { "epoch": 1.58, "learning_rate": 1.6884620120674794e-05, "loss": 0.0103, "step": 64250 }, { "epoch": 1.58, "learning_rate": 1.6884127570496243e-05, "loss": 0.0128, "step": 64260 }, { "epoch": 1.58, "learning_rate": 1.6883635020317696e-05, "loss": 0.0244, "step": 64270 }, { "epoch": 1.58, "learning_rate": 1.688314247013915e-05, "loss": 0.0103, "step": 64280 }, { "epoch": 1.58, "learning_rate": 1.6882649919960597e-05, "loss": 0.011, "step": 64290 }, { "epoch": 1.58, "learning_rate": 1.688215736978205e-05, "loss": 0.0223, "step": 64300 }, { "epoch": 1.58, "learning_rate": 1.68816648196035e-05, "loss": 0.0144, "step": 64310 }, { "epoch": 1.58, "learning_rate": 1.688117226942495e-05, "loss": 0.0107, "step": 64320 }, { "epoch": 1.58, "learning_rate": 1.68806797192464e-05, "loss": 0.014, "step": 64330 }, { "epoch": 1.58, "learning_rate": 1.688018716906785e-05, "loss": 0.0122, "step": 64340 }, { "epoch": 1.58, "learning_rate": 1.6879694618889303e-05, "loss": 0.0122, "step": 64350 }, { "epoch": 1.58, "learning_rate": 1.687920206871075e-05, "loss": 0.0106, "step": 64360 }, { "epoch": 1.58, "learning_rate": 1.68787095185322e-05, "loss": 0.0104, "step": 64370 }, { "epoch": 1.58, "learning_rate": 1.6878216968353653e-05, "loss": 0.0153, "step": 64380 }, { "epoch": 1.58, "learning_rate": 1.6877724418175103e-05, "loss": 0.0127, "step": 64390 }, { "epoch": 1.58, "learning_rate": 1.687723186799655e-05, "loss": 0.0104, "step": 64400 }, { "epoch": 1.58, "learning_rate": 1.6876739317818004e-05, "loss": 0.0106, "step": 64410 }, { "epoch": 1.58, "learning_rate": 1.6876246767639453e-05, "loss": 0.0146, "step": 64420 }, { "epoch": 1.58, "learning_rate": 1.6875754217460906e-05, "loss": 0.0108, "step": 64430 }, { "epoch": 1.58, "learning_rate": 1.6875261667282355e-05, "loss": 0.0111, "step": 64440 }, { "epoch": 1.58, "learning_rate": 1.6874769117103808e-05, "loss": 0.0108, "step": 64450 }, { "epoch": 1.58, "learning_rate": 1.6874276566925257e-05, "loss": 0.0121, "step": 64460 }, { "epoch": 1.58, "learning_rate": 1.687378401674671e-05, "loss": 0.0212, "step": 64470 }, { "epoch": 1.58, "learning_rate": 1.687329146656816e-05, "loss": 0.0139, "step": 64480 }, { "epoch": 1.58, "learning_rate": 1.6872798916389608e-05, "loss": 0.0116, "step": 64490 }, { "epoch": 1.58, "learning_rate": 1.687230636621106e-05, "loss": 0.0107, "step": 64500 }, { "epoch": 1.58, "learning_rate": 1.687181381603251e-05, "loss": 0.01, "step": 64510 }, { "epoch": 1.59, "learning_rate": 1.687132126585396e-05, "loss": 0.0138, "step": 64520 }, { "epoch": 1.59, "learning_rate": 1.687082871567541e-05, "loss": 0.011, "step": 64530 }, { "epoch": 1.59, "learning_rate": 1.687033616549686e-05, "loss": 0.0126, "step": 64540 }, { "epoch": 1.59, "learning_rate": 1.6869843615318313e-05, "loss": 0.0129, "step": 64550 }, { "epoch": 1.59, "learning_rate": 1.6869351065139762e-05, "loss": 0.0096, "step": 64560 }, { "epoch": 1.59, "learning_rate": 1.6868858514961214e-05, "loss": 0.01, "step": 64570 }, { "epoch": 1.59, "learning_rate": 1.6868365964782663e-05, "loss": 0.0096, "step": 64580 }, { "epoch": 1.59, "learning_rate": 1.6867873414604116e-05, "loss": 0.0106, "step": 64590 }, { "epoch": 1.59, "learning_rate": 1.6867380864425565e-05, "loss": 0.011, "step": 64600 }, { "epoch": 1.59, "learning_rate": 1.6866888314247018e-05, "loss": 0.0159, "step": 64610 }, { "epoch": 1.59, "learning_rate": 1.6866395764068467e-05, "loss": 0.0178, "step": 64620 }, { "epoch": 1.59, "learning_rate": 1.6865903213889916e-05, "loss": 0.0117, "step": 64630 }, { "epoch": 1.59, "learning_rate": 1.686541066371137e-05, "loss": 0.0123, "step": 64640 }, { "epoch": 1.59, "learning_rate": 1.6864918113532818e-05, "loss": 0.0118, "step": 64650 }, { "epoch": 1.59, "learning_rate": 1.6864425563354267e-05, "loss": 0.016, "step": 64660 }, { "epoch": 1.59, "learning_rate": 1.686393301317572e-05, "loss": 0.0113, "step": 64670 }, { "epoch": 1.59, "learning_rate": 1.686344046299717e-05, "loss": 0.0116, "step": 64680 }, { "epoch": 1.59, "learning_rate": 1.6862947912818618e-05, "loss": 0.0105, "step": 64690 }, { "epoch": 1.59, "learning_rate": 1.686245536264007e-05, "loss": 0.0131, "step": 64700 }, { "epoch": 1.59, "learning_rate": 1.686196281246152e-05, "loss": 0.0111, "step": 64710 }, { "epoch": 1.59, "learning_rate": 1.6861470262282972e-05, "loss": 0.0123, "step": 64720 }, { "epoch": 1.59, "learning_rate": 1.686097771210442e-05, "loss": 0.0114, "step": 64730 }, { "epoch": 1.59, "learning_rate": 1.6860485161925874e-05, "loss": 0.0264, "step": 64740 }, { "epoch": 1.59, "learning_rate": 1.6859992611747323e-05, "loss": 0.0117, "step": 64750 }, { "epoch": 1.59, "learning_rate": 1.6859500061568775e-05, "loss": 0.0114, "step": 64760 }, { "epoch": 1.59, "learning_rate": 1.6859007511390224e-05, "loss": 0.0109, "step": 64770 }, { "epoch": 1.59, "learning_rate": 1.6858514961211674e-05, "loss": 0.0118, "step": 64780 }, { "epoch": 1.59, "learning_rate": 1.6858022411033126e-05, "loss": 0.0123, "step": 64790 }, { "epoch": 1.59, "learning_rate": 1.6857529860854575e-05, "loss": 0.0151, "step": 64800 }, { "epoch": 1.59, "learning_rate": 1.6857037310676024e-05, "loss": 0.0101, "step": 64810 }, { "epoch": 1.59, "learning_rate": 1.6856544760497477e-05, "loss": 0.0115, "step": 64820 }, { "epoch": 1.59, "learning_rate": 1.6856052210318926e-05, "loss": 0.0116, "step": 64830 }, { "epoch": 1.59, "learning_rate": 1.685555966014038e-05, "loss": 0.0131, "step": 64840 }, { "epoch": 1.59, "learning_rate": 1.6855067109961828e-05, "loss": 0.0106, "step": 64850 }, { "epoch": 1.59, "learning_rate": 1.685457455978328e-05, "loss": 0.0105, "step": 64860 }, { "epoch": 1.59, "learning_rate": 1.685408200960473e-05, "loss": 0.0124, "step": 64870 }, { "epoch": 1.59, "learning_rate": 1.6853589459426182e-05, "loss": 0.012, "step": 64880 }, { "epoch": 1.59, "learning_rate": 1.685309690924763e-05, "loss": 0.0153, "step": 64890 }, { "epoch": 1.59, "learning_rate": 1.6852604359069084e-05, "loss": 0.0092, "step": 64900 }, { "epoch": 1.59, "learning_rate": 1.6852111808890533e-05, "loss": 0.0104, "step": 64910 }, { "epoch": 1.59, "learning_rate": 1.6851619258711982e-05, "loss": 0.0125, "step": 64920 }, { "epoch": 1.6, "learning_rate": 1.6851126708533434e-05, "loss": 0.0209, "step": 64930 }, { "epoch": 1.6, "learning_rate": 1.6850634158354884e-05, "loss": 0.0111, "step": 64940 }, { "epoch": 1.6, "learning_rate": 1.6850141608176333e-05, "loss": 0.0137, "step": 64950 }, { "epoch": 1.6, "learning_rate": 1.6849649057997785e-05, "loss": 0.0127, "step": 64960 }, { "epoch": 1.6, "learning_rate": 1.6849156507819234e-05, "loss": 0.0117, "step": 64970 }, { "epoch": 1.6, "learning_rate": 1.6848663957640684e-05, "loss": 0.0208, "step": 64980 }, { "epoch": 1.6, "learning_rate": 1.6848171407462136e-05, "loss": 0.0097, "step": 64990 }, { "epoch": 1.6, "learning_rate": 1.684767885728359e-05, "loss": 0.0112, "step": 65000 }, { "epoch": 1.6, "eval_bleu": 98.0903, "eval_gen_len_ratio": 0.9994, "eval_loss": 0.009515752084553242, "eval_runtime": 252.7699, "eval_samples_per_second": 7.382, "eval_steps_per_second": 0.233, "step": 65000 }, { "epoch": 1.6, "learning_rate": 1.6847186307105038e-05, "loss": 0.0087, "step": 65010 }, { "epoch": 1.6, "learning_rate": 1.684669375692649e-05, "loss": 0.012, "step": 65020 }, { "epoch": 1.6, "learning_rate": 1.684620120674794e-05, "loss": 0.0095, "step": 65030 }, { "epoch": 1.6, "learning_rate": 1.684570865656939e-05, "loss": 0.0114, "step": 65040 }, { "epoch": 1.6, "learning_rate": 1.684521610639084e-05, "loss": 0.0149, "step": 65050 }, { "epoch": 1.6, "learning_rate": 1.684472355621229e-05, "loss": 0.0116, "step": 65060 }, { "epoch": 1.6, "learning_rate": 1.6844231006033743e-05, "loss": 0.0136, "step": 65070 }, { "epoch": 1.6, "learning_rate": 1.6843738455855192e-05, "loss": 0.0152, "step": 65080 }, { "epoch": 1.6, "learning_rate": 1.684324590567664e-05, "loss": 0.0112, "step": 65090 }, { "epoch": 1.6, "learning_rate": 1.6842753355498094e-05, "loss": 0.0122, "step": 65100 }, { "epoch": 1.6, "learning_rate": 1.6842260805319543e-05, "loss": 0.0177, "step": 65110 }, { "epoch": 1.6, "learning_rate": 1.6841768255140992e-05, "loss": 0.0229, "step": 65120 }, { "epoch": 1.6, "learning_rate": 1.6841275704962445e-05, "loss": 0.0145, "step": 65130 }, { "epoch": 1.6, "learning_rate": 1.6840783154783894e-05, "loss": 0.0184, "step": 65140 }, { "epoch": 1.6, "learning_rate": 1.6840290604605346e-05, "loss": 0.0109, "step": 65150 }, { "epoch": 1.6, "learning_rate": 1.6839798054426795e-05, "loss": 0.0117, "step": 65160 }, { "epoch": 1.6, "learning_rate": 1.6839305504248248e-05, "loss": 0.0122, "step": 65170 }, { "epoch": 1.6, "learning_rate": 1.6838812954069697e-05, "loss": 0.0107, "step": 65180 }, { "epoch": 1.6, "learning_rate": 1.683832040389115e-05, "loss": 0.0109, "step": 65190 }, { "epoch": 1.6, "learning_rate": 1.68378278537126e-05, "loss": 0.0101, "step": 65200 }, { "epoch": 1.6, "learning_rate": 1.6837335303534048e-05, "loss": 0.0114, "step": 65210 }, { "epoch": 1.6, "learning_rate": 1.68368427533555e-05, "loss": 0.0173, "step": 65220 }, { "epoch": 1.6, "learning_rate": 1.683635020317695e-05, "loss": 0.0166, "step": 65230 }, { "epoch": 1.6, "learning_rate": 1.68358576529984e-05, "loss": 0.0142, "step": 65240 }, { "epoch": 1.6, "learning_rate": 1.683536510281985e-05, "loss": 0.0132, "step": 65250 }, { "epoch": 1.6, "learning_rate": 1.68348725526413e-05, "loss": 0.0125, "step": 65260 }, { "epoch": 1.6, "learning_rate": 1.6834380002462753e-05, "loss": 0.01, "step": 65270 }, { "epoch": 1.6, "learning_rate": 1.6833887452284202e-05, "loss": 0.0181, "step": 65280 }, { "epoch": 1.6, "learning_rate": 1.6833394902105655e-05, "loss": 0.01, "step": 65290 }, { "epoch": 1.6, "learning_rate": 1.6832902351927104e-05, "loss": 0.0136, "step": 65300 }, { "epoch": 1.6, "learning_rate": 1.6832409801748556e-05, "loss": 0.0116, "step": 65310 }, { "epoch": 1.6, "learning_rate": 1.6831917251570005e-05, "loss": 0.0179, "step": 65320 }, { "epoch": 1.6, "learning_rate": 1.6831424701391458e-05, "loss": 0.0146, "step": 65330 }, { "epoch": 1.61, "learning_rate": 1.6830932151212907e-05, "loss": 0.011, "step": 65340 }, { "epoch": 1.61, "learning_rate": 1.6830439601034356e-05, "loss": 0.0106, "step": 65350 }, { "epoch": 1.61, "learning_rate": 1.682994705085581e-05, "loss": 0.0217, "step": 65360 }, { "epoch": 1.61, "learning_rate": 1.6829454500677258e-05, "loss": 0.017, "step": 65370 }, { "epoch": 1.61, "learning_rate": 1.6828961950498707e-05, "loss": 0.0115, "step": 65380 }, { "epoch": 1.61, "learning_rate": 1.682846940032016e-05, "loss": 0.0128, "step": 65390 }, { "epoch": 1.61, "learning_rate": 1.682797685014161e-05, "loss": 0.0086, "step": 65400 }, { "epoch": 1.61, "learning_rate": 1.6827484299963058e-05, "loss": 0.0151, "step": 65410 }, { "epoch": 1.61, "learning_rate": 1.682699174978451e-05, "loss": 0.0125, "step": 65420 }, { "epoch": 1.61, "learning_rate": 1.682649919960596e-05, "loss": 0.0111, "step": 65430 }, { "epoch": 1.61, "learning_rate": 1.6826006649427412e-05, "loss": 0.019, "step": 65440 }, { "epoch": 1.61, "learning_rate": 1.682551409924886e-05, "loss": 0.0104, "step": 65450 }, { "epoch": 1.61, "learning_rate": 1.6825021549070314e-05, "loss": 0.012, "step": 65460 }, { "epoch": 1.61, "learning_rate": 1.6824528998891763e-05, "loss": 0.0206, "step": 65470 }, { "epoch": 1.61, "learning_rate": 1.6824036448713216e-05, "loss": 0.0136, "step": 65480 }, { "epoch": 1.61, "learning_rate": 1.6823543898534665e-05, "loss": 0.0111, "step": 65490 }, { "epoch": 1.61, "learning_rate": 1.6823051348356114e-05, "loss": 0.0129, "step": 65500 }, { "epoch": 1.61, "learning_rate": 1.6822558798177566e-05, "loss": 0.011, "step": 65510 }, { "epoch": 1.61, "learning_rate": 1.6822066247999016e-05, "loss": 0.0317, "step": 65520 }, { "epoch": 1.61, "learning_rate": 1.6821573697820465e-05, "loss": 0.0101, "step": 65530 }, { "epoch": 1.61, "learning_rate": 1.6821081147641917e-05, "loss": 0.0105, "step": 65540 }, { "epoch": 1.61, "learning_rate": 1.6820588597463366e-05, "loss": 0.0111, "step": 65550 }, { "epoch": 1.61, "learning_rate": 1.682009604728482e-05, "loss": 0.0209, "step": 65560 }, { "epoch": 1.61, "learning_rate": 1.6819603497106268e-05, "loss": 0.0124, "step": 65570 }, { "epoch": 1.61, "learning_rate": 1.681911094692772e-05, "loss": 0.0119, "step": 65580 }, { "epoch": 1.61, "learning_rate": 1.681861839674917e-05, "loss": 0.0122, "step": 65590 }, { "epoch": 1.61, "learning_rate": 1.6818125846570622e-05, "loss": 0.018, "step": 65600 }, { "epoch": 1.61, "learning_rate": 1.681763329639207e-05, "loss": 0.0105, "step": 65610 }, { "epoch": 1.61, "learning_rate": 1.6817140746213524e-05, "loss": 0.0192, "step": 65620 }, { "epoch": 1.61, "learning_rate": 1.6816648196034973e-05, "loss": 0.013, "step": 65630 }, { "epoch": 1.61, "learning_rate": 1.6816155645856422e-05, "loss": 0.0124, "step": 65640 }, { "epoch": 1.61, "learning_rate": 1.6815663095677875e-05, "loss": 0.0143, "step": 65650 }, { "epoch": 1.61, "learning_rate": 1.6815170545499324e-05, "loss": 0.0161, "step": 65660 }, { "epoch": 1.61, "learning_rate": 1.6814677995320773e-05, "loss": 0.0112, "step": 65670 }, { "epoch": 1.61, "learning_rate": 1.6814185445142226e-05, "loss": 0.0222, "step": 65680 }, { "epoch": 1.61, "learning_rate": 1.6813692894963675e-05, "loss": 0.0106, "step": 65690 }, { "epoch": 1.61, "learning_rate": 1.6813200344785124e-05, "loss": 0.0168, "step": 65700 }, { "epoch": 1.61, "learning_rate": 1.6812707794606576e-05, "loss": 0.0165, "step": 65710 }, { "epoch": 1.61, "learning_rate": 1.681221524442803e-05, "loss": 0.0141, "step": 65720 }, { "epoch": 1.61, "learning_rate": 1.6811722694249478e-05, "loss": 0.016, "step": 65730 }, { "epoch": 1.62, "learning_rate": 1.681123014407093e-05, "loss": 0.0088, "step": 65740 }, { "epoch": 1.62, "learning_rate": 1.681073759389238e-05, "loss": 0.0101, "step": 65750 }, { "epoch": 1.62, "learning_rate": 1.681024504371383e-05, "loss": 0.0108, "step": 65760 }, { "epoch": 1.62, "learning_rate": 1.680975249353528e-05, "loss": 0.0152, "step": 65770 }, { "epoch": 1.62, "learning_rate": 1.680925994335673e-05, "loss": 0.0225, "step": 65780 }, { "epoch": 1.62, "learning_rate": 1.680876739317818e-05, "loss": 0.019, "step": 65790 }, { "epoch": 1.62, "learning_rate": 1.6808274842999632e-05, "loss": 0.0119, "step": 65800 }, { "epoch": 1.62, "learning_rate": 1.680778229282108e-05, "loss": 0.011, "step": 65810 }, { "epoch": 1.62, "learning_rate": 1.6807289742642534e-05, "loss": 0.0118, "step": 65820 }, { "epoch": 1.62, "learning_rate": 1.6806797192463983e-05, "loss": 0.0161, "step": 65830 }, { "epoch": 1.62, "learning_rate": 1.6806304642285432e-05, "loss": 0.0173, "step": 65840 }, { "epoch": 1.62, "learning_rate": 1.6805812092106885e-05, "loss": 0.0125, "step": 65850 }, { "epoch": 1.62, "learning_rate": 1.6805319541928334e-05, "loss": 0.0111, "step": 65860 }, { "epoch": 1.62, "learning_rate": 1.6804826991749787e-05, "loss": 0.0175, "step": 65870 }, { "epoch": 1.62, "learning_rate": 1.6804334441571236e-05, "loss": 0.0134, "step": 65880 }, { "epoch": 1.62, "learning_rate": 1.6803841891392688e-05, "loss": 0.0165, "step": 65890 }, { "epoch": 1.62, "learning_rate": 1.6803349341214137e-05, "loss": 0.0152, "step": 65900 }, { "epoch": 1.62, "learning_rate": 1.680285679103559e-05, "loss": 0.0101, "step": 65910 }, { "epoch": 1.62, "learning_rate": 1.680236424085704e-05, "loss": 0.0114, "step": 65920 }, { "epoch": 1.62, "learning_rate": 1.6801871690678488e-05, "loss": 0.0104, "step": 65930 }, { "epoch": 1.62, "learning_rate": 1.680137914049994e-05, "loss": 0.0115, "step": 65940 }, { "epoch": 1.62, "learning_rate": 1.680088659032139e-05, "loss": 0.0124, "step": 65950 }, { "epoch": 1.62, "learning_rate": 1.680039404014284e-05, "loss": 0.0122, "step": 65960 }, { "epoch": 1.62, "learning_rate": 1.679990148996429e-05, "loss": 0.0107, "step": 65970 }, { "epoch": 1.62, "learning_rate": 1.679940893978574e-05, "loss": 0.0118, "step": 65980 }, { "epoch": 1.62, "learning_rate": 1.6798916389607193e-05, "loss": 0.0151, "step": 65990 }, { "epoch": 1.62, "learning_rate": 1.6798423839428642e-05, "loss": 0.0138, "step": 66000 }, { "epoch": 1.62, "learning_rate": 1.6797931289250095e-05, "loss": 0.0156, "step": 66010 }, { "epoch": 1.62, "learning_rate": 1.6797438739071544e-05, "loss": 0.0177, "step": 66020 }, { "epoch": 1.62, "learning_rate": 1.6796946188892997e-05, "loss": 0.0111, "step": 66030 }, { "epoch": 1.62, "learning_rate": 1.6796453638714446e-05, "loss": 0.0103, "step": 66040 }, { "epoch": 1.62, "learning_rate": 1.67959610885359e-05, "loss": 0.017, "step": 66050 }, { "epoch": 1.62, "learning_rate": 1.6795468538357348e-05, "loss": 0.0106, "step": 66060 }, { "epoch": 1.62, "learning_rate": 1.6794975988178797e-05, "loss": 0.0119, "step": 66070 }, { "epoch": 1.62, "learning_rate": 1.679448343800025e-05, "loss": 0.0112, "step": 66080 }, { "epoch": 1.62, "learning_rate": 1.67939908878217e-05, "loss": 0.0115, "step": 66090 }, { "epoch": 1.62, "learning_rate": 1.6793498337643147e-05, "loss": 0.028, "step": 66100 }, { "epoch": 1.62, "learning_rate": 1.67930057874646e-05, "loss": 0.0113, "step": 66110 }, { "epoch": 1.62, "learning_rate": 1.679251323728605e-05, "loss": 0.0117, "step": 66120 }, { "epoch": 1.62, "learning_rate": 1.67920206871075e-05, "loss": 0.0108, "step": 66130 }, { "epoch": 1.62, "learning_rate": 1.679152813692895e-05, "loss": 0.011, "step": 66140 }, { "epoch": 1.63, "learning_rate": 1.67910355867504e-05, "loss": 0.0131, "step": 66150 }, { "epoch": 1.63, "learning_rate": 1.6790543036571853e-05, "loss": 0.014, "step": 66160 }, { "epoch": 1.63, "learning_rate": 1.67900504863933e-05, "loss": 0.0105, "step": 66170 }, { "epoch": 1.63, "learning_rate": 1.6789557936214754e-05, "loss": 0.0105, "step": 66180 }, { "epoch": 1.63, "learning_rate": 1.6789065386036203e-05, "loss": 0.0107, "step": 66190 }, { "epoch": 1.63, "learning_rate": 1.6788572835857656e-05, "loss": 0.0198, "step": 66200 }, { "epoch": 1.63, "learning_rate": 1.6788080285679105e-05, "loss": 0.0153, "step": 66210 }, { "epoch": 1.63, "learning_rate": 1.6787587735500554e-05, "loss": 0.0111, "step": 66220 }, { "epoch": 1.63, "learning_rate": 1.6787095185322007e-05, "loss": 0.0103, "step": 66230 }, { "epoch": 1.63, "learning_rate": 1.6786602635143456e-05, "loss": 0.0102, "step": 66240 }, { "epoch": 1.63, "learning_rate": 1.6786110084964905e-05, "loss": 0.0116, "step": 66250 }, { "epoch": 1.63, "learning_rate": 1.6785617534786358e-05, "loss": 0.0102, "step": 66260 }, { "epoch": 1.63, "learning_rate": 1.6785124984607807e-05, "loss": 0.0161, "step": 66270 }, { "epoch": 1.63, "learning_rate": 1.678463243442926e-05, "loss": 0.013, "step": 66280 }, { "epoch": 1.63, "learning_rate": 1.678413988425071e-05, "loss": 0.015, "step": 66290 }, { "epoch": 1.63, "learning_rate": 1.678364733407216e-05, "loss": 0.0303, "step": 66300 }, { "epoch": 1.63, "learning_rate": 1.678315478389361e-05, "loss": 0.0108, "step": 66310 }, { "epoch": 1.63, "learning_rate": 1.6782662233715063e-05, "loss": 0.0106, "step": 66320 }, { "epoch": 1.63, "learning_rate": 1.6782169683536512e-05, "loss": 0.012, "step": 66330 }, { "epoch": 1.63, "learning_rate": 1.6781677133357964e-05, "loss": 0.0102, "step": 66340 }, { "epoch": 1.63, "learning_rate": 1.6781184583179413e-05, "loss": 0.0109, "step": 66350 }, { "epoch": 1.63, "learning_rate": 1.6780692033000863e-05, "loss": 0.0194, "step": 66360 }, { "epoch": 1.63, "learning_rate": 1.6780199482822315e-05, "loss": 0.0279, "step": 66370 }, { "epoch": 1.63, "learning_rate": 1.6779706932643764e-05, "loss": 0.0115, "step": 66380 }, { "epoch": 1.63, "learning_rate": 1.6779214382465213e-05, "loss": 0.0201, "step": 66390 }, { "epoch": 1.63, "learning_rate": 1.6778721832286666e-05, "loss": 0.0126, "step": 66400 }, { "epoch": 1.63, "learning_rate": 1.6778229282108115e-05, "loss": 0.0114, "step": 66410 }, { "epoch": 1.63, "learning_rate": 1.6777736731929564e-05, "loss": 0.015, "step": 66420 }, { "epoch": 1.63, "learning_rate": 1.6777244181751017e-05, "loss": 0.0131, "step": 66430 }, { "epoch": 1.63, "learning_rate": 1.677675163157247e-05, "loss": 0.0122, "step": 66440 }, { "epoch": 1.63, "learning_rate": 1.677625908139392e-05, "loss": 0.0194, "step": 66450 }, { "epoch": 1.63, "learning_rate": 1.677576653121537e-05, "loss": 0.0121, "step": 66460 }, { "epoch": 1.63, "learning_rate": 1.677527398103682e-05, "loss": 0.0097, "step": 66470 }, { "epoch": 1.63, "learning_rate": 1.677478143085827e-05, "loss": 0.0121, "step": 66480 }, { "epoch": 1.63, "learning_rate": 1.6774288880679722e-05, "loss": 0.0127, "step": 66490 }, { "epoch": 1.63, "learning_rate": 1.677379633050117e-05, "loss": 0.0176, "step": 66500 }, { "epoch": 1.63, "learning_rate": 1.677330378032262e-05, "loss": 0.0154, "step": 66510 }, { "epoch": 1.63, "learning_rate": 1.6772811230144073e-05, "loss": 0.0131, "step": 66520 }, { "epoch": 1.63, "learning_rate": 1.6772318679965522e-05, "loss": 0.0103, "step": 66530 }, { "epoch": 1.63, "learning_rate": 1.6771826129786974e-05, "loss": 0.0148, "step": 66540 }, { "epoch": 1.63, "learning_rate": 1.6771333579608424e-05, "loss": 0.0096, "step": 66550 }, { "epoch": 1.64, "learning_rate": 1.6770841029429873e-05, "loss": 0.0105, "step": 66560 }, { "epoch": 1.64, "learning_rate": 1.6770348479251325e-05, "loss": 0.01, "step": 66570 }, { "epoch": 1.64, "learning_rate": 1.6769855929072774e-05, "loss": 0.0111, "step": 66580 }, { "epoch": 1.64, "learning_rate": 1.6769363378894227e-05, "loss": 0.0116, "step": 66590 }, { "epoch": 1.64, "learning_rate": 1.6768870828715676e-05, "loss": 0.011, "step": 66600 }, { "epoch": 1.64, "learning_rate": 1.676837827853713e-05, "loss": 0.0098, "step": 66610 }, { "epoch": 1.64, "learning_rate": 1.6767885728358578e-05, "loss": 0.0131, "step": 66620 }, { "epoch": 1.64, "learning_rate": 1.676739317818003e-05, "loss": 0.0152, "step": 66630 }, { "epoch": 1.64, "learning_rate": 1.676690062800148e-05, "loss": 0.0089, "step": 66640 }, { "epoch": 1.64, "learning_rate": 1.676640807782293e-05, "loss": 0.0107, "step": 66650 }, { "epoch": 1.64, "learning_rate": 1.676591552764438e-05, "loss": 0.0121, "step": 66660 }, { "epoch": 1.64, "learning_rate": 1.676542297746583e-05, "loss": 0.0161, "step": 66670 }, { "epoch": 1.64, "learning_rate": 1.676493042728728e-05, "loss": 0.0114, "step": 66680 }, { "epoch": 1.64, "learning_rate": 1.6764437877108732e-05, "loss": 0.0136, "step": 66690 }, { "epoch": 1.64, "learning_rate": 1.676394532693018e-05, "loss": 0.0115, "step": 66700 }, { "epoch": 1.64, "learning_rate": 1.6763452776751634e-05, "loss": 0.0116, "step": 66710 }, { "epoch": 1.64, "learning_rate": 1.6762960226573083e-05, "loss": 0.0114, "step": 66720 }, { "epoch": 1.64, "learning_rate": 1.6762467676394535e-05, "loss": 0.0172, "step": 66730 }, { "epoch": 1.64, "learning_rate": 1.6761975126215984e-05, "loss": 0.0129, "step": 66740 }, { "epoch": 1.64, "learning_rate": 1.6761482576037437e-05, "loss": 0.0204, "step": 66750 }, { "epoch": 1.64, "learning_rate": 1.6760990025858886e-05, "loss": 0.0106, "step": 66760 }, { "epoch": 1.64, "learning_rate": 1.676049747568034e-05, "loss": 0.0121, "step": 66770 }, { "epoch": 1.64, "learning_rate": 1.6760004925501788e-05, "loss": 0.0134, "step": 66780 }, { "epoch": 1.64, "learning_rate": 1.6759512375323237e-05, "loss": 0.0104, "step": 66790 }, { "epoch": 1.64, "learning_rate": 1.675901982514469e-05, "loss": 0.0187, "step": 66800 }, { "epoch": 1.64, "learning_rate": 1.675852727496614e-05, "loss": 0.0117, "step": 66810 }, { "epoch": 1.64, "learning_rate": 1.6758034724787588e-05, "loss": 0.0104, "step": 66820 }, { "epoch": 1.64, "learning_rate": 1.675754217460904e-05, "loss": 0.0089, "step": 66830 }, { "epoch": 1.64, "learning_rate": 1.675704962443049e-05, "loss": 0.0115, "step": 66840 }, { "epoch": 1.64, "learning_rate": 1.675655707425194e-05, "loss": 0.0158, "step": 66850 }, { "epoch": 1.64, "learning_rate": 1.675606452407339e-05, "loss": 0.0189, "step": 66860 }, { "epoch": 1.64, "learning_rate": 1.675557197389484e-05, "loss": 0.0122, "step": 66870 }, { "epoch": 1.64, "learning_rate": 1.6755079423716293e-05, "loss": 0.0093, "step": 66880 }, { "epoch": 1.64, "learning_rate": 1.6754586873537742e-05, "loss": 0.0121, "step": 66890 }, { "epoch": 1.64, "learning_rate": 1.6754094323359195e-05, "loss": 0.0195, "step": 66900 }, { "epoch": 1.64, "learning_rate": 1.6753601773180644e-05, "loss": 0.0121, "step": 66910 }, { "epoch": 1.64, "learning_rate": 1.6753109223002096e-05, "loss": 0.0105, "step": 66920 }, { "epoch": 1.64, "learning_rate": 1.6752616672823545e-05, "loss": 0.0128, "step": 66930 }, { "epoch": 1.64, "learning_rate": 1.6752124122644995e-05, "loss": 0.0201, "step": 66940 }, { "epoch": 1.64, "learning_rate": 1.6751631572466447e-05, "loss": 0.0114, "step": 66950 }, { "epoch": 1.64, "learning_rate": 1.6751139022287896e-05, "loss": 0.011, "step": 66960 }, { "epoch": 1.65, "learning_rate": 1.6750646472109345e-05, "loss": 0.0121, "step": 66970 }, { "epoch": 1.65, "learning_rate": 1.6750153921930798e-05, "loss": 0.0127, "step": 66980 }, { "epoch": 1.65, "learning_rate": 1.6749661371752247e-05, "loss": 0.0148, "step": 66990 }, { "epoch": 1.65, "learning_rate": 1.67491688215737e-05, "loss": 0.0127, "step": 67000 }, { "epoch": 1.65, "learning_rate": 1.674867627139515e-05, "loss": 0.0127, "step": 67010 }, { "epoch": 1.65, "learning_rate": 1.67481837212166e-05, "loss": 0.0187, "step": 67020 }, { "epoch": 1.65, "learning_rate": 1.674769117103805e-05, "loss": 0.0197, "step": 67030 }, { "epoch": 1.65, "learning_rate": 1.6747198620859503e-05, "loss": 0.0106, "step": 67040 }, { "epoch": 1.65, "learning_rate": 1.6746706070680952e-05, "loss": 0.0181, "step": 67050 }, { "epoch": 1.65, "learning_rate": 1.6746213520502405e-05, "loss": 0.0109, "step": 67060 }, { "epoch": 1.65, "learning_rate": 1.6745720970323854e-05, "loss": 0.0199, "step": 67070 }, { "epoch": 1.65, "learning_rate": 1.6745228420145303e-05, "loss": 0.0099, "step": 67080 }, { "epoch": 1.65, "learning_rate": 1.6744735869966755e-05, "loss": 0.0246, "step": 67090 }, { "epoch": 1.65, "learning_rate": 1.6744243319788205e-05, "loss": 0.0103, "step": 67100 }, { "epoch": 1.65, "learning_rate": 1.6743750769609654e-05, "loss": 0.0153, "step": 67110 }, { "epoch": 1.65, "learning_rate": 1.6743258219431106e-05, "loss": 0.0115, "step": 67120 }, { "epoch": 1.65, "learning_rate": 1.6742765669252555e-05, "loss": 0.0142, "step": 67130 }, { "epoch": 1.65, "learning_rate": 1.6742273119074005e-05, "loss": 0.0117, "step": 67140 }, { "epoch": 1.65, "learning_rate": 1.6741780568895457e-05, "loss": 0.0121, "step": 67150 }, { "epoch": 1.65, "learning_rate": 1.674128801871691e-05, "loss": 0.0185, "step": 67160 }, { "epoch": 1.65, "learning_rate": 1.674079546853836e-05, "loss": 0.0113, "step": 67170 }, { "epoch": 1.65, "learning_rate": 1.674030291835981e-05, "loss": 0.0112, "step": 67180 }, { "epoch": 1.65, "learning_rate": 1.673981036818126e-05, "loss": 0.0122, "step": 67190 }, { "epoch": 1.65, "learning_rate": 1.673931781800271e-05, "loss": 0.0103, "step": 67200 }, { "epoch": 1.65, "learning_rate": 1.6738825267824162e-05, "loss": 0.0146, "step": 67210 }, { "epoch": 1.65, "learning_rate": 1.673833271764561e-05, "loss": 0.0122, "step": 67220 }, { "epoch": 1.65, "learning_rate": 1.673784016746706e-05, "loss": 0.0172, "step": 67230 }, { "epoch": 1.65, "learning_rate": 1.6737347617288513e-05, "loss": 0.0106, "step": 67240 }, { "epoch": 1.65, "learning_rate": 1.6736855067109962e-05, "loss": 0.0103, "step": 67250 }, { "epoch": 1.65, "learning_rate": 1.6736362516931415e-05, "loss": 0.0111, "step": 67260 }, { "epoch": 1.65, "learning_rate": 1.6735869966752864e-05, "loss": 0.0129, "step": 67270 }, { "epoch": 1.65, "learning_rate": 1.6735377416574313e-05, "loss": 0.0145, "step": 67280 }, { "epoch": 1.65, "learning_rate": 1.6734884866395766e-05, "loss": 0.0119, "step": 67290 }, { "epoch": 1.65, "learning_rate": 1.6734392316217215e-05, "loss": 0.0133, "step": 67300 }, { "epoch": 1.65, "learning_rate": 1.6733899766038667e-05, "loss": 0.0207, "step": 67310 }, { "epoch": 1.65, "learning_rate": 1.6733407215860116e-05, "loss": 0.0128, "step": 67320 }, { "epoch": 1.65, "learning_rate": 1.673291466568157e-05, "loss": 0.0116, "step": 67330 }, { "epoch": 1.65, "learning_rate": 1.6732422115503018e-05, "loss": 0.0133, "step": 67340 }, { "epoch": 1.65, "learning_rate": 1.673192956532447e-05, "loss": 0.0115, "step": 67350 }, { "epoch": 1.65, "learning_rate": 1.673143701514592e-05, "loss": 0.0183, "step": 67360 }, { "epoch": 1.66, "learning_rate": 1.673094446496737e-05, "loss": 0.0171, "step": 67370 }, { "epoch": 1.66, "learning_rate": 1.673045191478882e-05, "loss": 0.0111, "step": 67380 }, { "epoch": 1.66, "learning_rate": 1.672995936461027e-05, "loss": 0.0117, "step": 67390 }, { "epoch": 1.66, "learning_rate": 1.672946681443172e-05, "loss": 0.0111, "step": 67400 }, { "epoch": 1.66, "learning_rate": 1.6728974264253172e-05, "loss": 0.0121, "step": 67410 }, { "epoch": 1.66, "learning_rate": 1.672848171407462e-05, "loss": 0.0123, "step": 67420 }, { "epoch": 1.66, "learning_rate": 1.6727989163896074e-05, "loss": 0.0222, "step": 67430 }, { "epoch": 1.66, "learning_rate": 1.6727496613717523e-05, "loss": 0.0104, "step": 67440 }, { "epoch": 1.66, "learning_rate": 1.6727004063538976e-05, "loss": 0.0108, "step": 67450 }, { "epoch": 1.66, "learning_rate": 1.6726511513360425e-05, "loss": 0.0131, "step": 67460 }, { "epoch": 1.66, "learning_rate": 1.6726018963181877e-05, "loss": 0.0095, "step": 67470 }, { "epoch": 1.66, "learning_rate": 1.6725526413003326e-05, "loss": 0.0105, "step": 67480 }, { "epoch": 1.66, "learning_rate": 1.672503386282478e-05, "loss": 0.0194, "step": 67490 }, { "epoch": 1.66, "learning_rate": 1.6724541312646228e-05, "loss": 0.0113, "step": 67500 }, { "epoch": 1.66, "learning_rate": 1.6724048762467677e-05, "loss": 0.009, "step": 67510 }, { "epoch": 1.66, "learning_rate": 1.672355621228913e-05, "loss": 0.0161, "step": 67520 }, { "epoch": 1.66, "learning_rate": 1.672306366211058e-05, "loss": 0.0164, "step": 67530 }, { "epoch": 1.66, "learning_rate": 1.6722571111932028e-05, "loss": 0.0121, "step": 67540 }, { "epoch": 1.66, "learning_rate": 1.672207856175348e-05, "loss": 0.0121, "step": 67550 }, { "epoch": 1.66, "learning_rate": 1.672158601157493e-05, "loss": 0.0112, "step": 67560 }, { "epoch": 1.66, "learning_rate": 1.672109346139638e-05, "loss": 0.0122, "step": 67570 }, { "epoch": 1.66, "learning_rate": 1.672060091121783e-05, "loss": 0.0109, "step": 67580 }, { "epoch": 1.66, "learning_rate": 1.672010836103928e-05, "loss": 0.0112, "step": 67590 }, { "epoch": 1.66, "learning_rate": 1.6719615810860733e-05, "loss": 0.0126, "step": 67600 }, { "epoch": 1.66, "learning_rate": 1.6719123260682182e-05, "loss": 0.0099, "step": 67610 }, { "epoch": 1.66, "learning_rate": 1.6718630710503635e-05, "loss": 0.0146, "step": 67620 }, { "epoch": 1.66, "learning_rate": 1.6718138160325084e-05, "loss": 0.0257, "step": 67630 }, { "epoch": 1.66, "learning_rate": 1.6717645610146537e-05, "loss": 0.0109, "step": 67640 }, { "epoch": 1.66, "learning_rate": 1.6717153059967986e-05, "loss": 0.0103, "step": 67650 }, { "epoch": 1.66, "learning_rate": 1.6716660509789435e-05, "loss": 0.0119, "step": 67660 }, { "epoch": 1.66, "learning_rate": 1.6716167959610887e-05, "loss": 0.0116, "step": 67670 }, { "epoch": 1.66, "learning_rate": 1.6715675409432337e-05, "loss": 0.0207, "step": 67680 }, { "epoch": 1.66, "learning_rate": 1.6715182859253786e-05, "loss": 0.0112, "step": 67690 }, { "epoch": 1.66, "learning_rate": 1.6714690309075238e-05, "loss": 0.0129, "step": 67700 }, { "epoch": 1.66, "learning_rate": 1.6714197758896687e-05, "loss": 0.0115, "step": 67710 }, { "epoch": 1.66, "learning_rate": 1.671370520871814e-05, "loss": 0.0104, "step": 67720 }, { "epoch": 1.66, "learning_rate": 1.671321265853959e-05, "loss": 0.0114, "step": 67730 }, { "epoch": 1.66, "learning_rate": 1.671272010836104e-05, "loss": 0.0106, "step": 67740 }, { "epoch": 1.66, "learning_rate": 1.671222755818249e-05, "loss": 0.0107, "step": 67750 }, { "epoch": 1.66, "learning_rate": 1.6711735008003943e-05, "loss": 0.0196, "step": 67760 }, { "epoch": 1.66, "learning_rate": 1.6711242457825392e-05, "loss": 0.0117, "step": 67770 }, { "epoch": 1.67, "learning_rate": 1.6710749907646845e-05, "loss": 0.012, "step": 67780 }, { "epoch": 1.67, "learning_rate": 1.6710257357468294e-05, "loss": 0.0167, "step": 67790 }, { "epoch": 1.67, "learning_rate": 1.6709764807289743e-05, "loss": 0.0155, "step": 67800 }, { "epoch": 1.67, "learning_rate": 1.6709272257111196e-05, "loss": 0.0115, "step": 67810 }, { "epoch": 1.67, "learning_rate": 1.6708779706932645e-05, "loss": 0.0185, "step": 67820 }, { "epoch": 1.67, "learning_rate": 1.6708287156754094e-05, "loss": 0.0124, "step": 67830 }, { "epoch": 1.67, "learning_rate": 1.6707794606575547e-05, "loss": 0.0221, "step": 67840 }, { "epoch": 1.67, "learning_rate": 1.6707302056396996e-05, "loss": 0.0107, "step": 67850 }, { "epoch": 1.67, "learning_rate": 1.6706809506218445e-05, "loss": 0.015, "step": 67860 }, { "epoch": 1.67, "learning_rate": 1.6706316956039898e-05, "loss": 0.0114, "step": 67870 }, { "epoch": 1.67, "learning_rate": 1.670582440586135e-05, "loss": 0.0199, "step": 67880 }, { "epoch": 1.67, "learning_rate": 1.67053318556828e-05, "loss": 0.0114, "step": 67890 }, { "epoch": 1.67, "learning_rate": 1.6704839305504252e-05, "loss": 0.0116, "step": 67900 }, { "epoch": 1.67, "learning_rate": 1.67043467553257e-05, "loss": 0.0119, "step": 67910 }, { "epoch": 1.67, "learning_rate": 1.670385420514715e-05, "loss": 0.0118, "step": 67920 }, { "epoch": 1.67, "learning_rate": 1.6703361654968603e-05, "loss": 0.0113, "step": 67930 }, { "epoch": 1.67, "learning_rate": 1.6702869104790052e-05, "loss": 0.0114, "step": 67940 }, { "epoch": 1.67, "learning_rate": 1.67023765546115e-05, "loss": 0.0176, "step": 67950 }, { "epoch": 1.67, "learning_rate": 1.6701884004432953e-05, "loss": 0.0097, "step": 67960 }, { "epoch": 1.67, "learning_rate": 1.6701391454254403e-05, "loss": 0.0098, "step": 67970 }, { "epoch": 1.67, "learning_rate": 1.6700898904075855e-05, "loss": 0.0171, "step": 67980 }, { "epoch": 1.67, "learning_rate": 1.6700406353897304e-05, "loss": 0.0172, "step": 67990 }, { "epoch": 1.67, "learning_rate": 1.6699913803718753e-05, "loss": 0.0103, "step": 68000 }, { "epoch": 1.67, "learning_rate": 1.6699421253540206e-05, "loss": 0.0113, "step": 68010 }, { "epoch": 1.67, "learning_rate": 1.6698928703361655e-05, "loss": 0.009, "step": 68020 }, { "epoch": 1.67, "learning_rate": 1.6698436153183108e-05, "loss": 0.0118, "step": 68030 }, { "epoch": 1.67, "learning_rate": 1.6697943603004557e-05, "loss": 0.0106, "step": 68040 }, { "epoch": 1.67, "learning_rate": 1.669745105282601e-05, "loss": 0.013, "step": 68050 }, { "epoch": 1.67, "learning_rate": 1.669695850264746e-05, "loss": 0.0169, "step": 68060 }, { "epoch": 1.67, "learning_rate": 1.669646595246891e-05, "loss": 0.0175, "step": 68070 }, { "epoch": 1.67, "learning_rate": 1.669597340229036e-05, "loss": 0.0101, "step": 68080 }, { "epoch": 1.67, "learning_rate": 1.669548085211181e-05, "loss": 0.0102, "step": 68090 }, { "epoch": 1.67, "learning_rate": 1.6694988301933262e-05, "loss": 0.013, "step": 68100 }, { "epoch": 1.67, "learning_rate": 1.669449575175471e-05, "loss": 0.012, "step": 68110 }, { "epoch": 1.67, "learning_rate": 1.669400320157616e-05, "loss": 0.0115, "step": 68120 }, { "epoch": 1.67, "learning_rate": 1.6693510651397613e-05, "loss": 0.0125, "step": 68130 }, { "epoch": 1.67, "learning_rate": 1.6693018101219062e-05, "loss": 0.0114, "step": 68140 }, { "epoch": 1.67, "learning_rate": 1.6692525551040514e-05, "loss": 0.0122, "step": 68150 }, { "epoch": 1.67, "learning_rate": 1.6692033000861963e-05, "loss": 0.0105, "step": 68160 }, { "epoch": 1.67, "learning_rate": 1.6691540450683416e-05, "loss": 0.011, "step": 68170 }, { "epoch": 1.67, "learning_rate": 1.6691047900504865e-05, "loss": 0.0122, "step": 68180 }, { "epoch": 1.68, "learning_rate": 1.6690555350326318e-05, "loss": 0.0197, "step": 68190 }, { "epoch": 1.68, "learning_rate": 1.6690062800147767e-05, "loss": 0.0125, "step": 68200 }, { "epoch": 1.68, "learning_rate": 1.668957024996922e-05, "loss": 0.0115, "step": 68210 }, { "epoch": 1.68, "learning_rate": 1.668907769979067e-05, "loss": 0.0176, "step": 68220 }, { "epoch": 1.68, "learning_rate": 1.6688585149612118e-05, "loss": 0.0097, "step": 68230 }, { "epoch": 1.68, "learning_rate": 1.668809259943357e-05, "loss": 0.0242, "step": 68240 }, { "epoch": 1.68, "learning_rate": 1.668760004925502e-05, "loss": 0.0112, "step": 68250 }, { "epoch": 1.68, "learning_rate": 1.668710749907647e-05, "loss": 0.0245, "step": 68260 }, { "epoch": 1.68, "learning_rate": 1.668661494889792e-05, "loss": 0.0215, "step": 68270 }, { "epoch": 1.68, "learning_rate": 1.668612239871937e-05, "loss": 0.0184, "step": 68280 }, { "epoch": 1.68, "learning_rate": 1.668562984854082e-05, "loss": 0.017, "step": 68290 }, { "epoch": 1.68, "learning_rate": 1.6685137298362272e-05, "loss": 0.0153, "step": 68300 }, { "epoch": 1.68, "learning_rate": 1.668464474818372e-05, "loss": 0.0132, "step": 68310 }, { "epoch": 1.68, "learning_rate": 1.6684152198005174e-05, "loss": 0.0151, "step": 68320 }, { "epoch": 1.68, "learning_rate": 1.6683659647826623e-05, "loss": 0.0206, "step": 68330 }, { "epoch": 1.68, "learning_rate": 1.6683167097648075e-05, "loss": 0.0111, "step": 68340 }, { "epoch": 1.68, "learning_rate": 1.6682674547469524e-05, "loss": 0.0101, "step": 68350 }, { "epoch": 1.68, "learning_rate": 1.6682181997290977e-05, "loss": 0.0123, "step": 68360 }, { "epoch": 1.68, "learning_rate": 1.6681689447112426e-05, "loss": 0.0116, "step": 68370 }, { "epoch": 1.68, "learning_rate": 1.6681196896933875e-05, "loss": 0.0216, "step": 68380 }, { "epoch": 1.68, "learning_rate": 1.6680704346755328e-05, "loss": 0.0159, "step": 68390 }, { "epoch": 1.68, "learning_rate": 1.6680211796576777e-05, "loss": 0.0109, "step": 68400 }, { "epoch": 1.68, "learning_rate": 1.6679719246398226e-05, "loss": 0.0133, "step": 68410 }, { "epoch": 1.68, "learning_rate": 1.667922669621968e-05, "loss": 0.0118, "step": 68420 }, { "epoch": 1.68, "learning_rate": 1.6678734146041128e-05, "loss": 0.0108, "step": 68430 }, { "epoch": 1.68, "learning_rate": 1.667824159586258e-05, "loss": 0.0125, "step": 68440 }, { "epoch": 1.68, "learning_rate": 1.667774904568403e-05, "loss": 0.011, "step": 68450 }, { "epoch": 1.68, "learning_rate": 1.6677256495505482e-05, "loss": 0.0122, "step": 68460 }, { "epoch": 1.68, "learning_rate": 1.667676394532693e-05, "loss": 0.0102, "step": 68470 }, { "epoch": 1.68, "learning_rate": 1.6676271395148384e-05, "loss": 0.0112, "step": 68480 }, { "epoch": 1.68, "learning_rate": 1.6675778844969833e-05, "loss": 0.0114, "step": 68490 }, { "epoch": 1.68, "learning_rate": 1.6675286294791285e-05, "loss": 0.0129, "step": 68500 }, { "epoch": 1.68, "learning_rate": 1.6674793744612734e-05, "loss": 0.012, "step": 68510 }, { "epoch": 1.68, "learning_rate": 1.6674301194434184e-05, "loss": 0.0179, "step": 68520 }, { "epoch": 1.68, "learning_rate": 1.6673808644255636e-05, "loss": 0.0122, "step": 68530 }, { "epoch": 1.68, "learning_rate": 1.6673316094077085e-05, "loss": 0.0142, "step": 68540 }, { "epoch": 1.68, "learning_rate": 1.6672823543898534e-05, "loss": 0.011, "step": 68550 }, { "epoch": 1.68, "learning_rate": 1.6672330993719987e-05, "loss": 0.0108, "step": 68560 }, { "epoch": 1.68, "learning_rate": 1.6671838443541436e-05, "loss": 0.0205, "step": 68570 }, { "epoch": 1.68, "learning_rate": 1.6671345893362885e-05, "loss": 0.0128, "step": 68580 }, { "epoch": 1.69, "learning_rate": 1.6670853343184338e-05, "loss": 0.011, "step": 68590 }, { "epoch": 1.69, "learning_rate": 1.667036079300579e-05, "loss": 0.0106, "step": 68600 }, { "epoch": 1.69, "learning_rate": 1.666986824282724e-05, "loss": 0.0103, "step": 68610 }, { "epoch": 1.69, "learning_rate": 1.6669375692648692e-05, "loss": 0.0159, "step": 68620 }, { "epoch": 1.69, "learning_rate": 1.666888314247014e-05, "loss": 0.0113, "step": 68630 }, { "epoch": 1.69, "learning_rate": 1.666839059229159e-05, "loss": 0.0111, "step": 68640 }, { "epoch": 1.69, "learning_rate": 1.6667898042113043e-05, "loss": 0.0097, "step": 68650 }, { "epoch": 1.69, "learning_rate": 1.6667405491934492e-05, "loss": 0.0114, "step": 68660 }, { "epoch": 1.69, "learning_rate": 1.666691294175594e-05, "loss": 0.0119, "step": 68670 }, { "epoch": 1.69, "learning_rate": 1.6666420391577394e-05, "loss": 0.0171, "step": 68680 }, { "epoch": 1.69, "learning_rate": 1.6665927841398843e-05, "loss": 0.0102, "step": 68690 }, { "epoch": 1.69, "learning_rate": 1.6665435291220295e-05, "loss": 0.014, "step": 68700 }, { "epoch": 1.69, "learning_rate": 1.6664942741041745e-05, "loss": 0.0126, "step": 68710 }, { "epoch": 1.69, "learning_rate": 1.6664450190863194e-05, "loss": 0.0103, "step": 68720 }, { "epoch": 1.69, "learning_rate": 1.6663957640684646e-05, "loss": 0.012, "step": 68730 }, { "epoch": 1.69, "learning_rate": 1.6663465090506095e-05, "loss": 0.0125, "step": 68740 }, { "epoch": 1.69, "learning_rate": 1.6662972540327548e-05, "loss": 0.0123, "step": 68750 }, { "epoch": 1.69, "learning_rate": 1.6662479990148997e-05, "loss": 0.0114, "step": 68760 }, { "epoch": 1.69, "learning_rate": 1.666198743997045e-05, "loss": 0.0131, "step": 68770 }, { "epoch": 1.69, "learning_rate": 1.66614948897919e-05, "loss": 0.0106, "step": 68780 }, { "epoch": 1.69, "learning_rate": 1.666100233961335e-05, "loss": 0.0124, "step": 68790 }, { "epoch": 1.69, "learning_rate": 1.66605097894348e-05, "loss": 0.0159, "step": 68800 }, { "epoch": 1.69, "learning_rate": 1.666001723925625e-05, "loss": 0.0126, "step": 68810 }, { "epoch": 1.69, "learning_rate": 1.6659524689077702e-05, "loss": 0.0139, "step": 68820 }, { "epoch": 1.69, "learning_rate": 1.665903213889915e-05, "loss": 0.0105, "step": 68830 }, { "epoch": 1.69, "learning_rate": 1.66585395887206e-05, "loss": 0.0111, "step": 68840 }, { "epoch": 1.69, "learning_rate": 1.6658047038542053e-05, "loss": 0.0122, "step": 68850 }, { "epoch": 1.69, "learning_rate": 1.6657554488363502e-05, "loss": 0.0095, "step": 68860 }, { "epoch": 1.69, "learning_rate": 1.6657061938184955e-05, "loss": 0.029, "step": 68870 }, { "epoch": 1.69, "learning_rate": 1.6656569388006404e-05, "loss": 0.0102, "step": 68880 }, { "epoch": 1.69, "learning_rate": 1.6656076837827856e-05, "loss": 0.0128, "step": 68890 }, { "epoch": 1.69, "learning_rate": 1.6655584287649305e-05, "loss": 0.0102, "step": 68900 }, { "epoch": 1.69, "learning_rate": 1.6655091737470758e-05, "loss": 0.0141, "step": 68910 }, { "epoch": 1.69, "learning_rate": 1.6654599187292207e-05, "loss": 0.0125, "step": 68920 }, { "epoch": 1.69, "learning_rate": 1.665410663711366e-05, "loss": 0.0191, "step": 68930 }, { "epoch": 1.69, "learning_rate": 1.665361408693511e-05, "loss": 0.0102, "step": 68940 }, { "epoch": 1.69, "learning_rate": 1.6653121536756558e-05, "loss": 0.0107, "step": 68950 }, { "epoch": 1.69, "learning_rate": 1.665262898657801e-05, "loss": 0.0109, "step": 68960 }, { "epoch": 1.69, "learning_rate": 1.665213643639946e-05, "loss": 0.0185, "step": 68970 }, { "epoch": 1.69, "learning_rate": 1.665164388622091e-05, "loss": 0.0116, "step": 68980 }, { "epoch": 1.69, "learning_rate": 1.665115133604236e-05, "loss": 0.0099, "step": 68990 }, { "epoch": 1.7, "learning_rate": 1.665065878586381e-05, "loss": 0.0128, "step": 69000 }, { "epoch": 1.7, "learning_rate": 1.665016623568526e-05, "loss": 0.0115, "step": 69010 }, { "epoch": 1.7, "learning_rate": 1.6649673685506712e-05, "loss": 0.013, "step": 69020 }, { "epoch": 1.7, "learning_rate": 1.664918113532816e-05, "loss": 0.0107, "step": 69030 }, { "epoch": 1.7, "learning_rate": 1.6648688585149614e-05, "loss": 0.0101, "step": 69040 }, { "epoch": 1.7, "learning_rate": 1.6648196034971063e-05, "loss": 0.0127, "step": 69050 }, { "epoch": 1.7, "learning_rate": 1.6647703484792516e-05, "loss": 0.016, "step": 69060 }, { "epoch": 1.7, "learning_rate": 1.6647210934613965e-05, "loss": 0.0129, "step": 69070 }, { "epoch": 1.7, "learning_rate": 1.6646718384435417e-05, "loss": 0.0173, "step": 69080 }, { "epoch": 1.7, "learning_rate": 1.6646225834256866e-05, "loss": 0.0315, "step": 69090 }, { "epoch": 1.7, "learning_rate": 1.6645733284078316e-05, "loss": 0.0111, "step": 69100 }, { "epoch": 1.7, "learning_rate": 1.6645240733899768e-05, "loss": 0.0113, "step": 69110 }, { "epoch": 1.7, "learning_rate": 1.6644748183721217e-05, "loss": 0.0132, "step": 69120 }, { "epoch": 1.7, "learning_rate": 1.6644255633542666e-05, "loss": 0.0109, "step": 69130 }, { "epoch": 1.7, "learning_rate": 1.664376308336412e-05, "loss": 0.0108, "step": 69140 }, { "epoch": 1.7, "learning_rate": 1.6643270533185568e-05, "loss": 0.0142, "step": 69150 }, { "epoch": 1.7, "learning_rate": 1.664277798300702e-05, "loss": 0.0152, "step": 69160 }, { "epoch": 1.7, "learning_rate": 1.664228543282847e-05, "loss": 0.0118, "step": 69170 }, { "epoch": 1.7, "learning_rate": 1.6641792882649922e-05, "loss": 0.0139, "step": 69180 }, { "epoch": 1.7, "learning_rate": 1.664130033247137e-05, "loss": 0.0106, "step": 69190 }, { "epoch": 1.7, "learning_rate": 1.6640807782292824e-05, "loss": 0.0104, "step": 69200 }, { "epoch": 1.7, "learning_rate": 1.6640315232114273e-05, "loss": 0.009, "step": 69210 }, { "epoch": 1.7, "learning_rate": 1.6639822681935726e-05, "loss": 0.01, "step": 69220 }, { "epoch": 1.7, "learning_rate": 1.6639330131757175e-05, "loss": 0.011, "step": 69230 }, { "epoch": 1.7, "learning_rate": 1.6638837581578624e-05, "loss": 0.0099, "step": 69240 }, { "epoch": 1.7, "learning_rate": 1.6638345031400077e-05, "loss": 0.0209, "step": 69250 }, { "epoch": 1.7, "learning_rate": 1.6637852481221526e-05, "loss": 0.0181, "step": 69260 }, { "epoch": 1.7, "learning_rate": 1.6637359931042975e-05, "loss": 0.0125, "step": 69270 }, { "epoch": 1.7, "learning_rate": 1.6636867380864427e-05, "loss": 0.013, "step": 69280 }, { "epoch": 1.7, "learning_rate": 1.6636374830685877e-05, "loss": 0.0173, "step": 69290 }, { "epoch": 1.7, "learning_rate": 1.6635882280507326e-05, "loss": 0.0116, "step": 69300 }, { "epoch": 1.7, "learning_rate": 1.6635389730328778e-05, "loss": 0.0106, "step": 69310 }, { "epoch": 1.7, "learning_rate": 1.663489718015023e-05, "loss": 0.0274, "step": 69320 }, { "epoch": 1.7, "learning_rate": 1.663440462997168e-05, "loss": 0.0124, "step": 69330 }, { "epoch": 1.7, "learning_rate": 1.6633912079793132e-05, "loss": 0.0242, "step": 69340 }, { "epoch": 1.7, "learning_rate": 1.663341952961458e-05, "loss": 0.0117, "step": 69350 }, { "epoch": 1.7, "learning_rate": 1.663292697943603e-05, "loss": 0.0107, "step": 69360 }, { "epoch": 1.7, "learning_rate": 1.6632434429257483e-05, "loss": 0.0106, "step": 69370 }, { "epoch": 1.7, "learning_rate": 1.6631941879078932e-05, "loss": 0.0135, "step": 69380 }, { "epoch": 1.7, "learning_rate": 1.663144932890038e-05, "loss": 0.0107, "step": 69390 }, { "epoch": 1.7, "learning_rate": 1.6630956778721834e-05, "loss": 0.0097, "step": 69400 }, { "epoch": 1.71, "learning_rate": 1.6630464228543283e-05, "loss": 0.0138, "step": 69410 }, { "epoch": 1.71, "learning_rate": 1.6629971678364736e-05, "loss": 0.0106, "step": 69420 }, { "epoch": 1.71, "learning_rate": 1.6629479128186185e-05, "loss": 0.0164, "step": 69430 }, { "epoch": 1.71, "learning_rate": 1.6628986578007634e-05, "loss": 0.011, "step": 69440 }, { "epoch": 1.71, "learning_rate": 1.6628494027829087e-05, "loss": 0.0126, "step": 69450 }, { "epoch": 1.71, "learning_rate": 1.6628001477650536e-05, "loss": 0.0211, "step": 69460 }, { "epoch": 1.71, "learning_rate": 1.6627508927471988e-05, "loss": 0.0104, "step": 69470 }, { "epoch": 1.71, "learning_rate": 1.6627016377293437e-05, "loss": 0.0114, "step": 69480 }, { "epoch": 1.71, "learning_rate": 1.662652382711489e-05, "loss": 0.0114, "step": 69490 }, { "epoch": 1.71, "learning_rate": 1.662603127693634e-05, "loss": 0.0119, "step": 69500 }, { "epoch": 1.71, "learning_rate": 1.662553872675779e-05, "loss": 0.0111, "step": 69510 }, { "epoch": 1.71, "learning_rate": 1.662504617657924e-05, "loss": 0.0114, "step": 69520 }, { "epoch": 1.71, "learning_rate": 1.662455362640069e-05, "loss": 0.0104, "step": 69530 }, { "epoch": 1.71, "learning_rate": 1.6624061076222142e-05, "loss": 0.01, "step": 69540 }, { "epoch": 1.71, "learning_rate": 1.662356852604359e-05, "loss": 0.0149, "step": 69550 }, { "epoch": 1.71, "learning_rate": 1.662307597586504e-05, "loss": 0.0149, "step": 69560 }, { "epoch": 1.71, "learning_rate": 1.6622583425686493e-05, "loss": 0.0141, "step": 69570 }, { "epoch": 1.71, "learning_rate": 1.6622090875507942e-05, "loss": 0.0132, "step": 69580 }, { "epoch": 1.71, "learning_rate": 1.6621598325329395e-05, "loss": 0.0187, "step": 69590 }, { "epoch": 1.71, "learning_rate": 1.6621105775150844e-05, "loss": 0.0107, "step": 69600 }, { "epoch": 1.71, "learning_rate": 1.6620613224972297e-05, "loss": 0.0112, "step": 69610 }, { "epoch": 1.71, "learning_rate": 1.6620120674793746e-05, "loss": 0.0105, "step": 69620 }, { "epoch": 1.71, "learning_rate": 1.66196281246152e-05, "loss": 0.0103, "step": 69630 }, { "epoch": 1.71, "learning_rate": 1.6619135574436648e-05, "loss": 0.0136, "step": 69640 }, { "epoch": 1.71, "learning_rate": 1.66186430242581e-05, "loss": 0.0143, "step": 69650 }, { "epoch": 1.71, "learning_rate": 1.661815047407955e-05, "loss": 0.0115, "step": 69660 }, { "epoch": 1.71, "learning_rate": 1.6617657923901e-05, "loss": 0.0195, "step": 69670 }, { "epoch": 1.71, "learning_rate": 1.661716537372245e-05, "loss": 0.0138, "step": 69680 }, { "epoch": 1.71, "learning_rate": 1.66166728235439e-05, "loss": 0.0109, "step": 69690 }, { "epoch": 1.71, "learning_rate": 1.661618027336535e-05, "loss": 0.0172, "step": 69700 }, { "epoch": 1.71, "learning_rate": 1.6615687723186802e-05, "loss": 0.0122, "step": 69710 }, { "epoch": 1.71, "learning_rate": 1.661519517300825e-05, "loss": 0.0088, "step": 69720 }, { "epoch": 1.71, "learning_rate": 1.66147026228297e-05, "loss": 0.0115, "step": 69730 }, { "epoch": 1.71, "learning_rate": 1.6614210072651153e-05, "loss": 0.0106, "step": 69740 }, { "epoch": 1.71, "learning_rate": 1.6613717522472602e-05, "loss": 0.0213, "step": 69750 }, { "epoch": 1.71, "learning_rate": 1.6613224972294054e-05, "loss": 0.0114, "step": 69760 }, { "epoch": 1.71, "learning_rate": 1.6612732422115503e-05, "loss": 0.0112, "step": 69770 }, { "epoch": 1.71, "learning_rate": 1.6612239871936956e-05, "loss": 0.0157, "step": 69780 }, { "epoch": 1.71, "learning_rate": 1.6611747321758405e-05, "loss": 0.0133, "step": 69790 }, { "epoch": 1.71, "learning_rate": 1.6611254771579858e-05, "loss": 0.0104, "step": 69800 }, { "epoch": 1.72, "learning_rate": 1.6610762221401307e-05, "loss": 0.0099, "step": 69810 }, { "epoch": 1.72, "learning_rate": 1.6610269671222756e-05, "loss": 0.0119, "step": 69820 }, { "epoch": 1.72, "learning_rate": 1.660977712104421e-05, "loss": 0.018, "step": 69830 }, { "epoch": 1.72, "learning_rate": 1.6609284570865658e-05, "loss": 0.0111, "step": 69840 }, { "epoch": 1.72, "learning_rate": 1.6608792020687107e-05, "loss": 0.01, "step": 69850 }, { "epoch": 1.72, "learning_rate": 1.660829947050856e-05, "loss": 0.0109, "step": 69860 }, { "epoch": 1.72, "learning_rate": 1.660780692033001e-05, "loss": 0.0101, "step": 69870 }, { "epoch": 1.72, "learning_rate": 1.660731437015146e-05, "loss": 0.0211, "step": 69880 }, { "epoch": 1.72, "learning_rate": 1.660682181997291e-05, "loss": 0.0302, "step": 69890 }, { "epoch": 1.72, "learning_rate": 1.6606329269794363e-05, "loss": 0.0113, "step": 69900 }, { "epoch": 1.72, "learning_rate": 1.6605836719615812e-05, "loss": 0.0134, "step": 69910 }, { "epoch": 1.72, "learning_rate": 1.6605344169437264e-05, "loss": 0.0096, "step": 69920 }, { "epoch": 1.72, "learning_rate": 1.6604851619258713e-05, "loss": 0.0205, "step": 69930 }, { "epoch": 1.72, "learning_rate": 1.6604359069080166e-05, "loss": 0.0112, "step": 69940 }, { "epoch": 1.72, "learning_rate": 1.6603866518901615e-05, "loss": 0.0172, "step": 69950 }, { "epoch": 1.72, "learning_rate": 1.6603373968723064e-05, "loss": 0.0212, "step": 69960 }, { "epoch": 1.72, "learning_rate": 1.6602881418544517e-05, "loss": 0.0121, "step": 69970 }, { "epoch": 1.72, "learning_rate": 1.6602388868365966e-05, "loss": 0.0128, "step": 69980 }, { "epoch": 1.72, "learning_rate": 1.6601896318187415e-05, "loss": 0.0153, "step": 69990 }, { "epoch": 1.72, "learning_rate": 1.6601403768008868e-05, "loss": 0.0189, "step": 70000 }, { "epoch": 1.72, "eval_bleu": 97.999, "eval_gen_len_ratio": 1.0019, "eval_loss": 0.00936075672507286, "eval_runtime": 256.5421, "eval_samples_per_second": 7.274, "eval_steps_per_second": 0.23, "step": 70000 }, { "epoch": 1.72, "learning_rate": 1.6600911217830317e-05, "loss": 0.0121, "step": 70010 }, { "epoch": 1.72, "learning_rate": 1.6600418667651766e-05, "loss": 0.011, "step": 70020 }, { "epoch": 1.72, "learning_rate": 1.659992611747322e-05, "loss": 0.0103, "step": 70030 }, { "epoch": 1.72, "learning_rate": 1.659943356729467e-05, "loss": 0.0101, "step": 70040 }, { "epoch": 1.72, "learning_rate": 1.659894101711612e-05, "loss": 0.0119, "step": 70050 }, { "epoch": 1.72, "learning_rate": 1.6598448466937573e-05, "loss": 0.0108, "step": 70060 }, { "epoch": 1.72, "learning_rate": 1.6597955916759022e-05, "loss": 0.0102, "step": 70070 }, { "epoch": 1.72, "learning_rate": 1.659746336658047e-05, "loss": 0.0103, "step": 70080 }, { "epoch": 1.72, "learning_rate": 1.6596970816401924e-05, "loss": 0.0108, "step": 70090 }, { "epoch": 1.72, "learning_rate": 1.6596478266223373e-05, "loss": 0.0118, "step": 70100 }, { "epoch": 1.72, "learning_rate": 1.6595985716044822e-05, "loss": 0.0124, "step": 70110 }, { "epoch": 1.72, "learning_rate": 1.6595493165866274e-05, "loss": 0.0132, "step": 70120 }, { "epoch": 1.72, "learning_rate": 1.6595000615687724e-05, "loss": 0.0101, "step": 70130 }, { "epoch": 1.72, "learning_rate": 1.6594508065509176e-05, "loss": 0.0123, "step": 70140 }, { "epoch": 1.72, "learning_rate": 1.6594015515330625e-05, "loss": 0.0124, "step": 70150 }, { "epoch": 1.72, "learning_rate": 1.6593522965152074e-05, "loss": 0.012, "step": 70160 }, { "epoch": 1.72, "learning_rate": 1.6593030414973527e-05, "loss": 0.0109, "step": 70170 }, { "epoch": 1.72, "learning_rate": 1.6592537864794976e-05, "loss": 0.0109, "step": 70180 }, { "epoch": 1.72, "learning_rate": 1.659204531461643e-05, "loss": 0.0127, "step": 70190 }, { "epoch": 1.72, "learning_rate": 1.6591552764437878e-05, "loss": 0.0107, "step": 70200 }, { "epoch": 1.72, "learning_rate": 1.659106021425933e-05, "loss": 0.0128, "step": 70210 }, { "epoch": 1.73, "learning_rate": 1.659056766408078e-05, "loss": 0.0101, "step": 70220 }, { "epoch": 1.73, "learning_rate": 1.6590075113902232e-05, "loss": 0.0087, "step": 70230 }, { "epoch": 1.73, "learning_rate": 1.658958256372368e-05, "loss": 0.0174, "step": 70240 }, { "epoch": 1.73, "learning_rate": 1.658909001354513e-05, "loss": 0.0105, "step": 70250 }, { "epoch": 1.73, "learning_rate": 1.6588597463366583e-05, "loss": 0.0163, "step": 70260 }, { "epoch": 1.73, "learning_rate": 1.6588104913188032e-05, "loss": 0.012, "step": 70270 }, { "epoch": 1.73, "learning_rate": 1.658761236300948e-05, "loss": 0.0191, "step": 70280 }, { "epoch": 1.73, "learning_rate": 1.6587119812830934e-05, "loss": 0.012, "step": 70290 }, { "epoch": 1.73, "learning_rate": 1.6586627262652383e-05, "loss": 0.0118, "step": 70300 }, { "epoch": 1.73, "learning_rate": 1.6586134712473835e-05, "loss": 0.0101, "step": 70310 }, { "epoch": 1.73, "learning_rate": 1.6585642162295284e-05, "loss": 0.0106, "step": 70320 }, { "epoch": 1.73, "learning_rate": 1.6585149612116737e-05, "loss": 0.0115, "step": 70330 }, { "epoch": 1.73, "learning_rate": 1.6584657061938186e-05, "loss": 0.0107, "step": 70340 }, { "epoch": 1.73, "learning_rate": 1.658416451175964e-05, "loss": 0.0168, "step": 70350 }, { "epoch": 1.73, "learning_rate": 1.6583671961581088e-05, "loss": 0.01, "step": 70360 }, { "epoch": 1.73, "learning_rate": 1.658317941140254e-05, "loss": 0.0098, "step": 70370 }, { "epoch": 1.73, "learning_rate": 1.658268686122399e-05, "loss": 0.0205, "step": 70380 }, { "epoch": 1.73, "learning_rate": 1.658219431104544e-05, "loss": 0.0123, "step": 70390 }, { "epoch": 1.73, "learning_rate": 1.658170176086689e-05, "loss": 0.0112, "step": 70400 }, { "epoch": 1.73, "learning_rate": 1.658120921068834e-05, "loss": 0.0119, "step": 70410 }, { "epoch": 1.73, "learning_rate": 1.658071666050979e-05, "loss": 0.0099, "step": 70420 }, { "epoch": 1.73, "learning_rate": 1.6580224110331242e-05, "loss": 0.0121, "step": 70430 }, { "epoch": 1.73, "learning_rate": 1.657973156015269e-05, "loss": 0.0123, "step": 70440 }, { "epoch": 1.73, "learning_rate": 1.657923900997414e-05, "loss": 0.0108, "step": 70450 }, { "epoch": 1.73, "learning_rate": 1.6578746459795593e-05, "loss": 0.0105, "step": 70460 }, { "epoch": 1.73, "learning_rate": 1.6578253909617042e-05, "loss": 0.0121, "step": 70470 }, { "epoch": 1.73, "learning_rate": 1.6577761359438495e-05, "loss": 0.0099, "step": 70480 }, { "epoch": 1.73, "learning_rate": 1.6577268809259944e-05, "loss": 0.0159, "step": 70490 }, { "epoch": 1.73, "learning_rate": 1.6576776259081396e-05, "loss": 0.0119, "step": 70500 }, { "epoch": 1.73, "learning_rate": 1.6576283708902845e-05, "loss": 0.013, "step": 70510 }, { "epoch": 1.73, "learning_rate": 1.6575791158724298e-05, "loss": 0.0197, "step": 70520 }, { "epoch": 1.73, "learning_rate": 1.6575298608545747e-05, "loss": 0.0182, "step": 70530 }, { "epoch": 1.73, "learning_rate": 1.6574806058367196e-05, "loss": 0.0092, "step": 70540 }, { "epoch": 1.73, "learning_rate": 1.657431350818865e-05, "loss": 0.0102, "step": 70550 }, { "epoch": 1.73, "learning_rate": 1.6573820958010098e-05, "loss": 0.0099, "step": 70560 }, { "epoch": 1.73, "learning_rate": 1.6573328407831547e-05, "loss": 0.0127, "step": 70570 }, { "epoch": 1.73, "learning_rate": 1.6572835857653e-05, "loss": 0.0117, "step": 70580 }, { "epoch": 1.73, "learning_rate": 1.657234330747445e-05, "loss": 0.0164, "step": 70590 }, { "epoch": 1.73, "learning_rate": 1.65718507572959e-05, "loss": 0.01, "step": 70600 }, { "epoch": 1.73, "learning_rate": 1.657135820711735e-05, "loss": 0.0124, "step": 70610 }, { "epoch": 1.73, "learning_rate": 1.6570865656938803e-05, "loss": 0.0107, "step": 70620 }, { "epoch": 1.74, "learning_rate": 1.6570373106760252e-05, "loss": 0.0119, "step": 70630 }, { "epoch": 1.74, "learning_rate": 1.6569880556581705e-05, "loss": 0.0123, "step": 70640 }, { "epoch": 1.74, "learning_rate": 1.6569388006403154e-05, "loss": 0.0125, "step": 70650 }, { "epoch": 1.74, "learning_rate": 1.6568895456224606e-05, "loss": 0.0104, "step": 70660 }, { "epoch": 1.74, "learning_rate": 1.6568402906046056e-05, "loss": 0.0114, "step": 70670 }, { "epoch": 1.74, "learning_rate": 1.6567910355867505e-05, "loss": 0.0119, "step": 70680 }, { "epoch": 1.74, "learning_rate": 1.6567417805688957e-05, "loss": 0.0126, "step": 70690 }, { "epoch": 1.74, "learning_rate": 1.6566925255510406e-05, "loss": 0.0098, "step": 70700 }, { "epoch": 1.74, "learning_rate": 1.6566432705331855e-05, "loss": 0.0117, "step": 70710 }, { "epoch": 1.74, "learning_rate": 1.6565940155153308e-05, "loss": 0.0211, "step": 70720 }, { "epoch": 1.74, "learning_rate": 1.6565447604974757e-05, "loss": 0.0117, "step": 70730 }, { "epoch": 1.74, "learning_rate": 1.6564955054796206e-05, "loss": 0.0124, "step": 70740 }, { "epoch": 1.74, "learning_rate": 1.656446250461766e-05, "loss": 0.0247, "step": 70750 }, { "epoch": 1.74, "learning_rate": 1.656396995443911e-05, "loss": 0.0128, "step": 70760 }, { "epoch": 1.74, "learning_rate": 1.656347740426056e-05, "loss": 0.0107, "step": 70770 }, { "epoch": 1.74, "learning_rate": 1.6562984854082013e-05, "loss": 0.0101, "step": 70780 }, { "epoch": 1.74, "learning_rate": 1.6562492303903462e-05, "loss": 0.0126, "step": 70790 }, { "epoch": 1.74, "learning_rate": 1.656199975372491e-05, "loss": 0.0263, "step": 70800 }, { "epoch": 1.74, "learning_rate": 1.6561507203546364e-05, "loss": 0.0172, "step": 70810 }, { "epoch": 1.74, "learning_rate": 1.6561014653367813e-05, "loss": 0.0121, "step": 70820 }, { "epoch": 1.74, "learning_rate": 1.6560522103189262e-05, "loss": 0.0123, "step": 70830 }, { "epoch": 1.74, "learning_rate": 1.6560029553010715e-05, "loss": 0.0133, "step": 70840 }, { "epoch": 1.74, "learning_rate": 1.6559537002832164e-05, "loss": 0.0109, "step": 70850 }, { "epoch": 1.74, "learning_rate": 1.6559044452653616e-05, "loss": 0.0093, "step": 70860 }, { "epoch": 1.74, "learning_rate": 1.6558551902475066e-05, "loss": 0.0106, "step": 70870 }, { "epoch": 1.74, "learning_rate": 1.6558059352296515e-05, "loss": 0.0147, "step": 70880 }, { "epoch": 1.74, "learning_rate": 1.6557566802117967e-05, "loss": 0.0134, "step": 70890 }, { "epoch": 1.74, "learning_rate": 1.6557074251939416e-05, "loss": 0.0103, "step": 70900 }, { "epoch": 1.74, "learning_rate": 1.655658170176087e-05, "loss": 0.0154, "step": 70910 }, { "epoch": 1.74, "learning_rate": 1.6556089151582318e-05, "loss": 0.0116, "step": 70920 }, { "epoch": 1.74, "learning_rate": 1.655559660140377e-05, "loss": 0.0105, "step": 70930 }, { "epoch": 1.74, "learning_rate": 1.655510405122522e-05, "loss": 0.0126, "step": 70940 }, { "epoch": 1.74, "learning_rate": 1.6554611501046672e-05, "loss": 0.0111, "step": 70950 }, { "epoch": 1.74, "learning_rate": 1.655411895086812e-05, "loss": 0.0099, "step": 70960 }, { "epoch": 1.74, "learning_rate": 1.655362640068957e-05, "loss": 0.0118, "step": 70970 }, { "epoch": 1.74, "learning_rate": 1.6553133850511023e-05, "loss": 0.0138, "step": 70980 }, { "epoch": 1.74, "learning_rate": 1.6552641300332472e-05, "loss": 0.0114, "step": 70990 }, { "epoch": 1.74, "learning_rate": 1.655214875015392e-05, "loss": 0.0108, "step": 71000 }, { "epoch": 1.74, "learning_rate": 1.6551656199975374e-05, "loss": 0.0112, "step": 71010 }, { "epoch": 1.74, "learning_rate": 1.6551163649796823e-05, "loss": 0.0121, "step": 71020 }, { "epoch": 1.74, "learning_rate": 1.6550671099618276e-05, "loss": 0.0098, "step": 71030 }, { "epoch": 1.75, "learning_rate": 1.6550178549439725e-05, "loss": 0.011, "step": 71040 }, { "epoch": 1.75, "learning_rate": 1.6549685999261177e-05, "loss": 0.0098, "step": 71050 }, { "epoch": 1.75, "learning_rate": 1.6549193449082627e-05, "loss": 0.0099, "step": 71060 }, { "epoch": 1.75, "learning_rate": 1.654870089890408e-05, "loss": 0.0194, "step": 71070 }, { "epoch": 1.75, "learning_rate": 1.6548208348725528e-05, "loss": 0.0127, "step": 71080 }, { "epoch": 1.75, "learning_rate": 1.654771579854698e-05, "loss": 0.0144, "step": 71090 }, { "epoch": 1.75, "learning_rate": 1.654722324836843e-05, "loss": 0.0128, "step": 71100 }, { "epoch": 1.75, "learning_rate": 1.654673069818988e-05, "loss": 0.0143, "step": 71110 }, { "epoch": 1.75, "learning_rate": 1.654623814801133e-05, "loss": 0.0104, "step": 71120 }, { "epoch": 1.75, "learning_rate": 1.654574559783278e-05, "loss": 0.0094, "step": 71130 }, { "epoch": 1.75, "learning_rate": 1.654525304765423e-05, "loss": 0.0249, "step": 71140 }, { "epoch": 1.75, "learning_rate": 1.6544760497475682e-05, "loss": 0.013, "step": 71150 }, { "epoch": 1.75, "learning_rate": 1.654426794729713e-05, "loss": 0.0113, "step": 71160 }, { "epoch": 1.75, "learning_rate": 1.654377539711858e-05, "loss": 0.0163, "step": 71170 }, { "epoch": 1.75, "learning_rate": 1.6543282846940033e-05, "loss": 0.012, "step": 71180 }, { "epoch": 1.75, "learning_rate": 1.6542790296761482e-05, "loss": 0.0114, "step": 71190 }, { "epoch": 1.75, "learning_rate": 1.6542297746582935e-05, "loss": 0.0108, "step": 71200 }, { "epoch": 1.75, "learning_rate": 1.6541805196404384e-05, "loss": 0.0215, "step": 71210 }, { "epoch": 1.75, "learning_rate": 1.6541312646225837e-05, "loss": 0.0097, "step": 71220 }, { "epoch": 1.75, "learning_rate": 1.6540820096047286e-05, "loss": 0.0106, "step": 71230 }, { "epoch": 1.75, "learning_rate": 1.6540327545868738e-05, "loss": 0.0111, "step": 71240 }, { "epoch": 1.75, "learning_rate": 1.6539834995690187e-05, "loss": 0.0101, "step": 71250 }, { "epoch": 1.75, "learning_rate": 1.6539342445511637e-05, "loss": 0.0125, "step": 71260 }, { "epoch": 1.75, "learning_rate": 1.653884989533309e-05, "loss": 0.0118, "step": 71270 }, { "epoch": 1.75, "learning_rate": 1.6538357345154538e-05, "loss": 0.0115, "step": 71280 }, { "epoch": 1.75, "learning_rate": 1.6537864794975987e-05, "loss": 0.0122, "step": 71290 }, { "epoch": 1.75, "learning_rate": 1.653737224479744e-05, "loss": 0.0112, "step": 71300 }, { "epoch": 1.75, "learning_rate": 1.653687969461889e-05, "loss": 0.0156, "step": 71310 }, { "epoch": 1.75, "learning_rate": 1.653638714444034e-05, "loss": 0.0125, "step": 71320 }, { "epoch": 1.75, "learning_rate": 1.653589459426179e-05, "loss": 0.013, "step": 71330 }, { "epoch": 1.75, "learning_rate": 1.6535402044083243e-05, "loss": 0.0212, "step": 71340 }, { "epoch": 1.75, "learning_rate": 1.6534909493904692e-05, "loss": 0.0136, "step": 71350 }, { "epoch": 1.75, "learning_rate": 1.6534416943726145e-05, "loss": 0.0118, "step": 71360 }, { "epoch": 1.75, "learning_rate": 1.6533924393547594e-05, "loss": 0.0144, "step": 71370 }, { "epoch": 1.75, "learning_rate": 1.6533431843369047e-05, "loss": 0.0116, "step": 71380 }, { "epoch": 1.75, "learning_rate": 1.6532939293190496e-05, "loss": 0.0233, "step": 71390 }, { "epoch": 1.75, "learning_rate": 1.6532446743011945e-05, "loss": 0.0113, "step": 71400 }, { "epoch": 1.75, "learning_rate": 1.6531954192833398e-05, "loss": 0.0151, "step": 71410 }, { "epoch": 1.75, "learning_rate": 1.6531461642654847e-05, "loss": 0.0186, "step": 71420 }, { "epoch": 1.75, "learning_rate": 1.6530969092476296e-05, "loss": 0.0154, "step": 71430 }, { "epoch": 1.76, "learning_rate": 1.653047654229775e-05, "loss": 0.0106, "step": 71440 }, { "epoch": 1.76, "learning_rate": 1.6529983992119198e-05, "loss": 0.0177, "step": 71450 }, { "epoch": 1.76, "learning_rate": 1.6529491441940647e-05, "loss": 0.0121, "step": 71460 }, { "epoch": 1.76, "learning_rate": 1.65289988917621e-05, "loss": 0.0114, "step": 71470 }, { "epoch": 1.76, "learning_rate": 1.652850634158355e-05, "loss": 0.0134, "step": 71480 }, { "epoch": 1.76, "learning_rate": 1.6528013791405e-05, "loss": 0.0155, "step": 71490 }, { "epoch": 1.76, "learning_rate": 1.6527521241226453e-05, "loss": 0.0113, "step": 71500 }, { "epoch": 1.76, "learning_rate": 1.6527028691047903e-05, "loss": 0.0117, "step": 71510 }, { "epoch": 1.76, "learning_rate": 1.6526536140869352e-05, "loss": 0.0142, "step": 71520 }, { "epoch": 1.76, "learning_rate": 1.6526043590690804e-05, "loss": 0.0168, "step": 71530 }, { "epoch": 1.76, "learning_rate": 1.6525551040512253e-05, "loss": 0.0215, "step": 71540 }, { "epoch": 1.76, "learning_rate": 1.6525058490333703e-05, "loss": 0.0125, "step": 71550 }, { "epoch": 1.76, "learning_rate": 1.6524565940155155e-05, "loss": 0.0108, "step": 71560 }, { "epoch": 1.76, "learning_rate": 1.6524073389976604e-05, "loss": 0.0112, "step": 71570 }, { "epoch": 1.76, "learning_rate": 1.6523580839798057e-05, "loss": 0.0108, "step": 71580 }, { "epoch": 1.76, "learning_rate": 1.6523088289619506e-05, "loss": 0.0123, "step": 71590 }, { "epoch": 1.76, "learning_rate": 1.6522595739440955e-05, "loss": 0.0117, "step": 71600 }, { "epoch": 1.76, "learning_rate": 1.6522103189262408e-05, "loss": 0.0122, "step": 71610 }, { "epoch": 1.76, "learning_rate": 1.6521610639083857e-05, "loss": 0.0103, "step": 71620 }, { "epoch": 1.76, "learning_rate": 1.652111808890531e-05, "loss": 0.0222, "step": 71630 }, { "epoch": 1.76, "learning_rate": 1.652062553872676e-05, "loss": 0.0166, "step": 71640 }, { "epoch": 1.76, "learning_rate": 1.652013298854821e-05, "loss": 0.0112, "step": 71650 }, { "epoch": 1.76, "learning_rate": 1.651964043836966e-05, "loss": 0.01, "step": 71660 }, { "epoch": 1.76, "learning_rate": 1.6519147888191113e-05, "loss": 0.0108, "step": 71670 }, { "epoch": 1.76, "learning_rate": 1.6518655338012562e-05, "loss": 0.0118, "step": 71680 }, { "epoch": 1.76, "learning_rate": 1.651816278783401e-05, "loss": 0.0095, "step": 71690 }, { "epoch": 1.76, "learning_rate": 1.6517670237655464e-05, "loss": 0.0123, "step": 71700 }, { "epoch": 1.76, "learning_rate": 1.6517177687476913e-05, "loss": 0.0096, "step": 71710 }, { "epoch": 1.76, "learning_rate": 1.6516685137298362e-05, "loss": 0.0117, "step": 71720 }, { "epoch": 1.76, "learning_rate": 1.6516192587119814e-05, "loss": 0.012, "step": 71730 }, { "epoch": 1.76, "learning_rate": 1.6515700036941263e-05, "loss": 0.0114, "step": 71740 }, { "epoch": 1.76, "learning_rate": 1.6515207486762716e-05, "loss": 0.0118, "step": 71750 }, { "epoch": 1.76, "learning_rate": 1.6514714936584165e-05, "loss": 0.0169, "step": 71760 }, { "epoch": 1.76, "learning_rate": 1.6514222386405618e-05, "loss": 0.0134, "step": 71770 }, { "epoch": 1.76, "learning_rate": 1.6513729836227067e-05, "loss": 0.014, "step": 71780 }, { "epoch": 1.76, "learning_rate": 1.651323728604852e-05, "loss": 0.0179, "step": 71790 }, { "epoch": 1.76, "learning_rate": 1.651274473586997e-05, "loss": 0.0117, "step": 71800 }, { "epoch": 1.76, "learning_rate": 1.651225218569142e-05, "loss": 0.0095, "step": 71810 }, { "epoch": 1.76, "learning_rate": 1.651175963551287e-05, "loss": 0.0122, "step": 71820 }, { "epoch": 1.76, "learning_rate": 1.651126708533432e-05, "loss": 0.0096, "step": 71830 }, { "epoch": 1.76, "learning_rate": 1.6510774535155772e-05, "loss": 0.0122, "step": 71840 }, { "epoch": 1.77, "learning_rate": 1.651028198497722e-05, "loss": 0.0116, "step": 71850 }, { "epoch": 1.77, "learning_rate": 1.650978943479867e-05, "loss": 0.0114, "step": 71860 }, { "epoch": 1.77, "learning_rate": 1.6509296884620123e-05, "loss": 0.0125, "step": 71870 }, { "epoch": 1.77, "learning_rate": 1.6508804334441572e-05, "loss": 0.0126, "step": 71880 }, { "epoch": 1.77, "learning_rate": 1.650831178426302e-05, "loss": 0.0118, "step": 71890 }, { "epoch": 1.77, "learning_rate": 1.6507819234084474e-05, "loss": 0.0102, "step": 71900 }, { "epoch": 1.77, "learning_rate": 1.6507326683905923e-05, "loss": 0.0117, "step": 71910 }, { "epoch": 1.77, "learning_rate": 1.6506834133727375e-05, "loss": 0.0122, "step": 71920 }, { "epoch": 1.77, "learning_rate": 1.6506341583548824e-05, "loss": 0.0115, "step": 71930 }, { "epoch": 1.77, "learning_rate": 1.6505849033370277e-05, "loss": 0.0116, "step": 71940 }, { "epoch": 1.77, "learning_rate": 1.6505356483191726e-05, "loss": 0.0124, "step": 71950 }, { "epoch": 1.77, "learning_rate": 1.650486393301318e-05, "loss": 0.0164, "step": 71960 }, { "epoch": 1.77, "learning_rate": 1.6504371382834628e-05, "loss": 0.0107, "step": 71970 }, { "epoch": 1.77, "learning_rate": 1.6503878832656077e-05, "loss": 0.0111, "step": 71980 }, { "epoch": 1.77, "learning_rate": 1.650338628247753e-05, "loss": 0.0115, "step": 71990 }, { "epoch": 1.77, "learning_rate": 1.650289373229898e-05, "loss": 0.0124, "step": 72000 }, { "epoch": 1.77, "learning_rate": 1.6502401182120428e-05, "loss": 0.0111, "step": 72010 }, { "epoch": 1.77, "learning_rate": 1.650190863194188e-05, "loss": 0.0082, "step": 72020 }, { "epoch": 1.77, "learning_rate": 1.650141608176333e-05, "loss": 0.0104, "step": 72030 }, { "epoch": 1.77, "learning_rate": 1.6500923531584782e-05, "loss": 0.0095, "step": 72040 }, { "epoch": 1.77, "learning_rate": 1.650043098140623e-05, "loss": 0.0184, "step": 72050 }, { "epoch": 1.77, "learning_rate": 1.6499938431227684e-05, "loss": 0.0114, "step": 72060 }, { "epoch": 1.77, "learning_rate": 1.6499445881049133e-05, "loss": 0.0139, "step": 72070 }, { "epoch": 1.77, "learning_rate": 1.6498953330870585e-05, "loss": 0.0154, "step": 72080 }, { "epoch": 1.77, "learning_rate": 1.6498460780692035e-05, "loss": 0.0118, "step": 72090 }, { "epoch": 1.77, "learning_rate": 1.6497968230513487e-05, "loss": 0.0171, "step": 72100 }, { "epoch": 1.77, "learning_rate": 1.6497475680334936e-05, "loss": 0.0157, "step": 72110 }, { "epoch": 1.77, "learning_rate": 1.6496983130156385e-05, "loss": 0.0141, "step": 72120 }, { "epoch": 1.77, "learning_rate": 1.6496490579977838e-05, "loss": 0.0115, "step": 72130 }, { "epoch": 1.77, "learning_rate": 1.6495998029799287e-05, "loss": 0.0096, "step": 72140 }, { "epoch": 1.77, "learning_rate": 1.6495505479620736e-05, "loss": 0.01, "step": 72150 }, { "epoch": 1.77, "learning_rate": 1.649501292944219e-05, "loss": 0.0096, "step": 72160 }, { "epoch": 1.77, "learning_rate": 1.6494520379263638e-05, "loss": 0.0121, "step": 72170 }, { "epoch": 1.77, "learning_rate": 1.6494027829085087e-05, "loss": 0.016, "step": 72180 }, { "epoch": 1.77, "learning_rate": 1.649353527890654e-05, "loss": 0.0131, "step": 72190 }, { "epoch": 1.77, "learning_rate": 1.649304272872799e-05, "loss": 0.0104, "step": 72200 }, { "epoch": 1.77, "learning_rate": 1.649255017854944e-05, "loss": 0.0102, "step": 72210 }, { "epoch": 1.77, "learning_rate": 1.6492057628370894e-05, "loss": 0.0145, "step": 72220 }, { "epoch": 1.77, "learning_rate": 1.6491565078192343e-05, "loss": 0.013, "step": 72230 }, { "epoch": 1.77, "learning_rate": 1.6491072528013792e-05, "loss": 0.017, "step": 72240 }, { "epoch": 1.77, "learning_rate": 1.6490579977835245e-05, "loss": 0.0102, "step": 72250 }, { "epoch": 1.78, "learning_rate": 1.6490087427656694e-05, "loss": 0.0145, "step": 72260 }, { "epoch": 1.78, "learning_rate": 1.6489594877478143e-05, "loss": 0.0104, "step": 72270 }, { "epoch": 1.78, "learning_rate": 1.6489102327299595e-05, "loss": 0.0137, "step": 72280 }, { "epoch": 1.78, "learning_rate": 1.6488609777121045e-05, "loss": 0.0306, "step": 72290 }, { "epoch": 1.78, "learning_rate": 1.6488117226942497e-05, "loss": 0.0118, "step": 72300 }, { "epoch": 1.78, "learning_rate": 1.6487624676763946e-05, "loss": 0.0118, "step": 72310 }, { "epoch": 1.78, "learning_rate": 1.6487132126585395e-05, "loss": 0.0153, "step": 72320 }, { "epoch": 1.78, "learning_rate": 1.6486639576406848e-05, "loss": 0.0107, "step": 72330 }, { "epoch": 1.78, "learning_rate": 1.6486147026228297e-05, "loss": 0.0109, "step": 72340 }, { "epoch": 1.78, "learning_rate": 1.648565447604975e-05, "loss": 0.0121, "step": 72350 }, { "epoch": 1.78, "learning_rate": 1.64851619258712e-05, "loss": 0.0102, "step": 72360 }, { "epoch": 1.78, "learning_rate": 1.648466937569265e-05, "loss": 0.0094, "step": 72370 }, { "epoch": 1.78, "learning_rate": 1.64841768255141e-05, "loss": 0.0187, "step": 72380 }, { "epoch": 1.78, "learning_rate": 1.6483684275335553e-05, "loss": 0.0116, "step": 72390 }, { "epoch": 1.78, "learning_rate": 1.6483191725157002e-05, "loss": 0.0103, "step": 72400 }, { "epoch": 1.78, "learning_rate": 1.648269917497845e-05, "loss": 0.0228, "step": 72410 }, { "epoch": 1.78, "learning_rate": 1.6482206624799904e-05, "loss": 0.0111, "step": 72420 }, { "epoch": 1.78, "learning_rate": 1.6481714074621353e-05, "loss": 0.0102, "step": 72430 }, { "epoch": 1.78, "learning_rate": 1.6481221524442802e-05, "loss": 0.0114, "step": 72440 }, { "epoch": 1.78, "learning_rate": 1.6480728974264255e-05, "loss": 0.0158, "step": 72450 }, { "epoch": 1.78, "learning_rate": 1.6480236424085704e-05, "loss": 0.0109, "step": 72460 }, { "epoch": 1.78, "learning_rate": 1.6479743873907156e-05, "loss": 0.0199, "step": 72470 }, { "epoch": 1.78, "learning_rate": 1.6479251323728606e-05, "loss": 0.0113, "step": 72480 }, { "epoch": 1.78, "learning_rate": 1.6478758773550058e-05, "loss": 0.014, "step": 72490 }, { "epoch": 1.78, "learning_rate": 1.6478266223371507e-05, "loss": 0.0109, "step": 72500 }, { "epoch": 1.78, "learning_rate": 1.647777367319296e-05, "loss": 0.0122, "step": 72510 }, { "epoch": 1.78, "learning_rate": 1.647728112301441e-05, "loss": 0.0121, "step": 72520 }, { "epoch": 1.78, "learning_rate": 1.647678857283586e-05, "loss": 0.0101, "step": 72530 }, { "epoch": 1.78, "learning_rate": 1.647629602265731e-05, "loss": 0.0104, "step": 72540 }, { "epoch": 1.78, "learning_rate": 1.647580347247876e-05, "loss": 0.011, "step": 72550 }, { "epoch": 1.78, "learning_rate": 1.6475310922300212e-05, "loss": 0.0124, "step": 72560 }, { "epoch": 1.78, "learning_rate": 1.647481837212166e-05, "loss": 0.0156, "step": 72570 }, { "epoch": 1.78, "learning_rate": 1.647432582194311e-05, "loss": 0.0147, "step": 72580 }, { "epoch": 1.78, "learning_rate": 1.6473833271764563e-05, "loss": 0.0132, "step": 72590 }, { "epoch": 1.78, "learning_rate": 1.6473340721586012e-05, "loss": 0.0098, "step": 72600 }, { "epoch": 1.78, "learning_rate": 1.647284817140746e-05, "loss": 0.0108, "step": 72610 }, { "epoch": 1.78, "learning_rate": 1.6472355621228914e-05, "loss": 0.012, "step": 72620 }, { "epoch": 1.78, "learning_rate": 1.6471863071050363e-05, "loss": 0.0127, "step": 72630 }, { "epoch": 1.78, "learning_rate": 1.6471370520871816e-05, "loss": 0.0214, "step": 72640 }, { "epoch": 1.78, "learning_rate": 1.6470877970693265e-05, "loss": 0.0212, "step": 72650 }, { "epoch": 1.79, "learning_rate": 1.6470385420514717e-05, "loss": 0.0118, "step": 72660 }, { "epoch": 1.79, "learning_rate": 1.6469892870336166e-05, "loss": 0.0191, "step": 72670 }, { "epoch": 1.79, "learning_rate": 1.646940032015762e-05, "loss": 0.0094, "step": 72680 }, { "epoch": 1.79, "learning_rate": 1.6468907769979068e-05, "loss": 0.0081, "step": 72690 }, { "epoch": 1.79, "learning_rate": 1.6468415219800517e-05, "loss": 0.0121, "step": 72700 }, { "epoch": 1.79, "learning_rate": 1.646792266962197e-05, "loss": 0.0114, "step": 72710 }, { "epoch": 1.79, "learning_rate": 1.646743011944342e-05, "loss": 0.0102, "step": 72720 }, { "epoch": 1.79, "learning_rate": 1.6466937569264868e-05, "loss": 0.023, "step": 72730 }, { "epoch": 1.79, "learning_rate": 1.646644501908632e-05, "loss": 0.0113, "step": 72740 }, { "epoch": 1.79, "learning_rate": 1.646595246890777e-05, "loss": 0.0117, "step": 72750 }, { "epoch": 1.79, "learning_rate": 1.6465459918729222e-05, "loss": 0.01, "step": 72760 }, { "epoch": 1.79, "learning_rate": 1.646496736855067e-05, "loss": 0.0133, "step": 72770 }, { "epoch": 1.79, "learning_rate": 1.6464474818372124e-05, "loss": 0.0151, "step": 72780 }, { "epoch": 1.79, "learning_rate": 1.6463982268193573e-05, "loss": 0.0102, "step": 72790 }, { "epoch": 1.79, "learning_rate": 1.6463489718015026e-05, "loss": 0.0118, "step": 72800 }, { "epoch": 1.79, "learning_rate": 1.6462997167836475e-05, "loss": 0.0214, "step": 72810 }, { "epoch": 1.79, "learning_rate": 1.6462504617657927e-05, "loss": 0.0135, "step": 72820 }, { "epoch": 1.79, "learning_rate": 1.6462012067479377e-05, "loss": 0.0106, "step": 72830 }, { "epoch": 1.79, "learning_rate": 1.6461519517300826e-05, "loss": 0.0104, "step": 72840 }, { "epoch": 1.79, "learning_rate": 1.6461026967122278e-05, "loss": 0.0116, "step": 72850 }, { "epoch": 1.79, "learning_rate": 1.6460534416943727e-05, "loss": 0.0121, "step": 72860 }, { "epoch": 1.79, "learning_rate": 1.6460041866765177e-05, "loss": 0.0104, "step": 72870 }, { "epoch": 1.79, "learning_rate": 1.645954931658663e-05, "loss": 0.011, "step": 72880 }, { "epoch": 1.79, "learning_rate": 1.6459056766408078e-05, "loss": 0.0114, "step": 72890 }, { "epoch": 1.79, "learning_rate": 1.6458564216229527e-05, "loss": 0.0113, "step": 72900 }, { "epoch": 1.79, "learning_rate": 1.645807166605098e-05, "loss": 0.0136, "step": 72910 }, { "epoch": 1.79, "learning_rate": 1.645757911587243e-05, "loss": 0.0108, "step": 72920 }, { "epoch": 1.79, "learning_rate": 1.645708656569388e-05, "loss": 0.0155, "step": 72930 }, { "epoch": 1.79, "learning_rate": 1.6456594015515334e-05, "loss": 0.0116, "step": 72940 }, { "epoch": 1.79, "learning_rate": 1.6456101465336783e-05, "loss": 0.0088, "step": 72950 }, { "epoch": 1.79, "learning_rate": 1.6455608915158232e-05, "loss": 0.0102, "step": 72960 }, { "epoch": 1.79, "learning_rate": 1.6455116364979685e-05, "loss": 0.0099, "step": 72970 }, { "epoch": 1.79, "learning_rate": 1.6454623814801134e-05, "loss": 0.0111, "step": 72980 }, { "epoch": 1.79, "learning_rate": 1.6454131264622583e-05, "loss": 0.0183, "step": 72990 }, { "epoch": 1.79, "learning_rate": 1.6453638714444036e-05, "loss": 0.0114, "step": 73000 }, { "epoch": 1.79, "learning_rate": 1.6453146164265485e-05, "loss": 0.0188, "step": 73010 }, { "epoch": 1.79, "learning_rate": 1.6452653614086937e-05, "loss": 0.0097, "step": 73020 }, { "epoch": 1.79, "learning_rate": 1.6452161063908387e-05, "loss": 0.0147, "step": 73030 }, { "epoch": 1.79, "learning_rate": 1.6451668513729836e-05, "loss": 0.0282, "step": 73040 }, { "epoch": 1.79, "learning_rate": 1.6451175963551288e-05, "loss": 0.0128, "step": 73050 }, { "epoch": 1.79, "learning_rate": 1.6450683413372737e-05, "loss": 0.0098, "step": 73060 }, { "epoch": 1.8, "learning_rate": 1.645019086319419e-05, "loss": 0.0127, "step": 73070 }, { "epoch": 1.8, "learning_rate": 1.644969831301564e-05, "loss": 0.0101, "step": 73080 }, { "epoch": 1.8, "learning_rate": 1.644920576283709e-05, "loss": 0.0204, "step": 73090 }, { "epoch": 1.8, "learning_rate": 1.644871321265854e-05, "loss": 0.0096, "step": 73100 }, { "epoch": 1.8, "learning_rate": 1.6448220662479993e-05, "loss": 0.0092, "step": 73110 }, { "epoch": 1.8, "learning_rate": 1.6447728112301443e-05, "loss": 0.0188, "step": 73120 }, { "epoch": 1.8, "learning_rate": 1.644723556212289e-05, "loss": 0.0189, "step": 73130 }, { "epoch": 1.8, "learning_rate": 1.6446743011944344e-05, "loss": 0.0126, "step": 73140 }, { "epoch": 1.8, "learning_rate": 1.6446250461765793e-05, "loss": 0.0136, "step": 73150 }, { "epoch": 1.8, "learning_rate": 1.6445757911587242e-05, "loss": 0.0161, "step": 73160 }, { "epoch": 1.8, "learning_rate": 1.6445265361408695e-05, "loss": 0.0099, "step": 73170 }, { "epoch": 1.8, "learning_rate": 1.6444772811230144e-05, "loss": 0.0111, "step": 73180 }, { "epoch": 1.8, "learning_rate": 1.6444280261051597e-05, "loss": 0.0118, "step": 73190 }, { "epoch": 1.8, "learning_rate": 1.6443787710873046e-05, "loss": 0.0108, "step": 73200 }, { "epoch": 1.8, "learning_rate": 1.64432951606945e-05, "loss": 0.0115, "step": 73210 }, { "epoch": 1.8, "learning_rate": 1.6442802610515948e-05, "loss": 0.009, "step": 73220 }, { "epoch": 1.8, "learning_rate": 1.64423100603374e-05, "loss": 0.0115, "step": 73230 }, { "epoch": 1.8, "learning_rate": 1.644181751015885e-05, "loss": 0.01, "step": 73240 }, { "epoch": 1.8, "learning_rate": 1.6441324959980302e-05, "loss": 0.013, "step": 73250 }, { "epoch": 1.8, "learning_rate": 1.644083240980175e-05, "loss": 0.0098, "step": 73260 }, { "epoch": 1.8, "learning_rate": 1.64403398596232e-05, "loss": 0.0139, "step": 73270 }, { "epoch": 1.8, "learning_rate": 1.6439847309444653e-05, "loss": 0.0138, "step": 73280 }, { "epoch": 1.8, "learning_rate": 1.6439354759266102e-05, "loss": 0.0108, "step": 73290 }, { "epoch": 1.8, "learning_rate": 1.643886220908755e-05, "loss": 0.0101, "step": 73300 }, { "epoch": 1.8, "learning_rate": 1.6438369658909003e-05, "loss": 0.0118, "step": 73310 }, { "epoch": 1.8, "learning_rate": 1.6437877108730453e-05, "loss": 0.0135, "step": 73320 }, { "epoch": 1.8, "learning_rate": 1.6437384558551902e-05, "loss": 0.0121, "step": 73330 }, { "epoch": 1.8, "learning_rate": 1.6436892008373354e-05, "loss": 0.0102, "step": 73340 }, { "epoch": 1.8, "learning_rate": 1.6436399458194803e-05, "loss": 0.0091, "step": 73350 }, { "epoch": 1.8, "learning_rate": 1.6435906908016256e-05, "loss": 0.0118, "step": 73360 }, { "epoch": 1.8, "learning_rate": 1.6435414357837705e-05, "loss": 0.0186, "step": 73370 }, { "epoch": 1.8, "learning_rate": 1.6434921807659158e-05, "loss": 0.0129, "step": 73380 }, { "epoch": 1.8, "learning_rate": 1.6434429257480607e-05, "loss": 0.0102, "step": 73390 }, { "epoch": 1.8, "learning_rate": 1.643393670730206e-05, "loss": 0.0136, "step": 73400 }, { "epoch": 1.8, "learning_rate": 1.643344415712351e-05, "loss": 0.0103, "step": 73410 }, { "epoch": 1.8, "learning_rate": 1.6432951606944958e-05, "loss": 0.0144, "step": 73420 }, { "epoch": 1.8, "learning_rate": 1.643245905676641e-05, "loss": 0.0184, "step": 73430 }, { "epoch": 1.8, "learning_rate": 1.643196650658786e-05, "loss": 0.0139, "step": 73440 }, { "epoch": 1.8, "learning_rate": 1.643147395640931e-05, "loss": 0.0107, "step": 73450 }, { "epoch": 1.8, "learning_rate": 1.643098140623076e-05, "loss": 0.0112, "step": 73460 }, { "epoch": 1.8, "learning_rate": 1.643048885605221e-05, "loss": 0.0106, "step": 73470 }, { "epoch": 1.81, "learning_rate": 1.6429996305873663e-05, "loss": 0.0109, "step": 73480 }, { "epoch": 1.81, "learning_rate": 1.6429503755695112e-05, "loss": 0.0109, "step": 73490 }, { "epoch": 1.81, "learning_rate": 1.6429011205516564e-05, "loss": 0.0103, "step": 73500 }, { "epoch": 1.81, "learning_rate": 1.6428518655338014e-05, "loss": 0.0191, "step": 73510 }, { "epoch": 1.81, "learning_rate": 1.6428026105159466e-05, "loss": 0.0097, "step": 73520 }, { "epoch": 1.81, "learning_rate": 1.6427533554980915e-05, "loss": 0.0149, "step": 73530 }, { "epoch": 1.81, "learning_rate": 1.6427041004802368e-05, "loss": 0.0126, "step": 73540 }, { "epoch": 1.81, "learning_rate": 1.6426548454623817e-05, "loss": 0.0174, "step": 73550 }, { "epoch": 1.81, "learning_rate": 1.6426055904445266e-05, "loss": 0.0091, "step": 73560 }, { "epoch": 1.81, "learning_rate": 1.642556335426672e-05, "loss": 0.0099, "step": 73570 }, { "epoch": 1.81, "learning_rate": 1.6425070804088168e-05, "loss": 0.0131, "step": 73580 }, { "epoch": 1.81, "learning_rate": 1.6424578253909617e-05, "loss": 0.0109, "step": 73590 }, { "epoch": 1.81, "learning_rate": 1.642408570373107e-05, "loss": 0.0242, "step": 73600 }, { "epoch": 1.81, "learning_rate": 1.642359315355252e-05, "loss": 0.0122, "step": 73610 }, { "epoch": 1.81, "learning_rate": 1.6423100603373968e-05, "loss": 0.0115, "step": 73620 }, { "epoch": 1.81, "learning_rate": 1.642260805319542e-05, "loss": 0.0117, "step": 73630 }, { "epoch": 1.81, "learning_rate": 1.642211550301687e-05, "loss": 0.0112, "step": 73640 }, { "epoch": 1.81, "learning_rate": 1.6421622952838322e-05, "loss": 0.0111, "step": 73650 }, { "epoch": 1.81, "learning_rate": 1.6421130402659774e-05, "loss": 0.0137, "step": 73660 }, { "epoch": 1.81, "learning_rate": 1.6420637852481224e-05, "loss": 0.0107, "step": 73670 }, { "epoch": 1.81, "learning_rate": 1.6420145302302673e-05, "loss": 0.0197, "step": 73680 }, { "epoch": 1.81, "learning_rate": 1.6419652752124125e-05, "loss": 0.012, "step": 73690 }, { "epoch": 1.81, "learning_rate": 1.6419160201945574e-05, "loss": 0.0122, "step": 73700 }, { "epoch": 1.81, "learning_rate": 1.6418667651767024e-05, "loss": 0.0143, "step": 73710 }, { "epoch": 1.81, "learning_rate": 1.6418175101588476e-05, "loss": 0.0121, "step": 73720 }, { "epoch": 1.81, "learning_rate": 1.6417682551409925e-05, "loss": 0.0147, "step": 73730 }, { "epoch": 1.81, "learning_rate": 1.6417190001231378e-05, "loss": 0.0122, "step": 73740 }, { "epoch": 1.81, "learning_rate": 1.6416697451052827e-05, "loss": 0.0124, "step": 73750 }, { "epoch": 1.81, "learning_rate": 1.6416204900874276e-05, "loss": 0.0186, "step": 73760 }, { "epoch": 1.81, "learning_rate": 1.641571235069573e-05, "loss": 0.0101, "step": 73770 }, { "epoch": 1.81, "learning_rate": 1.6415219800517178e-05, "loss": 0.0104, "step": 73780 }, { "epoch": 1.81, "learning_rate": 1.641472725033863e-05, "loss": 0.0124, "step": 73790 }, { "epoch": 1.81, "learning_rate": 1.641423470016008e-05, "loss": 0.0135, "step": 73800 }, { "epoch": 1.81, "learning_rate": 1.6413742149981532e-05, "loss": 0.0103, "step": 73810 }, { "epoch": 1.81, "learning_rate": 1.641324959980298e-05, "loss": 0.012, "step": 73820 }, { "epoch": 1.81, "learning_rate": 1.6412757049624434e-05, "loss": 0.0108, "step": 73830 }, { "epoch": 1.81, "learning_rate": 1.6412264499445883e-05, "loss": 0.0116, "step": 73840 }, { "epoch": 1.81, "learning_rate": 1.6411771949267332e-05, "loss": 0.0113, "step": 73850 }, { "epoch": 1.81, "learning_rate": 1.6411279399088785e-05, "loss": 0.0115, "step": 73860 }, { "epoch": 1.81, "learning_rate": 1.6410786848910234e-05, "loss": 0.012, "step": 73870 }, { "epoch": 1.82, "learning_rate": 1.6410294298731683e-05, "loss": 0.0114, "step": 73880 }, { "epoch": 1.82, "learning_rate": 1.6409801748553135e-05, "loss": 0.011, "step": 73890 }, { "epoch": 1.82, "learning_rate": 1.6409309198374585e-05, "loss": 0.0125, "step": 73900 }, { "epoch": 1.82, "learning_rate": 1.6408816648196037e-05, "loss": 0.0106, "step": 73910 }, { "epoch": 1.82, "learning_rate": 1.6408324098017486e-05, "loss": 0.0252, "step": 73920 }, { "epoch": 1.82, "learning_rate": 1.640783154783894e-05, "loss": 0.0121, "step": 73930 }, { "epoch": 1.82, "learning_rate": 1.6407338997660388e-05, "loss": 0.0154, "step": 73940 }, { "epoch": 1.82, "learning_rate": 1.640684644748184e-05, "loss": 0.013, "step": 73950 }, { "epoch": 1.82, "learning_rate": 1.640635389730329e-05, "loss": 0.0091, "step": 73960 }, { "epoch": 1.82, "learning_rate": 1.6405861347124742e-05, "loss": 0.0092, "step": 73970 }, { "epoch": 1.82, "learning_rate": 1.640536879694619e-05, "loss": 0.0106, "step": 73980 }, { "epoch": 1.82, "learning_rate": 1.640487624676764e-05, "loss": 0.0125, "step": 73990 }, { "epoch": 1.82, "learning_rate": 1.6404383696589093e-05, "loss": 0.0175, "step": 74000 }, { "epoch": 1.82, "learning_rate": 1.6403891146410542e-05, "loss": 0.0105, "step": 74010 }, { "epoch": 1.82, "learning_rate": 1.640339859623199e-05, "loss": 0.0157, "step": 74020 }, { "epoch": 1.82, "learning_rate": 1.6402906046053444e-05, "loss": 0.0135, "step": 74030 }, { "epoch": 1.82, "learning_rate": 1.6402413495874893e-05, "loss": 0.0119, "step": 74040 }, { "epoch": 1.82, "learning_rate": 1.6401920945696342e-05, "loss": 0.0112, "step": 74050 }, { "epoch": 1.82, "learning_rate": 1.6401428395517795e-05, "loss": 0.0105, "step": 74060 }, { "epoch": 1.82, "learning_rate": 1.6400935845339244e-05, "loss": 0.0107, "step": 74070 }, { "epoch": 1.82, "learning_rate": 1.6400443295160696e-05, "loss": 0.0197, "step": 74080 }, { "epoch": 1.82, "learning_rate": 1.6399950744982145e-05, "loss": 0.0115, "step": 74090 }, { "epoch": 1.82, "learning_rate": 1.6399458194803598e-05, "loss": 0.0113, "step": 74100 }, { "epoch": 1.82, "learning_rate": 1.6398965644625047e-05, "loss": 0.0102, "step": 74110 }, { "epoch": 1.82, "learning_rate": 1.63984730944465e-05, "loss": 0.011, "step": 74120 }, { "epoch": 1.82, "learning_rate": 1.639798054426795e-05, "loss": 0.0114, "step": 74130 }, { "epoch": 1.82, "learning_rate": 1.6397487994089398e-05, "loss": 0.011, "step": 74140 }, { "epoch": 1.82, "learning_rate": 1.639699544391085e-05, "loss": 0.0113, "step": 74150 }, { "epoch": 1.82, "learning_rate": 1.63965028937323e-05, "loss": 0.0104, "step": 74160 }, { "epoch": 1.82, "learning_rate": 1.639601034355375e-05, "loss": 0.0127, "step": 74170 }, { "epoch": 1.82, "learning_rate": 1.63955177933752e-05, "loss": 0.0176, "step": 74180 }, { "epoch": 1.82, "learning_rate": 1.639502524319665e-05, "loss": 0.0109, "step": 74190 }, { "epoch": 1.82, "learning_rate": 1.6394532693018103e-05, "loss": 0.0126, "step": 74200 }, { "epoch": 1.82, "learning_rate": 1.6394040142839552e-05, "loss": 0.0175, "step": 74210 }, { "epoch": 1.82, "learning_rate": 1.6393547592661005e-05, "loss": 0.0115, "step": 74220 }, { "epoch": 1.82, "learning_rate": 1.6393055042482454e-05, "loss": 0.01, "step": 74230 }, { "epoch": 1.82, "learning_rate": 1.6392562492303906e-05, "loss": 0.0122, "step": 74240 }, { "epoch": 1.82, "learning_rate": 1.6392069942125356e-05, "loss": 0.0125, "step": 74250 }, { "epoch": 1.82, "learning_rate": 1.6391577391946808e-05, "loss": 0.0196, "step": 74260 }, { "epoch": 1.82, "learning_rate": 1.6391084841768257e-05, "loss": 0.0114, "step": 74270 }, { "epoch": 1.82, "learning_rate": 1.6390592291589706e-05, "loss": 0.0116, "step": 74280 }, { "epoch": 1.83, "learning_rate": 1.639009974141116e-05, "loss": 0.0109, "step": 74290 }, { "epoch": 1.83, "learning_rate": 1.6389607191232608e-05, "loss": 0.0117, "step": 74300 }, { "epoch": 1.83, "learning_rate": 1.6389114641054057e-05, "loss": 0.0194, "step": 74310 }, { "epoch": 1.83, "learning_rate": 1.638862209087551e-05, "loss": 0.0128, "step": 74320 }, { "epoch": 1.83, "learning_rate": 1.638812954069696e-05, "loss": 0.0138, "step": 74330 }, { "epoch": 1.83, "learning_rate": 1.6387636990518408e-05, "loss": 0.011, "step": 74340 }, { "epoch": 1.83, "learning_rate": 1.638714444033986e-05, "loss": 0.0104, "step": 74350 }, { "epoch": 1.83, "learning_rate": 1.638665189016131e-05, "loss": 0.013, "step": 74360 }, { "epoch": 1.83, "learning_rate": 1.6386159339982762e-05, "loss": 0.0124, "step": 74370 }, { "epoch": 1.83, "learning_rate": 1.6385666789804215e-05, "loss": 0.0109, "step": 74380 }, { "epoch": 1.83, "learning_rate": 1.6385174239625664e-05, "loss": 0.0164, "step": 74390 }, { "epoch": 1.83, "learning_rate": 1.6384681689447113e-05, "loss": 0.0261, "step": 74400 }, { "epoch": 1.83, "learning_rate": 1.6384189139268566e-05, "loss": 0.0112, "step": 74410 }, { "epoch": 1.83, "learning_rate": 1.6383696589090015e-05, "loss": 0.0116, "step": 74420 }, { "epoch": 1.83, "learning_rate": 1.6383204038911464e-05, "loss": 0.0116, "step": 74430 }, { "epoch": 1.83, "learning_rate": 1.6382711488732916e-05, "loss": 0.0103, "step": 74440 }, { "epoch": 1.83, "learning_rate": 1.6382218938554366e-05, "loss": 0.0095, "step": 74450 }, { "epoch": 1.83, "learning_rate": 1.6381726388375818e-05, "loss": 0.0173, "step": 74460 }, { "epoch": 1.83, "learning_rate": 1.6381233838197267e-05, "loss": 0.0217, "step": 74470 }, { "epoch": 1.83, "learning_rate": 1.6380741288018716e-05, "loss": 0.0101, "step": 74480 }, { "epoch": 1.83, "learning_rate": 1.638024873784017e-05, "loss": 0.0187, "step": 74490 }, { "epoch": 1.83, "learning_rate": 1.6379756187661618e-05, "loss": 0.0126, "step": 74500 }, { "epoch": 1.83, "learning_rate": 1.637926363748307e-05, "loss": 0.0229, "step": 74510 }, { "epoch": 1.83, "learning_rate": 1.637877108730452e-05, "loss": 0.0132, "step": 74520 }, { "epoch": 1.83, "learning_rate": 1.6378278537125972e-05, "loss": 0.0115, "step": 74530 }, { "epoch": 1.83, "learning_rate": 1.637778598694742e-05, "loss": 0.0143, "step": 74540 }, { "epoch": 1.83, "learning_rate": 1.6377293436768874e-05, "loss": 0.0101, "step": 74550 }, { "epoch": 1.83, "learning_rate": 1.6376800886590323e-05, "loss": 0.0106, "step": 74560 }, { "epoch": 1.83, "learning_rate": 1.6376308336411772e-05, "loss": 0.0202, "step": 74570 }, { "epoch": 1.83, "learning_rate": 1.6375815786233225e-05, "loss": 0.0171, "step": 74580 }, { "epoch": 1.83, "learning_rate": 1.6375323236054674e-05, "loss": 0.0097, "step": 74590 }, { "epoch": 1.83, "learning_rate": 1.6374830685876123e-05, "loss": 0.0087, "step": 74600 }, { "epoch": 1.83, "learning_rate": 1.6374338135697576e-05, "loss": 0.0176, "step": 74610 }, { "epoch": 1.83, "learning_rate": 1.6373845585519025e-05, "loss": 0.0117, "step": 74620 }, { "epoch": 1.83, "learning_rate": 1.6373353035340477e-05, "loss": 0.0112, "step": 74630 }, { "epoch": 1.83, "learning_rate": 1.6372860485161927e-05, "loss": 0.0107, "step": 74640 }, { "epoch": 1.83, "learning_rate": 1.637236793498338e-05, "loss": 0.0104, "step": 74650 }, { "epoch": 1.83, "learning_rate": 1.6371875384804828e-05, "loss": 0.0112, "step": 74660 }, { "epoch": 1.83, "learning_rate": 1.637138283462628e-05, "loss": 0.01, "step": 74670 }, { "epoch": 1.83, "learning_rate": 1.637089028444773e-05, "loss": 0.0145, "step": 74680 }, { "epoch": 1.83, "learning_rate": 1.6370397734269182e-05, "loss": 0.0112, "step": 74690 }, { "epoch": 1.84, "learning_rate": 1.636990518409063e-05, "loss": 0.0118, "step": 74700 }, { "epoch": 1.84, "learning_rate": 1.636941263391208e-05, "loss": 0.0229, "step": 74710 }, { "epoch": 1.84, "learning_rate": 1.6368920083733533e-05, "loss": 0.0118, "step": 74720 }, { "epoch": 1.84, "learning_rate": 1.6368427533554982e-05, "loss": 0.0113, "step": 74730 }, { "epoch": 1.84, "learning_rate": 1.636793498337643e-05, "loss": 0.0173, "step": 74740 }, { "epoch": 1.84, "learning_rate": 1.6367442433197884e-05, "loss": 0.0188, "step": 74750 }, { "epoch": 1.84, "learning_rate": 1.6366949883019333e-05, "loss": 0.0113, "step": 74760 }, { "epoch": 1.84, "learning_rate": 1.6366457332840782e-05, "loss": 0.0112, "step": 74770 }, { "epoch": 1.84, "learning_rate": 1.6365964782662235e-05, "loss": 0.0198, "step": 74780 }, { "epoch": 1.84, "learning_rate": 1.6365472232483684e-05, "loss": 0.0107, "step": 74790 }, { "epoch": 1.84, "learning_rate": 1.6364979682305137e-05, "loss": 0.0125, "step": 74800 }, { "epoch": 1.84, "learning_rate": 1.6364487132126586e-05, "loss": 0.0123, "step": 74810 }, { "epoch": 1.84, "learning_rate": 1.636399458194804e-05, "loss": 0.0147, "step": 74820 }, { "epoch": 1.84, "learning_rate": 1.6363502031769487e-05, "loss": 0.0199, "step": 74830 }, { "epoch": 1.84, "learning_rate": 1.636300948159094e-05, "loss": 0.0189, "step": 74840 }, { "epoch": 1.84, "learning_rate": 1.636251693141239e-05, "loss": 0.0102, "step": 74850 }, { "epoch": 1.84, "learning_rate": 1.6362024381233838e-05, "loss": 0.01, "step": 74860 }, { "epoch": 1.84, "learning_rate": 1.636153183105529e-05, "loss": 0.012, "step": 74870 }, { "epoch": 1.84, "learning_rate": 1.636103928087674e-05, "loss": 0.0105, "step": 74880 }, { "epoch": 1.84, "learning_rate": 1.636054673069819e-05, "loss": 0.0113, "step": 74890 }, { "epoch": 1.84, "learning_rate": 1.636005418051964e-05, "loss": 0.0093, "step": 74900 }, { "epoch": 1.84, "learning_rate": 1.635956163034109e-05, "loss": 0.0102, "step": 74910 }, { "epoch": 1.84, "learning_rate": 1.6359069080162543e-05, "loss": 0.0096, "step": 74920 }, { "epoch": 1.84, "learning_rate": 1.6358576529983993e-05, "loss": 0.0143, "step": 74930 }, { "epoch": 1.84, "learning_rate": 1.6358083979805445e-05, "loss": 0.0234, "step": 74940 }, { "epoch": 1.84, "learning_rate": 1.6357591429626894e-05, "loss": 0.0109, "step": 74950 }, { "epoch": 1.84, "learning_rate": 1.6357098879448347e-05, "loss": 0.0132, "step": 74960 }, { "epoch": 1.84, "learning_rate": 1.6356606329269796e-05, "loss": 0.0168, "step": 74970 }, { "epoch": 1.84, "learning_rate": 1.635611377909125e-05, "loss": 0.0105, "step": 74980 }, { "epoch": 1.84, "learning_rate": 1.6355621228912698e-05, "loss": 0.0133, "step": 74990 }, { "epoch": 1.84, "learning_rate": 1.6355128678734147e-05, "loss": 0.0106, "step": 75000 }, { "epoch": 1.84, "eval_bleu": 98.0119, "eval_gen_len_ratio": 1.0019, "eval_loss": 0.009336445480585098, "eval_runtime": 256.8288, "eval_samples_per_second": 7.266, "eval_steps_per_second": 0.23, "step": 75000 }, { "epoch": 1.84, "learning_rate": 1.63546361285556e-05, "loss": 0.0169, "step": 75010 }, { "epoch": 1.84, "learning_rate": 1.635414357837705e-05, "loss": 0.0107, "step": 75020 }, { "epoch": 1.84, "learning_rate": 1.6353651028198498e-05, "loss": 0.0114, "step": 75030 }, { "epoch": 1.84, "learning_rate": 1.635315847801995e-05, "loss": 0.0142, "step": 75040 }, { "epoch": 1.84, "learning_rate": 1.63526659278414e-05, "loss": 0.0095, "step": 75050 }, { "epoch": 1.84, "learning_rate": 1.635217337766285e-05, "loss": 0.0211, "step": 75060 }, { "epoch": 1.84, "learning_rate": 1.63516808274843e-05, "loss": 0.0137, "step": 75070 }, { "epoch": 1.84, "learning_rate": 1.635118827730575e-05, "loss": 0.021, "step": 75080 }, { "epoch": 1.84, "learning_rate": 1.6350695727127203e-05, "loss": 0.0108, "step": 75090 }, { "epoch": 1.84, "learning_rate": 1.6350203176948655e-05, "loss": 0.0121, "step": 75100 }, { "epoch": 1.85, "learning_rate": 1.6349710626770104e-05, "loss": 0.0105, "step": 75110 }, { "epoch": 1.85, "learning_rate": 1.6349218076591553e-05, "loss": 0.0118, "step": 75120 }, { "epoch": 1.85, "learning_rate": 1.6348725526413006e-05, "loss": 0.0096, "step": 75130 }, { "epoch": 1.85, "learning_rate": 1.6348232976234455e-05, "loss": 0.0101, "step": 75140 }, { "epoch": 1.85, "learning_rate": 1.6347740426055904e-05, "loss": 0.0256, "step": 75150 }, { "epoch": 1.85, "learning_rate": 1.6347247875877357e-05, "loss": 0.0104, "step": 75160 }, { "epoch": 1.85, "learning_rate": 1.6346755325698806e-05, "loss": 0.0113, "step": 75170 }, { "epoch": 1.85, "learning_rate": 1.634626277552026e-05, "loss": 0.0132, "step": 75180 }, { "epoch": 1.85, "learning_rate": 1.6345770225341708e-05, "loss": 0.0112, "step": 75190 }, { "epoch": 1.85, "learning_rate": 1.6345277675163157e-05, "loss": 0.0108, "step": 75200 }, { "epoch": 1.85, "learning_rate": 1.634478512498461e-05, "loss": 0.0115, "step": 75210 }, { "epoch": 1.85, "learning_rate": 1.634429257480606e-05, "loss": 0.0122, "step": 75220 }, { "epoch": 1.85, "learning_rate": 1.634380002462751e-05, "loss": 0.0109, "step": 75230 }, { "epoch": 1.85, "learning_rate": 1.634330747444896e-05, "loss": 0.011, "step": 75240 }, { "epoch": 1.85, "learning_rate": 1.6342814924270413e-05, "loss": 0.0118, "step": 75250 }, { "epoch": 1.85, "learning_rate": 1.6342322374091862e-05, "loss": 0.0117, "step": 75260 }, { "epoch": 1.85, "learning_rate": 1.6341829823913314e-05, "loss": 0.0099, "step": 75270 }, { "epoch": 1.85, "learning_rate": 1.6341337273734764e-05, "loss": 0.0151, "step": 75280 }, { "epoch": 1.85, "learning_rate": 1.6340844723556213e-05, "loss": 0.0158, "step": 75290 }, { "epoch": 1.85, "learning_rate": 1.6340352173377665e-05, "loss": 0.0102, "step": 75300 }, { "epoch": 1.85, "learning_rate": 1.6339859623199114e-05, "loss": 0.0109, "step": 75310 }, { "epoch": 1.85, "learning_rate": 1.6339367073020564e-05, "loss": 0.0117, "step": 75320 }, { "epoch": 1.85, "learning_rate": 1.6338874522842016e-05, "loss": 0.0108, "step": 75330 }, { "epoch": 1.85, "learning_rate": 1.6338381972663465e-05, "loss": 0.0104, "step": 75340 }, { "epoch": 1.85, "learning_rate": 1.6337889422484918e-05, "loss": 0.0122, "step": 75350 }, { "epoch": 1.85, "learning_rate": 1.6337396872306367e-05, "loss": 0.0094, "step": 75360 }, { "epoch": 1.85, "learning_rate": 1.633690432212782e-05, "loss": 0.0112, "step": 75370 }, { "epoch": 1.85, "learning_rate": 1.633641177194927e-05, "loss": 0.0099, "step": 75380 }, { "epoch": 1.85, "learning_rate": 1.633591922177072e-05, "loss": 0.0169, "step": 75390 }, { "epoch": 1.85, "learning_rate": 1.633542667159217e-05, "loss": 0.0099, "step": 75400 }, { "epoch": 1.85, "learning_rate": 1.6334934121413623e-05, "loss": 0.0163, "step": 75410 }, { "epoch": 1.85, "learning_rate": 1.6334441571235072e-05, "loss": 0.0102, "step": 75420 }, { "epoch": 1.85, "learning_rate": 1.633394902105652e-05, "loss": 0.0141, "step": 75430 }, { "epoch": 1.85, "learning_rate": 1.6333456470877974e-05, "loss": 0.0108, "step": 75440 }, { "epoch": 1.85, "learning_rate": 1.6332963920699423e-05, "loss": 0.0103, "step": 75450 }, { "epoch": 1.85, "learning_rate": 1.6332471370520872e-05, "loss": 0.011, "step": 75460 }, { "epoch": 1.85, "learning_rate": 1.6331978820342324e-05, "loss": 0.0111, "step": 75470 }, { "epoch": 1.85, "learning_rate": 1.6331486270163774e-05, "loss": 0.0117, "step": 75480 }, { "epoch": 1.85, "learning_rate": 1.6330993719985223e-05, "loss": 0.0104, "step": 75490 }, { "epoch": 1.85, "learning_rate": 1.6330501169806675e-05, "loss": 0.0101, "step": 75500 }, { "epoch": 1.86, "learning_rate": 1.6330008619628124e-05, "loss": 0.0113, "step": 75510 }, { "epoch": 1.86, "learning_rate": 1.6329516069449577e-05, "loss": 0.0131, "step": 75520 }, { "epoch": 1.86, "learning_rate": 1.6329023519271026e-05, "loss": 0.0127, "step": 75530 }, { "epoch": 1.86, "learning_rate": 1.632853096909248e-05, "loss": 0.0211, "step": 75540 }, { "epoch": 1.86, "learning_rate": 1.6328038418913928e-05, "loss": 0.0112, "step": 75550 }, { "epoch": 1.86, "learning_rate": 1.632754586873538e-05, "loss": 0.0101, "step": 75560 }, { "epoch": 1.86, "learning_rate": 1.632705331855683e-05, "loss": 0.012, "step": 75570 }, { "epoch": 1.86, "learning_rate": 1.632656076837828e-05, "loss": 0.0111, "step": 75580 }, { "epoch": 1.86, "learning_rate": 1.632606821819973e-05, "loss": 0.0123, "step": 75590 }, { "epoch": 1.86, "learning_rate": 1.632557566802118e-05, "loss": 0.0106, "step": 75600 }, { "epoch": 1.86, "learning_rate": 1.632508311784263e-05, "loss": 0.0096, "step": 75610 }, { "epoch": 1.86, "learning_rate": 1.6324590567664082e-05, "loss": 0.0158, "step": 75620 }, { "epoch": 1.86, "learning_rate": 1.632409801748553e-05, "loss": 0.0105, "step": 75630 }, { "epoch": 1.86, "learning_rate": 1.6323605467306984e-05, "loss": 0.0104, "step": 75640 }, { "epoch": 1.86, "learning_rate": 1.6323112917128433e-05, "loss": 0.0116, "step": 75650 }, { "epoch": 1.86, "learning_rate": 1.6322620366949885e-05, "loss": 0.014, "step": 75660 }, { "epoch": 1.86, "learning_rate": 1.6322127816771335e-05, "loss": 0.0164, "step": 75670 }, { "epoch": 1.86, "learning_rate": 1.6321635266592787e-05, "loss": 0.0118, "step": 75680 }, { "epoch": 1.86, "learning_rate": 1.6321142716414236e-05, "loss": 0.0113, "step": 75690 }, { "epoch": 1.86, "learning_rate": 1.632065016623569e-05, "loss": 0.0095, "step": 75700 }, { "epoch": 1.86, "learning_rate": 1.6320157616057138e-05, "loss": 0.0119, "step": 75710 }, { "epoch": 1.86, "learning_rate": 1.6319665065878587e-05, "loss": 0.0106, "step": 75720 }, { "epoch": 1.86, "learning_rate": 1.631917251570004e-05, "loss": 0.0156, "step": 75730 }, { "epoch": 1.86, "learning_rate": 1.631867996552149e-05, "loss": 0.0109, "step": 75740 }, { "epoch": 1.86, "learning_rate": 1.6318187415342938e-05, "loss": 0.0181, "step": 75750 }, { "epoch": 1.86, "learning_rate": 1.631769486516439e-05, "loss": 0.011, "step": 75760 }, { "epoch": 1.86, "learning_rate": 1.631720231498584e-05, "loss": 0.0114, "step": 75770 }, { "epoch": 1.86, "learning_rate": 1.631670976480729e-05, "loss": 0.0185, "step": 75780 }, { "epoch": 1.86, "learning_rate": 1.631621721462874e-05, "loss": 0.0122, "step": 75790 }, { "epoch": 1.86, "learning_rate": 1.631572466445019e-05, "loss": 0.0126, "step": 75800 }, { "epoch": 1.86, "learning_rate": 1.6315232114271643e-05, "loss": 0.0129, "step": 75810 }, { "epoch": 1.86, "learning_rate": 1.6314739564093095e-05, "loss": 0.0191, "step": 75820 }, { "epoch": 1.86, "learning_rate": 1.6314247013914545e-05, "loss": 0.0107, "step": 75830 }, { "epoch": 1.86, "learning_rate": 1.6313754463735994e-05, "loss": 0.0221, "step": 75840 }, { "epoch": 1.86, "learning_rate": 1.6313261913557446e-05, "loss": 0.0136, "step": 75850 }, { "epoch": 1.86, "learning_rate": 1.6312769363378895e-05, "loss": 0.0197, "step": 75860 }, { "epoch": 1.86, "learning_rate": 1.6312276813200345e-05, "loss": 0.0103, "step": 75870 }, { "epoch": 1.86, "learning_rate": 1.6311784263021797e-05, "loss": 0.0185, "step": 75880 }, { "epoch": 1.86, "learning_rate": 1.6311291712843246e-05, "loss": 0.0112, "step": 75890 }, { "epoch": 1.86, "learning_rate": 1.63107991626647e-05, "loss": 0.0176, "step": 75900 }, { "epoch": 1.86, "learning_rate": 1.6310306612486148e-05, "loss": 0.0184, "step": 75910 }, { "epoch": 1.87, "learning_rate": 1.6309814062307597e-05, "loss": 0.0109, "step": 75920 }, { "epoch": 1.87, "learning_rate": 1.630932151212905e-05, "loss": 0.0104, "step": 75930 }, { "epoch": 1.87, "learning_rate": 1.63088289619505e-05, "loss": 0.0192, "step": 75940 }, { "epoch": 1.87, "learning_rate": 1.630833641177195e-05, "loss": 0.01, "step": 75950 }, { "epoch": 1.87, "learning_rate": 1.63078438615934e-05, "loss": 0.0111, "step": 75960 }, { "epoch": 1.87, "learning_rate": 1.6307351311414853e-05, "loss": 0.0188, "step": 75970 }, { "epoch": 1.87, "learning_rate": 1.6306858761236302e-05, "loss": 0.0136, "step": 75980 }, { "epoch": 1.87, "learning_rate": 1.6306366211057755e-05, "loss": 0.0125, "step": 75990 }, { "epoch": 1.87, "learning_rate": 1.6305873660879204e-05, "loss": 0.0122, "step": 76000 }, { "epoch": 1.87, "learning_rate": 1.6305381110700653e-05, "loss": 0.0114, "step": 76010 }, { "epoch": 1.87, "learning_rate": 1.6304888560522106e-05, "loss": 0.0106, "step": 76020 }, { "epoch": 1.87, "learning_rate": 1.6304396010343555e-05, "loss": 0.0118, "step": 76030 }, { "epoch": 1.87, "learning_rate": 1.6303903460165004e-05, "loss": 0.018, "step": 76040 }, { "epoch": 1.87, "learning_rate": 1.6303410909986456e-05, "loss": 0.0161, "step": 76050 }, { "epoch": 1.87, "learning_rate": 1.6302918359807906e-05, "loss": 0.0133, "step": 76060 }, { "epoch": 1.87, "learning_rate": 1.6302425809629358e-05, "loss": 0.0113, "step": 76070 }, { "epoch": 1.87, "learning_rate": 1.6301933259450807e-05, "loss": 0.01, "step": 76080 }, { "epoch": 1.87, "learning_rate": 1.630144070927226e-05, "loss": 0.0127, "step": 76090 }, { "epoch": 1.87, "learning_rate": 1.630094815909371e-05, "loss": 0.0189, "step": 76100 }, { "epoch": 1.87, "learning_rate": 1.630045560891516e-05, "loss": 0.0133, "step": 76110 }, { "epoch": 1.87, "learning_rate": 1.629996305873661e-05, "loss": 0.0121, "step": 76120 }, { "epoch": 1.87, "learning_rate": 1.6299470508558063e-05, "loss": 0.0114, "step": 76130 }, { "epoch": 1.87, "learning_rate": 1.6298977958379512e-05, "loss": 0.0143, "step": 76140 }, { "epoch": 1.87, "learning_rate": 1.629848540820096e-05, "loss": 0.0128, "step": 76150 }, { "epoch": 1.87, "learning_rate": 1.6297992858022414e-05, "loss": 0.016, "step": 76160 }, { "epoch": 1.87, "learning_rate": 1.6297500307843863e-05, "loss": 0.0118, "step": 76170 }, { "epoch": 1.87, "learning_rate": 1.6297007757665312e-05, "loss": 0.012, "step": 76180 }, { "epoch": 1.87, "learning_rate": 1.6296515207486765e-05, "loss": 0.0183, "step": 76190 }, { "epoch": 1.87, "learning_rate": 1.6296022657308214e-05, "loss": 0.0108, "step": 76200 }, { "epoch": 1.87, "learning_rate": 1.6295530107129663e-05, "loss": 0.0115, "step": 76210 }, { "epoch": 1.87, "learning_rate": 1.6295037556951116e-05, "loss": 0.0137, "step": 76220 }, { "epoch": 1.87, "learning_rate": 1.6294545006772565e-05, "loss": 0.0108, "step": 76230 }, { "epoch": 1.87, "learning_rate": 1.6294052456594017e-05, "loss": 0.0156, "step": 76240 }, { "epoch": 1.87, "learning_rate": 1.6293559906415466e-05, "loss": 0.0134, "step": 76250 }, { "epoch": 1.87, "learning_rate": 1.629306735623692e-05, "loss": 0.0116, "step": 76260 }, { "epoch": 1.87, "learning_rate": 1.6292574806058368e-05, "loss": 0.0169, "step": 76270 }, { "epoch": 1.87, "learning_rate": 1.629208225587982e-05, "loss": 0.0106, "step": 76280 }, { "epoch": 1.87, "learning_rate": 1.629158970570127e-05, "loss": 0.0173, "step": 76290 }, { "epoch": 1.87, "learning_rate": 1.629109715552272e-05, "loss": 0.0105, "step": 76300 }, { "epoch": 1.87, "learning_rate": 1.629060460534417e-05, "loss": 0.0125, "step": 76310 }, { "epoch": 1.87, "learning_rate": 1.629011205516562e-05, "loss": 0.0097, "step": 76320 }, { "epoch": 1.88, "learning_rate": 1.628961950498707e-05, "loss": 0.0119, "step": 76330 }, { "epoch": 1.88, "learning_rate": 1.6289126954808522e-05, "loss": 0.0177, "step": 76340 }, { "epoch": 1.88, "learning_rate": 1.628863440462997e-05, "loss": 0.0226, "step": 76350 }, { "epoch": 1.88, "learning_rate": 1.6288141854451424e-05, "loss": 0.0104, "step": 76360 }, { "epoch": 1.88, "learning_rate": 1.6287649304272873e-05, "loss": 0.0105, "step": 76370 }, { "epoch": 1.88, "learning_rate": 1.6287156754094326e-05, "loss": 0.0099, "step": 76380 }, { "epoch": 1.88, "learning_rate": 1.6286664203915775e-05, "loss": 0.0205, "step": 76390 }, { "epoch": 1.88, "learning_rate": 1.6286171653737227e-05, "loss": 0.012, "step": 76400 }, { "epoch": 1.88, "learning_rate": 1.6285679103558677e-05, "loss": 0.0103, "step": 76410 }, { "epoch": 1.88, "learning_rate": 1.628518655338013e-05, "loss": 0.0118, "step": 76420 }, { "epoch": 1.88, "learning_rate": 1.6284694003201578e-05, "loss": 0.0114, "step": 76430 }, { "epoch": 1.88, "learning_rate": 1.6284201453023027e-05, "loss": 0.0201, "step": 76440 }, { "epoch": 1.88, "learning_rate": 1.628370890284448e-05, "loss": 0.0117, "step": 76450 }, { "epoch": 1.88, "learning_rate": 1.628321635266593e-05, "loss": 0.0194, "step": 76460 }, { "epoch": 1.88, "learning_rate": 1.6282723802487378e-05, "loss": 0.0099, "step": 76470 }, { "epoch": 1.88, "learning_rate": 1.628223125230883e-05, "loss": 0.0162, "step": 76480 }, { "epoch": 1.88, "learning_rate": 1.628173870213028e-05, "loss": 0.0127, "step": 76490 }, { "epoch": 1.88, "learning_rate": 1.628124615195173e-05, "loss": 0.0107, "step": 76500 }, { "epoch": 1.88, "learning_rate": 1.628075360177318e-05, "loss": 0.0109, "step": 76510 }, { "epoch": 1.88, "learning_rate": 1.628026105159463e-05, "loss": 0.0114, "step": 76520 }, { "epoch": 1.88, "learning_rate": 1.6279768501416083e-05, "loss": 0.0096, "step": 76530 }, { "epoch": 1.88, "learning_rate": 1.6279275951237536e-05, "loss": 0.0119, "step": 76540 }, { "epoch": 1.88, "learning_rate": 1.6278783401058985e-05, "loss": 0.0205, "step": 76550 }, { "epoch": 1.88, "learning_rate": 1.6278290850880434e-05, "loss": 0.0125, "step": 76560 }, { "epoch": 1.88, "learning_rate": 1.6277798300701887e-05, "loss": 0.0108, "step": 76570 }, { "epoch": 1.88, "learning_rate": 1.6277305750523336e-05, "loss": 0.03, "step": 76580 }, { "epoch": 1.88, "learning_rate": 1.6276813200344785e-05, "loss": 0.0119, "step": 76590 }, { "epoch": 1.88, "learning_rate": 1.6276320650166237e-05, "loss": 0.0187, "step": 76600 }, { "epoch": 1.88, "learning_rate": 1.6275828099987687e-05, "loss": 0.0121, "step": 76610 }, { "epoch": 1.88, "learning_rate": 1.627533554980914e-05, "loss": 0.0121, "step": 76620 }, { "epoch": 1.88, "learning_rate": 1.627484299963059e-05, "loss": 0.0114, "step": 76630 }, { "epoch": 1.88, "learning_rate": 1.6274350449452037e-05, "loss": 0.0206, "step": 76640 }, { "epoch": 1.88, "learning_rate": 1.627385789927349e-05, "loss": 0.0189, "step": 76650 }, { "epoch": 1.88, "learning_rate": 1.627336534909494e-05, "loss": 0.0173, "step": 76660 }, { "epoch": 1.88, "learning_rate": 1.6272872798916392e-05, "loss": 0.0106, "step": 76670 }, { "epoch": 1.88, "learning_rate": 1.627238024873784e-05, "loss": 0.0123, "step": 76680 }, { "epoch": 1.88, "learning_rate": 1.6271887698559293e-05, "loss": 0.0127, "step": 76690 }, { "epoch": 1.88, "learning_rate": 1.6271395148380743e-05, "loss": 0.0128, "step": 76700 }, { "epoch": 1.88, "learning_rate": 1.6270902598202195e-05, "loss": 0.0109, "step": 76710 }, { "epoch": 1.88, "learning_rate": 1.6270410048023644e-05, "loss": 0.0276, "step": 76720 }, { "epoch": 1.89, "learning_rate": 1.6269917497845093e-05, "loss": 0.011, "step": 76730 }, { "epoch": 1.89, "learning_rate": 1.6269424947666546e-05, "loss": 0.0122, "step": 76740 }, { "epoch": 1.89, "learning_rate": 1.6268932397487995e-05, "loss": 0.0095, "step": 76750 }, { "epoch": 1.89, "learning_rate": 1.6268439847309444e-05, "loss": 0.0127, "step": 76760 }, { "epoch": 1.89, "learning_rate": 1.6267947297130897e-05, "loss": 0.0118, "step": 76770 }, { "epoch": 1.89, "learning_rate": 1.6267454746952346e-05, "loss": 0.0119, "step": 76780 }, { "epoch": 1.89, "learning_rate": 1.62669621967738e-05, "loss": 0.0092, "step": 76790 }, { "epoch": 1.89, "learning_rate": 1.6266469646595248e-05, "loss": 0.0154, "step": 76800 }, { "epoch": 1.89, "learning_rate": 1.62659770964167e-05, "loss": 0.0121, "step": 76810 }, { "epoch": 1.89, "learning_rate": 1.626548454623815e-05, "loss": 0.0112, "step": 76820 }, { "epoch": 1.89, "learning_rate": 1.6264991996059602e-05, "loss": 0.0101, "step": 76830 }, { "epoch": 1.89, "learning_rate": 1.626449944588105e-05, "loss": 0.0174, "step": 76840 }, { "epoch": 1.89, "learning_rate": 1.6264006895702503e-05, "loss": 0.0122, "step": 76850 }, { "epoch": 1.89, "learning_rate": 1.6263514345523953e-05, "loss": 0.0236, "step": 76860 }, { "epoch": 1.89, "learning_rate": 1.6263021795345402e-05, "loss": 0.0123, "step": 76870 }, { "epoch": 1.89, "learning_rate": 1.6262529245166854e-05, "loss": 0.0105, "step": 76880 }, { "epoch": 1.89, "learning_rate": 1.6262036694988303e-05, "loss": 0.0096, "step": 76890 }, { "epoch": 1.89, "learning_rate": 1.6261544144809753e-05, "loss": 0.01, "step": 76900 }, { "epoch": 1.89, "learning_rate": 1.6261051594631205e-05, "loss": 0.012, "step": 76910 }, { "epoch": 1.89, "learning_rate": 1.6260559044452654e-05, "loss": 0.0111, "step": 76920 }, { "epoch": 1.89, "learning_rate": 1.6260066494274103e-05, "loss": 0.0128, "step": 76930 }, { "epoch": 1.89, "learning_rate": 1.6259573944095556e-05, "loss": 0.0106, "step": 76940 }, { "epoch": 1.89, "learning_rate": 1.6259081393917005e-05, "loss": 0.0123, "step": 76950 }, { "epoch": 1.89, "learning_rate": 1.6258588843738458e-05, "loss": 0.0099, "step": 76960 }, { "epoch": 1.89, "learning_rate": 1.6258096293559907e-05, "loss": 0.0117, "step": 76970 }, { "epoch": 1.89, "learning_rate": 1.625760374338136e-05, "loss": 0.0117, "step": 76980 }, { "epoch": 1.89, "learning_rate": 1.625711119320281e-05, "loss": 0.0093, "step": 76990 }, { "epoch": 1.89, "learning_rate": 1.625661864302426e-05, "loss": 0.0116, "step": 77000 }, { "epoch": 1.89, "learning_rate": 1.625612609284571e-05, "loss": 0.0102, "step": 77010 }, { "epoch": 1.89, "learning_rate": 1.625563354266716e-05, "loss": 0.0118, "step": 77020 }, { "epoch": 1.89, "learning_rate": 1.6255140992488612e-05, "loss": 0.0091, "step": 77030 }, { "epoch": 1.89, "learning_rate": 1.625464844231006e-05, "loss": 0.0111, "step": 77040 }, { "epoch": 1.89, "learning_rate": 1.625415589213151e-05, "loss": 0.012, "step": 77050 }, { "epoch": 1.89, "learning_rate": 1.6253663341952963e-05, "loss": 0.0122, "step": 77060 }, { "epoch": 1.89, "learning_rate": 1.6253170791774412e-05, "loss": 0.0101, "step": 77070 }, { "epoch": 1.89, "learning_rate": 1.6252678241595864e-05, "loss": 0.0126, "step": 77080 }, { "epoch": 1.89, "learning_rate": 1.6252185691417314e-05, "loss": 0.0097, "step": 77090 }, { "epoch": 1.89, "learning_rate": 1.6251693141238766e-05, "loss": 0.0128, "step": 77100 }, { "epoch": 1.89, "learning_rate": 1.6251200591060215e-05, "loss": 0.0218, "step": 77110 }, { "epoch": 1.89, "learning_rate": 1.6250708040881668e-05, "loss": 0.0202, "step": 77120 }, { "epoch": 1.89, "learning_rate": 1.6250215490703117e-05, "loss": 0.011, "step": 77130 }, { "epoch": 1.9, "learning_rate": 1.624972294052457e-05, "loss": 0.0092, "step": 77140 }, { "epoch": 1.9, "learning_rate": 1.624923039034602e-05, "loss": 0.015, "step": 77150 }, { "epoch": 1.9, "learning_rate": 1.6248737840167468e-05, "loss": 0.0113, "step": 77160 }, { "epoch": 1.9, "learning_rate": 1.624824528998892e-05, "loss": 0.011, "step": 77170 }, { "epoch": 1.9, "learning_rate": 1.624775273981037e-05, "loss": 0.0151, "step": 77180 }, { "epoch": 1.9, "learning_rate": 1.624726018963182e-05, "loss": 0.0106, "step": 77190 }, { "epoch": 1.9, "learning_rate": 1.624676763945327e-05, "loss": 0.0103, "step": 77200 }, { "epoch": 1.9, "learning_rate": 1.624627508927472e-05, "loss": 0.0152, "step": 77210 }, { "epoch": 1.9, "learning_rate": 1.624578253909617e-05, "loss": 0.0215, "step": 77220 }, { "epoch": 1.9, "learning_rate": 1.6245289988917622e-05, "loss": 0.0158, "step": 77230 }, { "epoch": 1.9, "learning_rate": 1.624479743873907e-05, "loss": 0.0129, "step": 77240 }, { "epoch": 1.9, "learning_rate": 1.6244304888560524e-05, "loss": 0.0102, "step": 77250 }, { "epoch": 1.9, "learning_rate": 1.6243812338381976e-05, "loss": 0.0117, "step": 77260 }, { "epoch": 1.9, "learning_rate": 1.6243319788203425e-05, "loss": 0.012, "step": 77270 }, { "epoch": 1.9, "learning_rate": 1.6242827238024874e-05, "loss": 0.0104, "step": 77280 }, { "epoch": 1.9, "learning_rate": 1.6242334687846327e-05, "loss": 0.0107, "step": 77290 }, { "epoch": 1.9, "learning_rate": 1.6241842137667776e-05, "loss": 0.0108, "step": 77300 }, { "epoch": 1.9, "learning_rate": 1.6241349587489225e-05, "loss": 0.0105, "step": 77310 }, { "epoch": 1.9, "learning_rate": 1.6240857037310678e-05, "loss": 0.0114, "step": 77320 }, { "epoch": 1.9, "learning_rate": 1.6240364487132127e-05, "loss": 0.0176, "step": 77330 }, { "epoch": 1.9, "learning_rate": 1.623987193695358e-05, "loss": 0.0104, "step": 77340 }, { "epoch": 1.9, "learning_rate": 1.623937938677503e-05, "loss": 0.0115, "step": 77350 }, { "epoch": 1.9, "learning_rate": 1.6238886836596478e-05, "loss": 0.0116, "step": 77360 }, { "epoch": 1.9, "learning_rate": 1.623839428641793e-05, "loss": 0.0196, "step": 77370 }, { "epoch": 1.9, "learning_rate": 1.623790173623938e-05, "loss": 0.0116, "step": 77380 }, { "epoch": 1.9, "learning_rate": 1.6237409186060832e-05, "loss": 0.0199, "step": 77390 }, { "epoch": 1.9, "learning_rate": 1.623691663588228e-05, "loss": 0.0163, "step": 77400 }, { "epoch": 1.9, "learning_rate": 1.6236424085703734e-05, "loss": 0.0113, "step": 77410 }, { "epoch": 1.9, "learning_rate": 1.6235931535525183e-05, "loss": 0.0108, "step": 77420 }, { "epoch": 1.9, "learning_rate": 1.6235438985346635e-05, "loss": 0.0161, "step": 77430 }, { "epoch": 1.9, "learning_rate": 1.6234946435168085e-05, "loss": 0.012, "step": 77440 }, { "epoch": 1.9, "learning_rate": 1.6234453884989534e-05, "loss": 0.0103, "step": 77450 }, { "epoch": 1.9, "learning_rate": 1.6233961334810986e-05, "loss": 0.0135, "step": 77460 }, { "epoch": 1.9, "learning_rate": 1.6233468784632435e-05, "loss": 0.0105, "step": 77470 }, { "epoch": 1.9, "learning_rate": 1.6232976234453885e-05, "loss": 0.0104, "step": 77480 }, { "epoch": 1.9, "learning_rate": 1.6232483684275337e-05, "loss": 0.0111, "step": 77490 }, { "epoch": 1.9, "learning_rate": 1.6231991134096786e-05, "loss": 0.0115, "step": 77500 }, { "epoch": 1.9, "learning_rate": 1.623149858391824e-05, "loss": 0.0299, "step": 77510 }, { "epoch": 1.9, "learning_rate": 1.6231006033739688e-05, "loss": 0.0109, "step": 77520 }, { "epoch": 1.9, "learning_rate": 1.623051348356114e-05, "loss": 0.0254, "step": 77530 }, { "epoch": 1.9, "learning_rate": 1.623002093338259e-05, "loss": 0.0091, "step": 77540 }, { "epoch": 1.91, "learning_rate": 1.6229528383204042e-05, "loss": 0.012, "step": 77550 }, { "epoch": 1.91, "learning_rate": 1.622903583302549e-05, "loss": 0.0117, "step": 77560 }, { "epoch": 1.91, "learning_rate": 1.622854328284694e-05, "loss": 0.0111, "step": 77570 }, { "epoch": 1.91, "learning_rate": 1.6228050732668393e-05, "loss": 0.0113, "step": 77580 }, { "epoch": 1.91, "learning_rate": 1.6227558182489842e-05, "loss": 0.0173, "step": 77590 }, { "epoch": 1.91, "learning_rate": 1.6227065632311295e-05, "loss": 0.0111, "step": 77600 }, { "epoch": 1.91, "learning_rate": 1.6226573082132744e-05, "loss": 0.0112, "step": 77610 }, { "epoch": 1.91, "learning_rate": 1.6226080531954193e-05, "loss": 0.0119, "step": 77620 }, { "epoch": 1.91, "learning_rate": 1.6225587981775645e-05, "loss": 0.0134, "step": 77630 }, { "epoch": 1.91, "learning_rate": 1.6225095431597095e-05, "loss": 0.012, "step": 77640 }, { "epoch": 1.91, "learning_rate": 1.6224602881418544e-05, "loss": 0.0109, "step": 77650 }, { "epoch": 1.91, "learning_rate": 1.6224110331239996e-05, "loss": 0.012, "step": 77660 }, { "epoch": 1.91, "learning_rate": 1.6223617781061445e-05, "loss": 0.0124, "step": 77670 }, { "epoch": 1.91, "learning_rate": 1.6223125230882898e-05, "loss": 0.0121, "step": 77680 }, { "epoch": 1.91, "learning_rate": 1.6222632680704347e-05, "loss": 0.0117, "step": 77690 }, { "epoch": 1.91, "learning_rate": 1.62221401305258e-05, "loss": 0.0111, "step": 77700 }, { "epoch": 1.91, "learning_rate": 1.622164758034725e-05, "loss": 0.0112, "step": 77710 }, { "epoch": 1.91, "learning_rate": 1.62211550301687e-05, "loss": 0.0112, "step": 77720 }, { "epoch": 1.91, "learning_rate": 1.622066247999015e-05, "loss": 0.0177, "step": 77730 }, { "epoch": 1.91, "learning_rate": 1.62201699298116e-05, "loss": 0.011, "step": 77740 }, { "epoch": 1.91, "learning_rate": 1.6219677379633052e-05, "loss": 0.0168, "step": 77750 }, { "epoch": 1.91, "learning_rate": 1.62191848294545e-05, "loss": 0.0117, "step": 77760 }, { "epoch": 1.91, "learning_rate": 1.621869227927595e-05, "loss": 0.0111, "step": 77770 }, { "epoch": 1.91, "learning_rate": 1.6218199729097403e-05, "loss": 0.0093, "step": 77780 }, { "epoch": 1.91, "learning_rate": 1.6217707178918852e-05, "loss": 0.0116, "step": 77790 }, { "epoch": 1.91, "learning_rate": 1.6217214628740305e-05, "loss": 0.0163, "step": 77800 }, { "epoch": 1.91, "learning_rate": 1.6216722078561754e-05, "loss": 0.0105, "step": 77810 }, { "epoch": 1.91, "learning_rate": 1.6216229528383206e-05, "loss": 0.0109, "step": 77820 }, { "epoch": 1.91, "learning_rate": 1.6215736978204656e-05, "loss": 0.0109, "step": 77830 }, { "epoch": 1.91, "learning_rate": 1.6215244428026108e-05, "loss": 0.0095, "step": 77840 }, { "epoch": 1.91, "learning_rate": 1.6214751877847557e-05, "loss": 0.0207, "step": 77850 }, { "epoch": 1.91, "learning_rate": 1.621425932766901e-05, "loss": 0.0116, "step": 77860 }, { "epoch": 1.91, "learning_rate": 1.621376677749046e-05, "loss": 0.0105, "step": 77870 }, { "epoch": 1.91, "learning_rate": 1.6213274227311908e-05, "loss": 0.022, "step": 77880 }, { "epoch": 1.91, "learning_rate": 1.621278167713336e-05, "loss": 0.0187, "step": 77890 }, { "epoch": 1.91, "learning_rate": 1.621228912695481e-05, "loss": 0.0119, "step": 77900 }, { "epoch": 1.91, "learning_rate": 1.621179657677626e-05, "loss": 0.0161, "step": 77910 }, { "epoch": 1.91, "learning_rate": 1.621130402659771e-05, "loss": 0.0113, "step": 77920 }, { "epoch": 1.91, "learning_rate": 1.621081147641916e-05, "loss": 0.0086, "step": 77930 }, { "epoch": 1.91, "learning_rate": 1.621031892624061e-05, "loss": 0.01, "step": 77940 }, { "epoch": 1.91, "learning_rate": 1.6209826376062062e-05, "loss": 0.0117, "step": 77950 }, { "epoch": 1.92, "learning_rate": 1.620933382588351e-05, "loss": 0.0159, "step": 77960 }, { "epoch": 1.92, "learning_rate": 1.6208841275704964e-05, "loss": 0.0104, "step": 77970 }, { "epoch": 1.92, "learning_rate": 1.6208348725526417e-05, "loss": 0.0101, "step": 77980 }, { "epoch": 1.92, "learning_rate": 1.6207856175347866e-05, "loss": 0.0105, "step": 77990 }, { "epoch": 1.92, "learning_rate": 1.6207363625169315e-05, "loss": 0.0127, "step": 78000 }, { "epoch": 1.92, "learning_rate": 1.6206871074990767e-05, "loss": 0.0109, "step": 78010 }, { "epoch": 1.92, "learning_rate": 1.6206378524812216e-05, "loss": 0.0174, "step": 78020 }, { "epoch": 1.92, "learning_rate": 1.6205885974633666e-05, "loss": 0.018, "step": 78030 }, { "epoch": 1.92, "learning_rate": 1.6205393424455118e-05, "loss": 0.009, "step": 78040 }, { "epoch": 1.92, "learning_rate": 1.6204900874276567e-05, "loss": 0.0123, "step": 78050 }, { "epoch": 1.92, "learning_rate": 1.620440832409802e-05, "loss": 0.0144, "step": 78060 }, { "epoch": 1.92, "learning_rate": 1.620391577391947e-05, "loss": 0.0122, "step": 78070 }, { "epoch": 1.92, "learning_rate": 1.6203423223740918e-05, "loss": 0.0116, "step": 78080 }, { "epoch": 1.92, "learning_rate": 1.620293067356237e-05, "loss": 0.0122, "step": 78090 }, { "epoch": 1.92, "learning_rate": 1.620243812338382e-05, "loss": 0.0166, "step": 78100 }, { "epoch": 1.92, "learning_rate": 1.6201945573205272e-05, "loss": 0.012, "step": 78110 }, { "epoch": 1.92, "learning_rate": 1.620145302302672e-05, "loss": 0.012, "step": 78120 }, { "epoch": 1.92, "learning_rate": 1.6200960472848174e-05, "loss": 0.0113, "step": 78130 }, { "epoch": 1.92, "learning_rate": 1.6200467922669623e-05, "loss": 0.0187, "step": 78140 }, { "epoch": 1.92, "learning_rate": 1.6199975372491076e-05, "loss": 0.0108, "step": 78150 }, { "epoch": 1.92, "learning_rate": 1.6199482822312525e-05, "loss": 0.0209, "step": 78160 }, { "epoch": 1.92, "learning_rate": 1.6198990272133974e-05, "loss": 0.0189, "step": 78170 }, { "epoch": 1.92, "learning_rate": 1.6198497721955427e-05, "loss": 0.0098, "step": 78180 }, { "epoch": 1.92, "learning_rate": 1.6198005171776876e-05, "loss": 0.0224, "step": 78190 }, { "epoch": 1.92, "learning_rate": 1.6197512621598325e-05, "loss": 0.0115, "step": 78200 }, { "epoch": 1.92, "learning_rate": 1.6197020071419777e-05, "loss": 0.0161, "step": 78210 }, { "epoch": 1.92, "learning_rate": 1.6196527521241227e-05, "loss": 0.0114, "step": 78220 }, { "epoch": 1.92, "learning_rate": 1.619603497106268e-05, "loss": 0.0115, "step": 78230 }, { "epoch": 1.92, "learning_rate": 1.6195542420884128e-05, "loss": 0.0116, "step": 78240 }, { "epoch": 1.92, "learning_rate": 1.619504987070558e-05, "loss": 0.0287, "step": 78250 }, { "epoch": 1.92, "learning_rate": 1.619455732052703e-05, "loss": 0.0164, "step": 78260 }, { "epoch": 1.92, "learning_rate": 1.6194064770348482e-05, "loss": 0.019, "step": 78270 }, { "epoch": 1.92, "learning_rate": 1.619357222016993e-05, "loss": 0.0166, "step": 78280 }, { "epoch": 1.92, "learning_rate": 1.619307966999138e-05, "loss": 0.011, "step": 78290 }, { "epoch": 1.92, "learning_rate": 1.6192587119812833e-05, "loss": 0.0105, "step": 78300 }, { "epoch": 1.92, "learning_rate": 1.6192094569634282e-05, "loss": 0.0109, "step": 78310 }, { "epoch": 1.92, "learning_rate": 1.6191602019455735e-05, "loss": 0.0108, "step": 78320 }, { "epoch": 1.92, "learning_rate": 1.6191109469277184e-05, "loss": 0.0101, "step": 78330 }, { "epoch": 1.92, "learning_rate": 1.6190616919098633e-05, "loss": 0.0114, "step": 78340 }, { "epoch": 1.92, "learning_rate": 1.6190124368920086e-05, "loss": 0.0155, "step": 78350 }, { "epoch": 1.93, "learning_rate": 1.6189631818741535e-05, "loss": 0.014, "step": 78360 }, { "epoch": 1.93, "learning_rate": 1.6189139268562984e-05, "loss": 0.0113, "step": 78370 }, { "epoch": 1.93, "learning_rate": 1.6188646718384437e-05, "loss": 0.0143, "step": 78380 }, { "epoch": 1.93, "learning_rate": 1.6188154168205886e-05, "loss": 0.0109, "step": 78390 }, { "epoch": 1.93, "learning_rate": 1.618766161802734e-05, "loss": 0.0114, "step": 78400 }, { "epoch": 1.93, "learning_rate": 1.6187169067848787e-05, "loss": 0.0108, "step": 78410 }, { "epoch": 1.93, "learning_rate": 1.618667651767024e-05, "loss": 0.0099, "step": 78420 }, { "epoch": 1.93, "learning_rate": 1.618618396749169e-05, "loss": 0.0116, "step": 78430 }, { "epoch": 1.93, "learning_rate": 1.6185691417313142e-05, "loss": 0.0095, "step": 78440 }, { "epoch": 1.93, "learning_rate": 1.618519886713459e-05, "loss": 0.0126, "step": 78450 }, { "epoch": 1.93, "learning_rate": 1.618470631695604e-05, "loss": 0.011, "step": 78460 }, { "epoch": 1.93, "learning_rate": 1.6184213766777493e-05, "loss": 0.013, "step": 78470 }, { "epoch": 1.93, "learning_rate": 1.6183721216598942e-05, "loss": 0.0104, "step": 78480 }, { "epoch": 1.93, "learning_rate": 1.618322866642039e-05, "loss": 0.0132, "step": 78490 }, { "epoch": 1.93, "learning_rate": 1.6182736116241843e-05, "loss": 0.0122, "step": 78500 }, { "epoch": 1.93, "learning_rate": 1.6182243566063293e-05, "loss": 0.017, "step": 78510 }, { "epoch": 1.93, "learning_rate": 1.6181751015884745e-05, "loss": 0.0112, "step": 78520 }, { "epoch": 1.93, "learning_rate": 1.6181258465706194e-05, "loss": 0.0103, "step": 78530 }, { "epoch": 1.93, "learning_rate": 1.6180765915527647e-05, "loss": 0.0125, "step": 78540 }, { "epoch": 1.93, "learning_rate": 1.6180273365349096e-05, "loss": 0.0103, "step": 78550 }, { "epoch": 1.93, "learning_rate": 1.617978081517055e-05, "loss": 0.013, "step": 78560 }, { "epoch": 1.93, "learning_rate": 1.6179288264991998e-05, "loss": 0.0153, "step": 78570 }, { "epoch": 1.93, "learning_rate": 1.617879571481345e-05, "loss": 0.0105, "step": 78580 }, { "epoch": 1.93, "learning_rate": 1.61783031646349e-05, "loss": 0.0154, "step": 78590 }, { "epoch": 1.93, "learning_rate": 1.617781061445635e-05, "loss": 0.0099, "step": 78600 }, { "epoch": 1.93, "learning_rate": 1.61773180642778e-05, "loss": 0.015, "step": 78610 }, { "epoch": 1.93, "learning_rate": 1.617682551409925e-05, "loss": 0.0115, "step": 78620 }, { "epoch": 1.93, "learning_rate": 1.61763329639207e-05, "loss": 0.017, "step": 78630 }, { "epoch": 1.93, "learning_rate": 1.6175840413742152e-05, "loss": 0.0222, "step": 78640 }, { "epoch": 1.93, "learning_rate": 1.61753478635636e-05, "loss": 0.0171, "step": 78650 }, { "epoch": 1.93, "learning_rate": 1.617485531338505e-05, "loss": 0.014, "step": 78660 }, { "epoch": 1.93, "learning_rate": 1.6174362763206503e-05, "loss": 0.0115, "step": 78670 }, { "epoch": 1.93, "learning_rate": 1.6173870213027952e-05, "loss": 0.0096, "step": 78680 }, { "epoch": 1.93, "learning_rate": 1.6173377662849404e-05, "loss": 0.0112, "step": 78690 }, { "epoch": 1.93, "learning_rate": 1.6172885112670857e-05, "loss": 0.0117, "step": 78700 }, { "epoch": 1.93, "learning_rate": 1.6172392562492306e-05, "loss": 0.0122, "step": 78710 }, { "epoch": 1.93, "learning_rate": 1.6171900012313755e-05, "loss": 0.0126, "step": 78720 }, { "epoch": 1.93, "learning_rate": 1.6171407462135208e-05, "loss": 0.0103, "step": 78730 }, { "epoch": 1.93, "learning_rate": 1.6170914911956657e-05, "loss": 0.0108, "step": 78740 }, { "epoch": 1.93, "learning_rate": 1.6170422361778106e-05, "loss": 0.0122, "step": 78750 }, { "epoch": 1.93, "learning_rate": 1.616992981159956e-05, "loss": 0.0148, "step": 78760 }, { "epoch": 1.94, "learning_rate": 1.6169437261421008e-05, "loss": 0.0174, "step": 78770 }, { "epoch": 1.94, "learning_rate": 1.616894471124246e-05, "loss": 0.013, "step": 78780 }, { "epoch": 1.94, "learning_rate": 1.616845216106391e-05, "loss": 0.0134, "step": 78790 }, { "epoch": 1.94, "learning_rate": 1.616795961088536e-05, "loss": 0.0188, "step": 78800 }, { "epoch": 1.94, "learning_rate": 1.616746706070681e-05, "loss": 0.0108, "step": 78810 }, { "epoch": 1.94, "learning_rate": 1.616697451052826e-05, "loss": 0.0094, "step": 78820 }, { "epoch": 1.94, "learning_rate": 1.6166481960349713e-05, "loss": 0.0118, "step": 78830 }, { "epoch": 1.94, "learning_rate": 1.6165989410171162e-05, "loss": 0.0117, "step": 78840 }, { "epoch": 1.94, "learning_rate": 1.6165496859992614e-05, "loss": 0.0113, "step": 78850 }, { "epoch": 1.94, "learning_rate": 1.6165004309814064e-05, "loss": 0.01, "step": 78860 }, { "epoch": 1.94, "learning_rate": 1.6164511759635516e-05, "loss": 0.0108, "step": 78870 }, { "epoch": 1.94, "learning_rate": 1.6164019209456965e-05, "loss": 0.0118, "step": 78880 }, { "epoch": 1.94, "learning_rate": 1.6163526659278414e-05, "loss": 0.0116, "step": 78890 }, { "epoch": 1.94, "learning_rate": 1.6163034109099867e-05, "loss": 0.0099, "step": 78900 }, { "epoch": 1.94, "learning_rate": 1.6162541558921316e-05, "loss": 0.0112, "step": 78910 }, { "epoch": 1.94, "learning_rate": 1.6162049008742765e-05, "loss": 0.0125, "step": 78920 }, { "epoch": 1.94, "learning_rate": 1.6161556458564218e-05, "loss": 0.0122, "step": 78930 }, { "epoch": 1.94, "learning_rate": 1.6161063908385667e-05, "loss": 0.013, "step": 78940 }, { "epoch": 1.94, "learning_rate": 1.616057135820712e-05, "loss": 0.0249, "step": 78950 }, { "epoch": 1.94, "learning_rate": 1.616007880802857e-05, "loss": 0.0126, "step": 78960 }, { "epoch": 1.94, "learning_rate": 1.615958625785002e-05, "loss": 0.01, "step": 78970 }, { "epoch": 1.94, "learning_rate": 1.615909370767147e-05, "loss": 0.0117, "step": 78980 }, { "epoch": 1.94, "learning_rate": 1.6158601157492923e-05, "loss": 0.0155, "step": 78990 }, { "epoch": 1.94, "learning_rate": 1.6158108607314372e-05, "loss": 0.0104, "step": 79000 }, { "epoch": 1.94, "learning_rate": 1.615761605713582e-05, "loss": 0.0129, "step": 79010 }, { "epoch": 1.94, "learning_rate": 1.6157123506957274e-05, "loss": 0.0132, "step": 79020 }, { "epoch": 1.94, "learning_rate": 1.6156630956778723e-05, "loss": 0.0116, "step": 79030 }, { "epoch": 1.94, "learning_rate": 1.6156138406600175e-05, "loss": 0.0103, "step": 79040 }, { "epoch": 1.94, "learning_rate": 1.6155645856421624e-05, "loss": 0.0117, "step": 79050 }, { "epoch": 1.94, "learning_rate": 1.6155153306243074e-05, "loss": 0.0082, "step": 79060 }, { "epoch": 1.94, "learning_rate": 1.6154660756064526e-05, "loss": 0.0103, "step": 79070 }, { "epoch": 1.94, "learning_rate": 1.6154168205885975e-05, "loss": 0.0107, "step": 79080 }, { "epoch": 1.94, "learning_rate": 1.6153675655707424e-05, "loss": 0.0109, "step": 79090 }, { "epoch": 1.94, "learning_rate": 1.6153183105528877e-05, "loss": 0.0108, "step": 79100 }, { "epoch": 1.94, "learning_rate": 1.6152690555350326e-05, "loss": 0.0142, "step": 79110 }, { "epoch": 1.94, "learning_rate": 1.615219800517178e-05, "loss": 0.012, "step": 79120 }, { "epoch": 1.94, "learning_rate": 1.6151705454993228e-05, "loss": 0.0108, "step": 79130 }, { "epoch": 1.94, "learning_rate": 1.615121290481468e-05, "loss": 0.0139, "step": 79140 }, { "epoch": 1.94, "learning_rate": 1.615072035463613e-05, "loss": 0.0107, "step": 79150 }, { "epoch": 1.94, "learning_rate": 1.6150227804457582e-05, "loss": 0.0122, "step": 79160 }, { "epoch": 1.94, "learning_rate": 1.614973525427903e-05, "loss": 0.0112, "step": 79170 }, { "epoch": 1.95, "learning_rate": 1.614924270410048e-05, "loss": 0.015, "step": 79180 }, { "epoch": 1.95, "learning_rate": 1.6148750153921933e-05, "loss": 0.0118, "step": 79190 }, { "epoch": 1.95, "learning_rate": 1.6148257603743382e-05, "loss": 0.0146, "step": 79200 }, { "epoch": 1.95, "learning_rate": 1.614776505356483e-05, "loss": 0.0115, "step": 79210 }, { "epoch": 1.95, "learning_rate": 1.6147272503386284e-05, "loss": 0.0154, "step": 79220 }, { "epoch": 1.95, "learning_rate": 1.6146779953207733e-05, "loss": 0.0095, "step": 79230 }, { "epoch": 1.95, "learning_rate": 1.6146287403029185e-05, "loss": 0.0117, "step": 79240 }, { "epoch": 1.95, "learning_rate": 1.6145794852850635e-05, "loss": 0.0108, "step": 79250 }, { "epoch": 1.95, "learning_rate": 1.6145302302672087e-05, "loss": 0.0119, "step": 79260 }, { "epoch": 1.95, "learning_rate": 1.6144809752493536e-05, "loss": 0.0204, "step": 79270 }, { "epoch": 1.95, "learning_rate": 1.614431720231499e-05, "loss": 0.0121, "step": 79280 }, { "epoch": 1.95, "learning_rate": 1.6143824652136438e-05, "loss": 0.0101, "step": 79290 }, { "epoch": 1.95, "learning_rate": 1.614333210195789e-05, "loss": 0.0114, "step": 79300 }, { "epoch": 1.95, "learning_rate": 1.614283955177934e-05, "loss": 0.0123, "step": 79310 }, { "epoch": 1.95, "learning_rate": 1.614234700160079e-05, "loss": 0.0103, "step": 79320 }, { "epoch": 1.95, "learning_rate": 1.614185445142224e-05, "loss": 0.0102, "step": 79330 }, { "epoch": 1.95, "learning_rate": 1.614136190124369e-05, "loss": 0.0122, "step": 79340 }, { "epoch": 1.95, "learning_rate": 1.614086935106514e-05, "loss": 0.011, "step": 79350 }, { "epoch": 1.95, "learning_rate": 1.6140376800886592e-05, "loss": 0.0102, "step": 79360 }, { "epoch": 1.95, "learning_rate": 1.613988425070804e-05, "loss": 0.0088, "step": 79370 }, { "epoch": 1.95, "learning_rate": 1.613939170052949e-05, "loss": 0.011, "step": 79380 }, { "epoch": 1.95, "learning_rate": 1.6138899150350943e-05, "loss": 0.0141, "step": 79390 }, { "epoch": 1.95, "learning_rate": 1.6138406600172392e-05, "loss": 0.0122, "step": 79400 }, { "epoch": 1.95, "learning_rate": 1.6137914049993845e-05, "loss": 0.0114, "step": 79410 }, { "epoch": 1.95, "learning_rate": 1.6137421499815297e-05, "loss": 0.0139, "step": 79420 }, { "epoch": 1.95, "learning_rate": 1.6136928949636746e-05, "loss": 0.013, "step": 79430 }, { "epoch": 1.95, "learning_rate": 1.6136436399458195e-05, "loss": 0.0142, "step": 79440 }, { "epoch": 1.95, "learning_rate": 1.6135943849279648e-05, "loss": 0.0122, "step": 79450 }, { "epoch": 1.95, "learning_rate": 1.6135451299101097e-05, "loss": 0.0098, "step": 79460 }, { "epoch": 1.95, "learning_rate": 1.6134958748922546e-05, "loss": 0.0135, "step": 79470 }, { "epoch": 1.95, "learning_rate": 1.6134466198744e-05, "loss": 0.0116, "step": 79480 }, { "epoch": 1.95, "learning_rate": 1.6133973648565448e-05, "loss": 0.0097, "step": 79490 }, { "epoch": 1.95, "learning_rate": 1.61334810983869e-05, "loss": 0.0112, "step": 79500 }, { "epoch": 1.95, "learning_rate": 1.613298854820835e-05, "loss": 0.0121, "step": 79510 }, { "epoch": 1.95, "learning_rate": 1.61324959980298e-05, "loss": 0.0134, "step": 79520 }, { "epoch": 1.95, "learning_rate": 1.613200344785125e-05, "loss": 0.0215, "step": 79530 }, { "epoch": 1.95, "learning_rate": 1.61315108976727e-05, "loss": 0.0095, "step": 79540 }, { "epoch": 1.95, "learning_rate": 1.6131018347494153e-05, "loss": 0.0091, "step": 79550 }, { "epoch": 1.95, "learning_rate": 1.6130525797315602e-05, "loss": 0.0133, "step": 79560 }, { "epoch": 1.95, "learning_rate": 1.6130033247137055e-05, "loss": 0.0159, "step": 79570 }, { "epoch": 1.96, "learning_rate": 1.6129540696958504e-05, "loss": 0.0158, "step": 79580 }, { "epoch": 1.96, "learning_rate": 1.6129048146779956e-05, "loss": 0.0171, "step": 79590 }, { "epoch": 1.96, "learning_rate": 1.6128555596601406e-05, "loss": 0.0144, "step": 79600 }, { "epoch": 1.96, "learning_rate": 1.6128063046422855e-05, "loss": 0.01, "step": 79610 }, { "epoch": 1.96, "learning_rate": 1.6127570496244307e-05, "loss": 0.0106, "step": 79620 }, { "epoch": 1.96, "learning_rate": 1.6127077946065756e-05, "loss": 0.0118, "step": 79630 }, { "epoch": 1.96, "learning_rate": 1.6126585395887206e-05, "loss": 0.0129, "step": 79640 }, { "epoch": 1.96, "learning_rate": 1.6126092845708658e-05, "loss": 0.0119, "step": 79650 }, { "epoch": 1.96, "learning_rate": 1.6125600295530107e-05, "loss": 0.0096, "step": 79660 }, { "epoch": 1.96, "learning_rate": 1.612510774535156e-05, "loss": 0.0108, "step": 79670 }, { "epoch": 1.96, "learning_rate": 1.612461519517301e-05, "loss": 0.0162, "step": 79680 }, { "epoch": 1.96, "learning_rate": 1.612412264499446e-05, "loss": 0.0099, "step": 79690 }, { "epoch": 1.96, "learning_rate": 1.612363009481591e-05, "loss": 0.0109, "step": 79700 }, { "epoch": 1.96, "learning_rate": 1.6123137544637363e-05, "loss": 0.0146, "step": 79710 }, { "epoch": 1.96, "learning_rate": 1.6122644994458812e-05, "loss": 0.0108, "step": 79720 }, { "epoch": 1.96, "learning_rate": 1.612215244428026e-05, "loss": 0.0227, "step": 79730 }, { "epoch": 1.96, "learning_rate": 1.6121659894101714e-05, "loss": 0.0098, "step": 79740 }, { "epoch": 1.96, "learning_rate": 1.6121167343923163e-05, "loss": 0.0161, "step": 79750 }, { "epoch": 1.96, "learning_rate": 1.6120674793744616e-05, "loss": 0.0099, "step": 79760 }, { "epoch": 1.96, "learning_rate": 1.6120182243566065e-05, "loss": 0.0216, "step": 79770 }, { "epoch": 1.96, "learning_rate": 1.6119689693387514e-05, "loss": 0.0103, "step": 79780 }, { "epoch": 1.96, "learning_rate": 1.6119197143208967e-05, "loss": 0.015, "step": 79790 }, { "epoch": 1.96, "learning_rate": 1.6118704593030416e-05, "loss": 0.0196, "step": 79800 }, { "epoch": 1.96, "learning_rate": 1.6118212042851865e-05, "loss": 0.0114, "step": 79810 }, { "epoch": 1.96, "learning_rate": 1.6117719492673317e-05, "loss": 0.0109, "step": 79820 }, { "epoch": 1.96, "learning_rate": 1.6117226942494766e-05, "loss": 0.0168, "step": 79830 }, { "epoch": 1.96, "learning_rate": 1.611673439231622e-05, "loss": 0.0114, "step": 79840 }, { "epoch": 1.96, "learning_rate": 1.6116241842137668e-05, "loss": 0.0094, "step": 79850 }, { "epoch": 1.96, "learning_rate": 1.611574929195912e-05, "loss": 0.0098, "step": 79860 }, { "epoch": 1.96, "learning_rate": 1.611525674178057e-05, "loss": 0.011, "step": 79870 }, { "epoch": 1.96, "learning_rate": 1.6114764191602022e-05, "loss": 0.0116, "step": 79880 }, { "epoch": 1.96, "learning_rate": 1.611427164142347e-05, "loss": 0.0124, "step": 79890 }, { "epoch": 1.96, "learning_rate": 1.611377909124492e-05, "loss": 0.0101, "step": 79900 }, { "epoch": 1.96, "learning_rate": 1.6113286541066373e-05, "loss": 0.0143, "step": 79910 }, { "epoch": 1.96, "learning_rate": 1.6112793990887822e-05, "loss": 0.0136, "step": 79920 }, { "epoch": 1.96, "learning_rate": 1.611230144070927e-05, "loss": 0.0133, "step": 79930 }, { "epoch": 1.96, "learning_rate": 1.6111808890530724e-05, "loss": 0.0115, "step": 79940 }, { "epoch": 1.96, "learning_rate": 1.6111316340352173e-05, "loss": 0.0122, "step": 79950 }, { "epoch": 1.96, "learning_rate": 1.6110823790173626e-05, "loss": 0.0182, "step": 79960 }, { "epoch": 1.96, "learning_rate": 1.6110331239995075e-05, "loss": 0.0082, "step": 79970 }, { "epoch": 1.96, "learning_rate": 1.6109838689816527e-05, "loss": 0.0189, "step": 79980 }, { "epoch": 1.97, "learning_rate": 1.6109346139637977e-05, "loss": 0.011, "step": 79990 }, { "epoch": 1.97, "learning_rate": 1.610885358945943e-05, "loss": 0.0159, "step": 80000 }, { "epoch": 1.97, "eval_bleu": 97.9608, "eval_gen_len_ratio": 1.002, "eval_loss": 0.009234806522727013, "eval_runtime": 254.192, "eval_samples_per_second": 7.341, "eval_steps_per_second": 0.232, "step": 80000 }, { "epoch": 1.97, "learning_rate": 1.6108361039280878e-05, "loss": 0.0112, "step": 80010 }, { "epoch": 1.97, "learning_rate": 1.610786848910233e-05, "loss": 0.0129, "step": 80020 }, { "epoch": 1.97, "learning_rate": 1.610737593892378e-05, "loss": 0.0106, "step": 80030 }, { "epoch": 1.97, "learning_rate": 1.610688338874523e-05, "loss": 0.0113, "step": 80040 }, { "epoch": 1.97, "learning_rate": 1.610639083856668e-05, "loss": 0.0105, "step": 80050 }, { "epoch": 1.97, "learning_rate": 1.610589828838813e-05, "loss": 0.0145, "step": 80060 }, { "epoch": 1.97, "learning_rate": 1.610540573820958e-05, "loss": 0.0102, "step": 80070 }, { "epoch": 1.97, "learning_rate": 1.6104913188031032e-05, "loss": 0.0101, "step": 80080 }, { "epoch": 1.97, "learning_rate": 1.610442063785248e-05, "loss": 0.0105, "step": 80090 }, { "epoch": 1.97, "learning_rate": 1.610392808767393e-05, "loss": 0.0122, "step": 80100 }, { "epoch": 1.97, "learning_rate": 1.6103435537495383e-05, "loss": 0.0104, "step": 80110 }, { "epoch": 1.97, "learning_rate": 1.6102942987316832e-05, "loss": 0.0131, "step": 80120 }, { "epoch": 1.97, "learning_rate": 1.6102450437138285e-05, "loss": 0.0134, "step": 80130 }, { "epoch": 1.97, "learning_rate": 1.6101957886959738e-05, "loss": 0.0173, "step": 80140 }, { "epoch": 1.97, "learning_rate": 1.6101465336781187e-05, "loss": 0.0097, "step": 80150 }, { "epoch": 1.97, "learning_rate": 1.6100972786602636e-05, "loss": 0.0134, "step": 80160 }, { "epoch": 1.97, "learning_rate": 1.610048023642409e-05, "loss": 0.0115, "step": 80170 }, { "epoch": 1.97, "learning_rate": 1.6099987686245538e-05, "loss": 0.0108, "step": 80180 }, { "epoch": 1.97, "learning_rate": 1.6099495136066987e-05, "loss": 0.0115, "step": 80190 }, { "epoch": 1.97, "learning_rate": 1.609900258588844e-05, "loss": 0.0186, "step": 80200 }, { "epoch": 1.97, "learning_rate": 1.609851003570989e-05, "loss": 0.0099, "step": 80210 }, { "epoch": 1.97, "learning_rate": 1.609801748553134e-05, "loss": 0.0105, "step": 80220 }, { "epoch": 1.97, "learning_rate": 1.609752493535279e-05, "loss": 0.0146, "step": 80230 }, { "epoch": 1.97, "learning_rate": 1.609703238517424e-05, "loss": 0.0113, "step": 80240 }, { "epoch": 1.97, "learning_rate": 1.6096539834995692e-05, "loss": 0.0218, "step": 80250 }, { "epoch": 1.97, "learning_rate": 1.609604728481714e-05, "loss": 0.011, "step": 80260 }, { "epoch": 1.97, "learning_rate": 1.6095554734638593e-05, "loss": 0.0111, "step": 80270 }, { "epoch": 1.97, "learning_rate": 1.6095062184460043e-05, "loss": 0.011, "step": 80280 }, { "epoch": 1.97, "learning_rate": 1.6094569634281495e-05, "loss": 0.0108, "step": 80290 }, { "epoch": 1.97, "learning_rate": 1.6094077084102944e-05, "loss": 0.0109, "step": 80300 }, { "epoch": 1.97, "learning_rate": 1.6093584533924397e-05, "loss": 0.0117, "step": 80310 }, { "epoch": 1.97, "learning_rate": 1.6093091983745846e-05, "loss": 0.0163, "step": 80320 }, { "epoch": 1.97, "learning_rate": 1.6092599433567295e-05, "loss": 0.0226, "step": 80330 }, { "epoch": 1.97, "learning_rate": 1.6092106883388748e-05, "loss": 0.012, "step": 80340 }, { "epoch": 1.97, "learning_rate": 1.6091614333210197e-05, "loss": 0.0117, "step": 80350 }, { "epoch": 1.97, "learning_rate": 1.6091121783031646e-05, "loss": 0.0108, "step": 80360 }, { "epoch": 1.97, "learning_rate": 1.60906292328531e-05, "loss": 0.0098, "step": 80370 }, { "epoch": 1.97, "learning_rate": 1.6090136682674548e-05, "loss": 0.0115, "step": 80380 }, { "epoch": 1.97, "learning_rate": 1.6089644132496e-05, "loss": 0.0158, "step": 80390 }, { "epoch": 1.98, "learning_rate": 1.608915158231745e-05, "loss": 0.0123, "step": 80400 }, { "epoch": 1.98, "learning_rate": 1.6088659032138902e-05, "loss": 0.0094, "step": 80410 }, { "epoch": 1.98, "learning_rate": 1.608816648196035e-05, "loss": 0.0191, "step": 80420 }, { "epoch": 1.98, "learning_rate": 1.6087673931781803e-05, "loss": 0.0164, "step": 80430 }, { "epoch": 1.98, "learning_rate": 1.6087181381603253e-05, "loss": 0.0116, "step": 80440 }, { "epoch": 1.98, "learning_rate": 1.6086688831424702e-05, "loss": 0.011, "step": 80450 }, { "epoch": 1.98, "learning_rate": 1.6086196281246154e-05, "loss": 0.0128, "step": 80460 }, { "epoch": 1.98, "learning_rate": 1.6085703731067603e-05, "loss": 0.0115, "step": 80470 }, { "epoch": 1.98, "learning_rate": 1.6085211180889056e-05, "loss": 0.0115, "step": 80480 }, { "epoch": 1.98, "learning_rate": 1.6084718630710505e-05, "loss": 0.0121, "step": 80490 }, { "epoch": 1.98, "learning_rate": 1.6084226080531954e-05, "loss": 0.0123, "step": 80500 }, { "epoch": 1.98, "learning_rate": 1.6083733530353407e-05, "loss": 0.0194, "step": 80510 }, { "epoch": 1.98, "learning_rate": 1.6083240980174856e-05, "loss": 0.0099, "step": 80520 }, { "epoch": 1.98, "learning_rate": 1.6082748429996305e-05, "loss": 0.0128, "step": 80530 }, { "epoch": 1.98, "learning_rate": 1.6082255879817758e-05, "loss": 0.0107, "step": 80540 }, { "epoch": 1.98, "learning_rate": 1.6081763329639207e-05, "loss": 0.0119, "step": 80550 }, { "epoch": 1.98, "learning_rate": 1.608127077946066e-05, "loss": 0.0108, "step": 80560 }, { "epoch": 1.98, "learning_rate": 1.608077822928211e-05, "loss": 0.0096, "step": 80570 }, { "epoch": 1.98, "learning_rate": 1.608028567910356e-05, "loss": 0.0115, "step": 80580 }, { "epoch": 1.98, "learning_rate": 1.607979312892501e-05, "loss": 0.0111, "step": 80590 }, { "epoch": 1.98, "learning_rate": 1.6079300578746463e-05, "loss": 0.0105, "step": 80600 }, { "epoch": 1.98, "learning_rate": 1.6078808028567912e-05, "loss": 0.0114, "step": 80610 }, { "epoch": 1.98, "learning_rate": 1.607831547838936e-05, "loss": 0.0164, "step": 80620 }, { "epoch": 1.98, "learning_rate": 1.6077822928210814e-05, "loss": 0.0179, "step": 80630 }, { "epoch": 1.98, "learning_rate": 1.6077330378032263e-05, "loss": 0.0157, "step": 80640 }, { "epoch": 1.98, "learning_rate": 1.6076837827853712e-05, "loss": 0.0121, "step": 80650 }, { "epoch": 1.98, "learning_rate": 1.6076345277675164e-05, "loss": 0.0123, "step": 80660 }, { "epoch": 1.98, "learning_rate": 1.6075852727496614e-05, "loss": 0.0088, "step": 80670 }, { "epoch": 1.98, "learning_rate": 1.6075360177318066e-05, "loss": 0.0104, "step": 80680 }, { "epoch": 1.98, "learning_rate": 1.6074867627139515e-05, "loss": 0.0105, "step": 80690 }, { "epoch": 1.98, "learning_rate": 1.6074375076960968e-05, "loss": 0.0097, "step": 80700 }, { "epoch": 1.98, "learning_rate": 1.6073882526782417e-05, "loss": 0.0134, "step": 80710 }, { "epoch": 1.98, "learning_rate": 1.607338997660387e-05, "loss": 0.0109, "step": 80720 }, { "epoch": 1.98, "learning_rate": 1.607289742642532e-05, "loss": 0.012, "step": 80730 }, { "epoch": 1.98, "learning_rate": 1.607240487624677e-05, "loss": 0.0116, "step": 80740 }, { "epoch": 1.98, "learning_rate": 1.607191232606822e-05, "loss": 0.0096, "step": 80750 }, { "epoch": 1.98, "learning_rate": 1.607141977588967e-05, "loss": 0.0111, "step": 80760 }, { "epoch": 1.98, "learning_rate": 1.6070927225711122e-05, "loss": 0.011, "step": 80770 }, { "epoch": 1.98, "learning_rate": 1.607043467553257e-05, "loss": 0.0106, "step": 80780 }, { "epoch": 1.98, "learning_rate": 1.606994212535402e-05, "loss": 0.0121, "step": 80790 }, { "epoch": 1.99, "learning_rate": 1.6069449575175473e-05, "loss": 0.0102, "step": 80800 }, { "epoch": 1.99, "learning_rate": 1.6068957024996922e-05, "loss": 0.0131, "step": 80810 }, { "epoch": 1.99, "learning_rate": 1.606846447481837e-05, "loss": 0.0167, "step": 80820 }, { "epoch": 1.99, "learning_rate": 1.6067971924639824e-05, "loss": 0.0117, "step": 80830 }, { "epoch": 1.99, "learning_rate": 1.6067479374461273e-05, "loss": 0.0187, "step": 80840 }, { "epoch": 1.99, "learning_rate": 1.6066986824282725e-05, "loss": 0.0213, "step": 80850 }, { "epoch": 1.99, "learning_rate": 1.6066494274104178e-05, "loss": 0.0112, "step": 80860 }, { "epoch": 1.99, "learning_rate": 1.6066001723925627e-05, "loss": 0.0126, "step": 80870 }, { "epoch": 1.99, "learning_rate": 1.6065509173747076e-05, "loss": 0.0099, "step": 80880 }, { "epoch": 1.99, "learning_rate": 1.606501662356853e-05, "loss": 0.0177, "step": 80890 }, { "epoch": 1.99, "learning_rate": 1.6064524073389978e-05, "loss": 0.0125, "step": 80900 }, { "epoch": 1.99, "learning_rate": 1.6064031523211427e-05, "loss": 0.0124, "step": 80910 }, { "epoch": 1.99, "learning_rate": 1.606353897303288e-05, "loss": 0.0122, "step": 80920 }, { "epoch": 1.99, "learning_rate": 1.606304642285433e-05, "loss": 0.0095, "step": 80930 }, { "epoch": 1.99, "learning_rate": 1.606255387267578e-05, "loss": 0.0103, "step": 80940 }, { "epoch": 1.99, "learning_rate": 1.606206132249723e-05, "loss": 0.0169, "step": 80950 }, { "epoch": 1.99, "learning_rate": 1.606156877231868e-05, "loss": 0.0098, "step": 80960 }, { "epoch": 1.99, "learning_rate": 1.6061076222140132e-05, "loss": 0.0192, "step": 80970 }, { "epoch": 1.99, "learning_rate": 1.606058367196158e-05, "loss": 0.0101, "step": 80980 }, { "epoch": 1.99, "learning_rate": 1.6060091121783034e-05, "loss": 0.0222, "step": 80990 }, { "epoch": 1.99, "learning_rate": 1.6059598571604483e-05, "loss": 0.0109, "step": 81000 }, { "epoch": 1.99, "learning_rate": 1.6059106021425935e-05, "loss": 0.0116, "step": 81010 }, { "epoch": 1.99, "learning_rate": 1.6058613471247385e-05, "loss": 0.0113, "step": 81020 }, { "epoch": 1.99, "learning_rate": 1.6058120921068837e-05, "loss": 0.0104, "step": 81030 }, { "epoch": 1.99, "learning_rate": 1.6057628370890286e-05, "loss": 0.0109, "step": 81040 }, { "epoch": 1.99, "learning_rate": 1.6057135820711735e-05, "loss": 0.0233, "step": 81050 }, { "epoch": 1.99, "learning_rate": 1.6056643270533188e-05, "loss": 0.0123, "step": 81060 }, { "epoch": 1.99, "learning_rate": 1.6056150720354637e-05, "loss": 0.0184, "step": 81070 }, { "epoch": 1.99, "learning_rate": 1.6055658170176086e-05, "loss": 0.0154, "step": 81080 }, { "epoch": 1.99, "learning_rate": 1.605516561999754e-05, "loss": 0.0117, "step": 81090 }, { "epoch": 1.99, "learning_rate": 1.6054673069818988e-05, "loss": 0.0167, "step": 81100 }, { "epoch": 1.99, "learning_rate": 1.605418051964044e-05, "loss": 0.0113, "step": 81110 }, { "epoch": 1.99, "learning_rate": 1.605368796946189e-05, "loss": 0.0094, "step": 81120 }, { "epoch": 1.99, "learning_rate": 1.6053195419283342e-05, "loss": 0.0103, "step": 81130 }, { "epoch": 1.99, "learning_rate": 1.605270286910479e-05, "loss": 0.0098, "step": 81140 }, { "epoch": 1.99, "learning_rate": 1.6052210318926244e-05, "loss": 0.0114, "step": 81150 }, { "epoch": 1.99, "learning_rate": 1.6051717768747693e-05, "loss": 0.0156, "step": 81160 }, { "epoch": 1.99, "learning_rate": 1.6051225218569142e-05, "loss": 0.015, "step": 81170 }, { "epoch": 1.99, "learning_rate": 1.6050732668390595e-05, "loss": 0.0114, "step": 81180 }, { "epoch": 1.99, "learning_rate": 1.6050240118212044e-05, "loss": 0.0118, "step": 81190 }, { "epoch": 1.99, "learning_rate": 1.6049747568033496e-05, "loss": 0.0179, "step": 81200 }, { "epoch": 2.0, "learning_rate": 1.6049255017854946e-05, "loss": 0.0116, "step": 81210 }, { "epoch": 2.0, "learning_rate": 1.6048762467676395e-05, "loss": 0.0108, "step": 81220 }, { "epoch": 2.0, "learning_rate": 1.6048269917497847e-05, "loss": 0.0134, "step": 81230 }, { "epoch": 2.0, "learning_rate": 1.6047777367319296e-05, "loss": 0.0121, "step": 81240 }, { "epoch": 2.0, "learning_rate": 1.6047284817140745e-05, "loss": 0.01, "step": 81250 }, { "epoch": 2.0, "learning_rate": 1.6046792266962198e-05, "loss": 0.0145, "step": 81260 }, { "epoch": 2.0, "learning_rate": 1.6046299716783647e-05, "loss": 0.0194, "step": 81270 }, { "epoch": 2.0, "learning_rate": 1.60458071666051e-05, "loss": 0.0122, "step": 81280 }, { "epoch": 2.0, "learning_rate": 1.604531461642655e-05, "loss": 0.0143, "step": 81290 }, { "epoch": 2.0, "learning_rate": 1.6044822066248e-05, "loss": 0.0103, "step": 81300 }, { "epoch": 2.0, "learning_rate": 1.604432951606945e-05, "loss": 0.0173, "step": 81310 }, { "epoch": 2.0, "learning_rate": 1.6043836965890903e-05, "loss": 0.0099, "step": 81320 }, { "epoch": 2.0, "learning_rate": 1.6043344415712352e-05, "loss": 0.0105, "step": 81330 }, { "epoch": 2.0, "learning_rate": 1.60428518655338e-05, "loss": 0.0272, "step": 81340 }, { "epoch": 2.0, "learning_rate": 1.6042359315355254e-05, "loss": 0.016, "step": 81350 }, { "epoch": 2.0, "learning_rate": 1.6041866765176703e-05, "loss": 0.0095, "step": 81360 }, { "epoch": 2.0, "learning_rate": 1.6041374214998152e-05, "loss": 0.0115, "step": 81370 }, { "epoch": 2.0, "learning_rate": 1.6040881664819605e-05, "loss": 0.0123, "step": 81380 }, { "epoch": 2.0, "learning_rate": 1.6040389114641054e-05, "loss": 0.0112, "step": 81390 }, { "epoch": 2.0, "learning_rate": 1.6039896564462506e-05, "loss": 0.0101, "step": 81400 }, { "epoch": 2.0, "learning_rate": 1.6039404014283956e-05, "loss": 0.0194, "step": 81410 }, { "epoch": 2.0, "learning_rate": 1.6038911464105408e-05, "loss": 0.01, "step": 81420 }, { "epoch": 2.0, "learning_rate": 1.6038418913926857e-05, "loss": 0.0116, "step": 81430 }, { "epoch": 2.0, "learning_rate": 1.603792636374831e-05, "loss": 0.0142, "step": 81440 }, { "epoch": 2.0, "learning_rate": 1.603743381356976e-05, "loss": 0.0168, "step": 81450 }, { "epoch": 2.0, "learning_rate": 1.603694126339121e-05, "loss": 0.0121, "step": 81460 }, { "epoch": 2.0, "learning_rate": 1.603644871321266e-05, "loss": 0.0121, "step": 81470 }, { "epoch": 2.0, "learning_rate": 1.603595616303411e-05, "loss": 0.0124, "step": 81480 }, { "epoch": 2.0, "learning_rate": 1.6035463612855562e-05, "loss": 0.0187, "step": 81490 }, { "epoch": 2.0, "learning_rate": 1.603497106267701e-05, "loss": 0.0113, "step": 81500 }, { "epoch": 2.0, "learning_rate": 1.603447851249846e-05, "loss": 0.0091, "step": 81510 }, { "epoch": 2.0, "learning_rate": 1.6033985962319913e-05, "loss": 0.0254, "step": 81520 }, { "epoch": 2.0, "learning_rate": 1.6033493412141362e-05, "loss": 0.0098, "step": 81530 }, { "epoch": 2.0, "learning_rate": 1.603300086196281e-05, "loss": 0.0106, "step": 81540 }, { "epoch": 2.0, "learning_rate": 1.6032508311784264e-05, "loss": 0.0114, "step": 81550 }, { "epoch": 2.0, "learning_rate": 1.6032015761605713e-05, "loss": 0.0091, "step": 81560 }, { "epoch": 2.0, "learning_rate": 1.6031523211427166e-05, "loss": 0.0098, "step": 81570 }, { "epoch": 2.0, "learning_rate": 1.6031030661248618e-05, "loss": 0.0098, "step": 81580 }, { "epoch": 2.0, "learning_rate": 1.6030538111070067e-05, "loss": 0.0098, "step": 81590 }, { "epoch": 2.0, "learning_rate": 1.6030045560891517e-05, "loss": 0.0155, "step": 81600 }, { "epoch": 2.0, "learning_rate": 1.602955301071297e-05, "loss": 0.0114, "step": 81610 }, { "epoch": 2.01, "learning_rate": 1.6029060460534418e-05, "loss": 0.0094, "step": 81620 }, { "epoch": 2.01, "learning_rate": 1.6028567910355867e-05, "loss": 0.0103, "step": 81630 }, { "epoch": 2.01, "learning_rate": 1.602807536017732e-05, "loss": 0.012, "step": 81640 }, { "epoch": 2.01, "learning_rate": 1.602758280999877e-05, "loss": 0.0186, "step": 81650 }, { "epoch": 2.01, "learning_rate": 1.602709025982022e-05, "loss": 0.0101, "step": 81660 }, { "epoch": 2.01, "learning_rate": 1.602659770964167e-05, "loss": 0.0121, "step": 81670 }, { "epoch": 2.01, "learning_rate": 1.602610515946312e-05, "loss": 0.0101, "step": 81680 }, { "epoch": 2.01, "learning_rate": 1.6025612609284572e-05, "loss": 0.0102, "step": 81690 }, { "epoch": 2.01, "learning_rate": 1.602512005910602e-05, "loss": 0.0132, "step": 81700 }, { "epoch": 2.01, "learning_rate": 1.6024627508927474e-05, "loss": 0.0087, "step": 81710 }, { "epoch": 2.01, "learning_rate": 1.6024134958748923e-05, "loss": 0.0119, "step": 81720 }, { "epoch": 2.01, "learning_rate": 1.6023642408570376e-05, "loss": 0.0097, "step": 81730 }, { "epoch": 2.01, "learning_rate": 1.6023149858391825e-05, "loss": 0.0089, "step": 81740 }, { "epoch": 2.01, "learning_rate": 1.6022657308213277e-05, "loss": 0.0112, "step": 81750 }, { "epoch": 2.01, "learning_rate": 1.6022164758034727e-05, "loss": 0.0105, "step": 81760 }, { "epoch": 2.01, "learning_rate": 1.6021672207856176e-05, "loss": 0.0089, "step": 81770 }, { "epoch": 2.01, "learning_rate": 1.6021179657677628e-05, "loss": 0.0103, "step": 81780 }, { "epoch": 2.01, "learning_rate": 1.6020687107499077e-05, "loss": 0.0106, "step": 81790 }, { "epoch": 2.01, "learning_rate": 1.6020194557320527e-05, "loss": 0.0117, "step": 81800 }, { "epoch": 2.01, "learning_rate": 1.601970200714198e-05, "loss": 0.0103, "step": 81810 }, { "epoch": 2.01, "learning_rate": 1.6019209456963428e-05, "loss": 0.0096, "step": 81820 }, { "epoch": 2.01, "learning_rate": 1.601871690678488e-05, "loss": 0.0104, "step": 81830 }, { "epoch": 2.01, "learning_rate": 1.601822435660633e-05, "loss": 0.017, "step": 81840 }, { "epoch": 2.01, "learning_rate": 1.6017731806427782e-05, "loss": 0.0101, "step": 81850 }, { "epoch": 2.01, "learning_rate": 1.601723925624923e-05, "loss": 0.0101, "step": 81860 }, { "epoch": 2.01, "learning_rate": 1.6016746706070684e-05, "loss": 0.0099, "step": 81870 }, { "epoch": 2.01, "learning_rate": 1.6016254155892133e-05, "loss": 0.0101, "step": 81880 }, { "epoch": 2.01, "learning_rate": 1.6015761605713582e-05, "loss": 0.0216, "step": 81890 }, { "epoch": 2.01, "learning_rate": 1.6015269055535035e-05, "loss": 0.0173, "step": 81900 }, { "epoch": 2.01, "learning_rate": 1.6014776505356484e-05, "loss": 0.0127, "step": 81910 }, { "epoch": 2.01, "learning_rate": 1.6014283955177937e-05, "loss": 0.0117, "step": 81920 }, { "epoch": 2.01, "learning_rate": 1.6013791404999386e-05, "loss": 0.0101, "step": 81930 }, { "epoch": 2.01, "learning_rate": 1.6013298854820835e-05, "loss": 0.0118, "step": 81940 }, { "epoch": 2.01, "learning_rate": 1.6012806304642288e-05, "loss": 0.0085, "step": 81950 }, { "epoch": 2.01, "learning_rate": 1.6012313754463737e-05, "loss": 0.0194, "step": 81960 }, { "epoch": 2.01, "learning_rate": 1.6011821204285186e-05, "loss": 0.0101, "step": 81970 }, { "epoch": 2.01, "learning_rate": 1.601132865410664e-05, "loss": 0.0089, "step": 81980 }, { "epoch": 2.01, "learning_rate": 1.6010836103928088e-05, "loss": 0.009, "step": 81990 }, { "epoch": 2.01, "learning_rate": 1.601034355374954e-05, "loss": 0.0095, "step": 82000 }, { "epoch": 2.01, "learning_rate": 1.600985100357099e-05, "loss": 0.0089, "step": 82010 }, { "epoch": 2.01, "learning_rate": 1.6009358453392442e-05, "loss": 0.0104, "step": 82020 }, { "epoch": 2.02, "learning_rate": 1.600886590321389e-05, "loss": 0.0086, "step": 82030 }, { "epoch": 2.02, "learning_rate": 1.6008373353035343e-05, "loss": 0.0139, "step": 82040 }, { "epoch": 2.02, "learning_rate": 1.6007880802856793e-05, "loss": 0.0122, "step": 82050 }, { "epoch": 2.02, "learning_rate": 1.6007388252678242e-05, "loss": 0.0108, "step": 82060 }, { "epoch": 2.02, "learning_rate": 1.6006895702499694e-05, "loss": 0.0089, "step": 82070 }, { "epoch": 2.02, "learning_rate": 1.6006403152321143e-05, "loss": 0.0089, "step": 82080 }, { "epoch": 2.02, "learning_rate": 1.6005910602142593e-05, "loss": 0.0104, "step": 82090 }, { "epoch": 2.02, "learning_rate": 1.6005418051964045e-05, "loss": 0.011, "step": 82100 }, { "epoch": 2.02, "learning_rate": 1.6004925501785494e-05, "loss": 0.018, "step": 82110 }, { "epoch": 2.02, "learning_rate": 1.6004432951606947e-05, "loss": 0.0099, "step": 82120 }, { "epoch": 2.02, "learning_rate": 1.6003940401428396e-05, "loss": 0.011, "step": 82130 }, { "epoch": 2.02, "learning_rate": 1.600344785124985e-05, "loss": 0.0094, "step": 82140 }, { "epoch": 2.02, "learning_rate": 1.6002955301071298e-05, "loss": 0.0242, "step": 82150 }, { "epoch": 2.02, "learning_rate": 1.600246275089275e-05, "loss": 0.0091, "step": 82160 }, { "epoch": 2.02, "learning_rate": 1.60019702007142e-05, "loss": 0.0099, "step": 82170 }, { "epoch": 2.02, "learning_rate": 1.6001477650535652e-05, "loss": 0.0096, "step": 82180 }, { "epoch": 2.02, "learning_rate": 1.60009851003571e-05, "loss": 0.0114, "step": 82190 }, { "epoch": 2.02, "learning_rate": 1.600049255017855e-05, "loss": 0.0153, "step": 82200 }, { "epoch": 2.02, "learning_rate": 1.6000000000000003e-05, "loss": 0.0166, "step": 82210 }, { "epoch": 2.02, "learning_rate": 1.5999507449821452e-05, "loss": 0.0086, "step": 82220 }, { "epoch": 2.02, "learning_rate": 1.59990148996429e-05, "loss": 0.0164, "step": 82230 }, { "epoch": 2.02, "learning_rate": 1.5998522349464353e-05, "loss": 0.009, "step": 82240 }, { "epoch": 2.02, "learning_rate": 1.5998029799285803e-05, "loss": 0.0111, "step": 82250 }, { "epoch": 2.02, "learning_rate": 1.5997537249107252e-05, "loss": 0.0162, "step": 82260 }, { "epoch": 2.02, "learning_rate": 1.5997044698928704e-05, "loss": 0.0169, "step": 82270 }, { "epoch": 2.02, "learning_rate": 1.5996552148750153e-05, "loss": 0.0156, "step": 82280 }, { "epoch": 2.02, "learning_rate": 1.5996059598571606e-05, "loss": 0.0111, "step": 82290 }, { "epoch": 2.02, "learning_rate": 1.599556704839306e-05, "loss": 0.0116, "step": 82300 }, { "epoch": 2.02, "learning_rate": 1.5995074498214508e-05, "loss": 0.0152, "step": 82310 }, { "epoch": 2.02, "learning_rate": 1.5994581948035957e-05, "loss": 0.0117, "step": 82320 }, { "epoch": 2.02, "learning_rate": 1.599408939785741e-05, "loss": 0.0085, "step": 82330 }, { "epoch": 2.02, "learning_rate": 1.599359684767886e-05, "loss": 0.0102, "step": 82340 }, { "epoch": 2.02, "learning_rate": 1.5993104297500308e-05, "loss": 0.0092, "step": 82350 }, { "epoch": 2.02, "learning_rate": 1.599261174732176e-05, "loss": 0.0113, "step": 82360 }, { "epoch": 2.02, "learning_rate": 1.599211919714321e-05, "loss": 0.0108, "step": 82370 }, { "epoch": 2.02, "learning_rate": 1.5991626646964662e-05, "loss": 0.0102, "step": 82380 }, { "epoch": 2.02, "learning_rate": 1.599113409678611e-05, "loss": 0.0104, "step": 82390 }, { "epoch": 2.02, "learning_rate": 1.599064154660756e-05, "loss": 0.0093, "step": 82400 }, { "epoch": 2.02, "learning_rate": 1.5990148996429013e-05, "loss": 0.0096, "step": 82410 }, { "epoch": 2.02, "learning_rate": 1.5989656446250462e-05, "loss": 0.0102, "step": 82420 }, { "epoch": 2.03, "learning_rate": 1.5989163896071914e-05, "loss": 0.0095, "step": 82430 }, { "epoch": 2.03, "learning_rate": 1.5988671345893364e-05, "loss": 0.0091, "step": 82440 }, { "epoch": 2.03, "learning_rate": 1.5988178795714816e-05, "loss": 0.0157, "step": 82450 }, { "epoch": 2.03, "learning_rate": 1.5987686245536265e-05, "loss": 0.0099, "step": 82460 }, { "epoch": 2.03, "learning_rate": 1.5987193695357718e-05, "loss": 0.0086, "step": 82470 }, { "epoch": 2.03, "learning_rate": 1.5986701145179167e-05, "loss": 0.0093, "step": 82480 }, { "epoch": 2.03, "learning_rate": 1.5986208595000616e-05, "loss": 0.0138, "step": 82490 }, { "epoch": 2.03, "learning_rate": 1.598571604482207e-05, "loss": 0.0197, "step": 82500 }, { "epoch": 2.03, "learning_rate": 1.5985223494643518e-05, "loss": 0.0119, "step": 82510 }, { "epoch": 2.03, "learning_rate": 1.5984730944464967e-05, "loss": 0.0124, "step": 82520 }, { "epoch": 2.03, "learning_rate": 1.598423839428642e-05, "loss": 0.0108, "step": 82530 }, { "epoch": 2.03, "learning_rate": 1.598374584410787e-05, "loss": 0.0088, "step": 82540 }, { "epoch": 2.03, "learning_rate": 1.598325329392932e-05, "loss": 0.01, "step": 82550 }, { "epoch": 2.03, "learning_rate": 1.598276074375077e-05, "loss": 0.0097, "step": 82560 }, { "epoch": 2.03, "learning_rate": 1.5982268193572223e-05, "loss": 0.0106, "step": 82570 }, { "epoch": 2.03, "learning_rate": 1.5981775643393672e-05, "loss": 0.0123, "step": 82580 }, { "epoch": 2.03, "learning_rate": 1.5981283093215125e-05, "loss": 0.0093, "step": 82590 }, { "epoch": 2.03, "learning_rate": 1.5980790543036574e-05, "loss": 0.0195, "step": 82600 }, { "epoch": 2.03, "learning_rate": 1.5980297992858023e-05, "loss": 0.0096, "step": 82610 }, { "epoch": 2.03, "learning_rate": 1.5979805442679475e-05, "loss": 0.0107, "step": 82620 }, { "epoch": 2.03, "learning_rate": 1.5979312892500925e-05, "loss": 0.011, "step": 82630 }, { "epoch": 2.03, "learning_rate": 1.5978820342322377e-05, "loss": 0.0098, "step": 82640 }, { "epoch": 2.03, "learning_rate": 1.5978327792143826e-05, "loss": 0.0109, "step": 82650 }, { "epoch": 2.03, "learning_rate": 1.5977835241965275e-05, "loss": 0.0105, "step": 82660 }, { "epoch": 2.03, "learning_rate": 1.5977342691786728e-05, "loss": 0.0102, "step": 82670 }, { "epoch": 2.03, "learning_rate": 1.5976850141608177e-05, "loss": 0.0175, "step": 82680 }, { "epoch": 2.03, "learning_rate": 1.5976357591429626e-05, "loss": 0.009, "step": 82690 }, { "epoch": 2.03, "learning_rate": 1.597586504125108e-05, "loss": 0.0105, "step": 82700 }, { "epoch": 2.03, "learning_rate": 1.5975372491072528e-05, "loss": 0.0293, "step": 82710 }, { "epoch": 2.03, "learning_rate": 1.597487994089398e-05, "loss": 0.0187, "step": 82720 }, { "epoch": 2.03, "learning_rate": 1.597438739071543e-05, "loss": 0.0086, "step": 82730 }, { "epoch": 2.03, "learning_rate": 1.5973894840536882e-05, "loss": 0.0091, "step": 82740 }, { "epoch": 2.03, "learning_rate": 1.597340229035833e-05, "loss": 0.0086, "step": 82750 }, { "epoch": 2.03, "learning_rate": 1.5972909740179784e-05, "loss": 0.0106, "step": 82760 }, { "epoch": 2.03, "learning_rate": 1.5972417190001233e-05, "loss": 0.0094, "step": 82770 }, { "epoch": 2.03, "learning_rate": 1.5971924639822682e-05, "loss": 0.0092, "step": 82780 }, { "epoch": 2.03, "learning_rate": 1.5971432089644135e-05, "loss": 0.0103, "step": 82790 }, { "epoch": 2.03, "learning_rate": 1.5970939539465584e-05, "loss": 0.0183, "step": 82800 }, { "epoch": 2.03, "learning_rate": 1.5970446989287033e-05, "loss": 0.0078, "step": 82810 }, { "epoch": 2.03, "learning_rate": 1.5969954439108485e-05, "loss": 0.009, "step": 82820 }, { "epoch": 2.03, "learning_rate": 1.5969461888929935e-05, "loss": 0.0173, "step": 82830 }, { "epoch": 2.04, "learning_rate": 1.5968969338751387e-05, "loss": 0.0172, "step": 82840 }, { "epoch": 2.04, "learning_rate": 1.5968476788572836e-05, "loss": 0.0097, "step": 82850 }, { "epoch": 2.04, "learning_rate": 1.596798423839429e-05, "loss": 0.0128, "step": 82860 }, { "epoch": 2.04, "learning_rate": 1.5967491688215738e-05, "loss": 0.0141, "step": 82870 }, { "epoch": 2.04, "learning_rate": 1.596699913803719e-05, "loss": 0.0234, "step": 82880 }, { "epoch": 2.04, "learning_rate": 1.596650658785864e-05, "loss": 0.019, "step": 82890 }, { "epoch": 2.04, "learning_rate": 1.5966014037680092e-05, "loss": 0.0131, "step": 82900 }, { "epoch": 2.04, "learning_rate": 1.596552148750154e-05, "loss": 0.0113, "step": 82910 }, { "epoch": 2.04, "learning_rate": 1.596502893732299e-05, "loss": 0.0093, "step": 82920 }, { "epoch": 2.04, "learning_rate": 1.5964536387144443e-05, "loss": 0.0111, "step": 82930 }, { "epoch": 2.04, "learning_rate": 1.5964043836965892e-05, "loss": 0.0103, "step": 82940 }, { "epoch": 2.04, "learning_rate": 1.596355128678734e-05, "loss": 0.0124, "step": 82950 }, { "epoch": 2.04, "learning_rate": 1.5963058736608794e-05, "loss": 0.0098, "step": 82960 }, { "epoch": 2.04, "learning_rate": 1.5962566186430243e-05, "loss": 0.0093, "step": 82970 }, { "epoch": 2.04, "learning_rate": 1.5962073636251692e-05, "loss": 0.009, "step": 82980 }, { "epoch": 2.04, "learning_rate": 1.5961581086073145e-05, "loss": 0.0088, "step": 82990 }, { "epoch": 2.04, "learning_rate": 1.5961088535894594e-05, "loss": 0.0095, "step": 83000 }, { "epoch": 2.04, "learning_rate": 1.5960595985716046e-05, "loss": 0.0134, "step": 83010 }, { "epoch": 2.04, "learning_rate": 1.59601034355375e-05, "loss": 0.0098, "step": 83020 }, { "epoch": 2.04, "learning_rate": 1.5959610885358948e-05, "loss": 0.0096, "step": 83030 }, { "epoch": 2.04, "learning_rate": 1.5959118335180397e-05, "loss": 0.0124, "step": 83040 }, { "epoch": 2.04, "learning_rate": 1.595862578500185e-05, "loss": 0.0097, "step": 83050 }, { "epoch": 2.04, "learning_rate": 1.59581332348233e-05, "loss": 0.0119, "step": 83060 }, { "epoch": 2.04, "learning_rate": 1.5957640684644748e-05, "loss": 0.011, "step": 83070 }, { "epoch": 2.04, "learning_rate": 1.59571481344662e-05, "loss": 0.0168, "step": 83080 }, { "epoch": 2.04, "learning_rate": 1.595665558428765e-05, "loss": 0.0107, "step": 83090 }, { "epoch": 2.04, "learning_rate": 1.5956163034109102e-05, "loss": 0.0096, "step": 83100 }, { "epoch": 2.04, "learning_rate": 1.595567048393055e-05, "loss": 0.0178, "step": 83110 }, { "epoch": 2.04, "learning_rate": 1.5955177933752e-05, "loss": 0.0111, "step": 83120 }, { "epoch": 2.04, "learning_rate": 1.5954685383573453e-05, "loss": 0.0219, "step": 83130 }, { "epoch": 2.04, "learning_rate": 1.5954192833394902e-05, "loss": 0.015, "step": 83140 }, { "epoch": 2.04, "learning_rate": 1.5953700283216355e-05, "loss": 0.0096, "step": 83150 }, { "epoch": 2.04, "learning_rate": 1.5953207733037804e-05, "loss": 0.01, "step": 83160 }, { "epoch": 2.04, "learning_rate": 1.5952715182859256e-05, "loss": 0.0227, "step": 83170 }, { "epoch": 2.04, "learning_rate": 1.5952222632680706e-05, "loss": 0.0095, "step": 83180 }, { "epoch": 2.04, "learning_rate": 1.5951730082502158e-05, "loss": 0.0121, "step": 83190 }, { "epoch": 2.04, "learning_rate": 1.5951237532323607e-05, "loss": 0.0095, "step": 83200 }, { "epoch": 2.04, "learning_rate": 1.5950744982145056e-05, "loss": 0.0103, "step": 83210 }, { "epoch": 2.04, "learning_rate": 1.595025243196651e-05, "loss": 0.0106, "step": 83220 }, { "epoch": 2.04, "learning_rate": 1.5949759881787958e-05, "loss": 0.0103, "step": 83230 }, { "epoch": 2.04, "learning_rate": 1.5949267331609407e-05, "loss": 0.0098, "step": 83240 }, { "epoch": 2.05, "learning_rate": 1.594877478143086e-05, "loss": 0.0133, "step": 83250 }, { "epoch": 2.05, "learning_rate": 1.594828223125231e-05, "loss": 0.0183, "step": 83260 }, { "epoch": 2.05, "learning_rate": 1.594778968107376e-05, "loss": 0.0117, "step": 83270 }, { "epoch": 2.05, "learning_rate": 1.594729713089521e-05, "loss": 0.0202, "step": 83280 }, { "epoch": 2.05, "learning_rate": 1.5946804580716663e-05, "loss": 0.0125, "step": 83290 }, { "epoch": 2.05, "learning_rate": 1.5946312030538112e-05, "loss": 0.0138, "step": 83300 }, { "epoch": 2.05, "learning_rate": 1.5945819480359565e-05, "loss": 0.0093, "step": 83310 }, { "epoch": 2.05, "learning_rate": 1.5945326930181014e-05, "loss": 0.0227, "step": 83320 }, { "epoch": 2.05, "learning_rate": 1.5944834380002463e-05, "loss": 0.0107, "step": 83330 }, { "epoch": 2.05, "learning_rate": 1.5944341829823916e-05, "loss": 0.0099, "step": 83340 }, { "epoch": 2.05, "learning_rate": 1.5943849279645365e-05, "loss": 0.0087, "step": 83350 }, { "epoch": 2.05, "learning_rate": 1.5943356729466817e-05, "loss": 0.0113, "step": 83360 }, { "epoch": 2.05, "learning_rate": 1.5942864179288267e-05, "loss": 0.0104, "step": 83370 }, { "epoch": 2.05, "learning_rate": 1.5942371629109716e-05, "loss": 0.011, "step": 83380 }, { "epoch": 2.05, "learning_rate": 1.5941879078931168e-05, "loss": 0.011, "step": 83390 }, { "epoch": 2.05, "learning_rate": 1.5941386528752617e-05, "loss": 0.0091, "step": 83400 }, { "epoch": 2.05, "learning_rate": 1.5940893978574067e-05, "loss": 0.0101, "step": 83410 }, { "epoch": 2.05, "learning_rate": 1.594040142839552e-05, "loss": 0.0109, "step": 83420 }, { "epoch": 2.05, "learning_rate": 1.5939908878216968e-05, "loss": 0.011, "step": 83430 }, { "epoch": 2.05, "learning_rate": 1.593941632803842e-05, "loss": 0.0115, "step": 83440 }, { "epoch": 2.05, "learning_rate": 1.593892377785987e-05, "loss": 0.0098, "step": 83450 }, { "epoch": 2.05, "learning_rate": 1.5938431227681322e-05, "loss": 0.0111, "step": 83460 }, { "epoch": 2.05, "learning_rate": 1.593793867750277e-05, "loss": 0.0127, "step": 83470 }, { "epoch": 2.05, "learning_rate": 1.5937446127324224e-05, "loss": 0.0084, "step": 83480 }, { "epoch": 2.05, "learning_rate": 1.5936953577145673e-05, "loss": 0.01, "step": 83490 }, { "epoch": 2.05, "learning_rate": 1.5936461026967122e-05, "loss": 0.0095, "step": 83500 }, { "epoch": 2.05, "learning_rate": 1.5935968476788575e-05, "loss": 0.011, "step": 83510 }, { "epoch": 2.05, "learning_rate": 1.5935475926610024e-05, "loss": 0.0104, "step": 83520 }, { "epoch": 2.05, "learning_rate": 1.5934983376431473e-05, "loss": 0.0174, "step": 83530 }, { "epoch": 2.05, "learning_rate": 1.5934490826252926e-05, "loss": 0.0107, "step": 83540 }, { "epoch": 2.05, "learning_rate": 1.5933998276074375e-05, "loss": 0.0094, "step": 83550 }, { "epoch": 2.05, "learning_rate": 1.5933505725895827e-05, "loss": 0.0104, "step": 83560 }, { "epoch": 2.05, "learning_rate": 1.5933013175717277e-05, "loss": 0.02, "step": 83570 }, { "epoch": 2.05, "learning_rate": 1.593252062553873e-05, "loss": 0.0097, "step": 83580 }, { "epoch": 2.05, "learning_rate": 1.5932028075360178e-05, "loss": 0.0086, "step": 83590 }, { "epoch": 2.05, "learning_rate": 1.593153552518163e-05, "loss": 0.0107, "step": 83600 }, { "epoch": 2.05, "learning_rate": 1.593104297500308e-05, "loss": 0.0098, "step": 83610 }, { "epoch": 2.05, "learning_rate": 1.5930550424824533e-05, "loss": 0.0089, "step": 83620 }, { "epoch": 2.05, "learning_rate": 1.593005787464598e-05, "loss": 0.0129, "step": 83630 }, { "epoch": 2.05, "learning_rate": 1.592956532446743e-05, "loss": 0.0115, "step": 83640 }, { "epoch": 2.06, "learning_rate": 1.5929072774288883e-05, "loss": 0.0096, "step": 83650 }, { "epoch": 2.06, "learning_rate": 1.5928580224110332e-05, "loss": 0.0107, "step": 83660 }, { "epoch": 2.06, "learning_rate": 1.592808767393178e-05, "loss": 0.0103, "step": 83670 }, { "epoch": 2.06, "learning_rate": 1.5927595123753234e-05, "loss": 0.0101, "step": 83680 }, { "epoch": 2.06, "learning_rate": 1.5927102573574683e-05, "loss": 0.0176, "step": 83690 }, { "epoch": 2.06, "learning_rate": 1.5926610023396132e-05, "loss": 0.0097, "step": 83700 }, { "epoch": 2.06, "learning_rate": 1.5926117473217585e-05, "loss": 0.0088, "step": 83710 }, { "epoch": 2.06, "learning_rate": 1.5925624923039034e-05, "loss": 0.0096, "step": 83720 }, { "epoch": 2.06, "learning_rate": 1.5925132372860487e-05, "loss": 0.0098, "step": 83730 }, { "epoch": 2.06, "learning_rate": 1.592463982268194e-05, "loss": 0.0098, "step": 83740 }, { "epoch": 2.06, "learning_rate": 1.592414727250339e-05, "loss": 0.0155, "step": 83750 }, { "epoch": 2.06, "learning_rate": 1.5923654722324838e-05, "loss": 0.0081, "step": 83760 }, { "epoch": 2.06, "learning_rate": 1.592316217214629e-05, "loss": 0.0091, "step": 83770 }, { "epoch": 2.06, "learning_rate": 1.592266962196774e-05, "loss": 0.0103, "step": 83780 }, { "epoch": 2.06, "learning_rate": 1.592217707178919e-05, "loss": 0.0152, "step": 83790 }, { "epoch": 2.06, "learning_rate": 1.592168452161064e-05, "loss": 0.0151, "step": 83800 }, { "epoch": 2.06, "learning_rate": 1.592119197143209e-05, "loss": 0.0105, "step": 83810 }, { "epoch": 2.06, "learning_rate": 1.592069942125354e-05, "loss": 0.0196, "step": 83820 }, { "epoch": 2.06, "learning_rate": 1.5920206871074992e-05, "loss": 0.0094, "step": 83830 }, { "epoch": 2.06, "learning_rate": 1.591971432089644e-05, "loss": 0.0196, "step": 83840 }, { "epoch": 2.06, "learning_rate": 1.5919221770717893e-05, "loss": 0.0091, "step": 83850 }, { "epoch": 2.06, "learning_rate": 1.5918729220539343e-05, "loss": 0.0097, "step": 83860 }, { "epoch": 2.06, "learning_rate": 1.5918236670360795e-05, "loss": 0.0093, "step": 83870 }, { "epoch": 2.06, "learning_rate": 1.5917744120182244e-05, "loss": 0.0103, "step": 83880 }, { "epoch": 2.06, "learning_rate": 1.5917251570003697e-05, "loss": 0.0091, "step": 83890 }, { "epoch": 2.06, "learning_rate": 1.5916759019825146e-05, "loss": 0.0111, "step": 83900 }, { "epoch": 2.06, "learning_rate": 1.59162664696466e-05, "loss": 0.0101, "step": 83910 }, { "epoch": 2.06, "learning_rate": 1.5915773919468048e-05, "loss": 0.024, "step": 83920 }, { "epoch": 2.06, "learning_rate": 1.5915281369289497e-05, "loss": 0.0101, "step": 83930 }, { "epoch": 2.06, "learning_rate": 1.591478881911095e-05, "loss": 0.0087, "step": 83940 }, { "epoch": 2.06, "learning_rate": 1.59142962689324e-05, "loss": 0.0108, "step": 83950 }, { "epoch": 2.06, "learning_rate": 1.5913803718753848e-05, "loss": 0.0104, "step": 83960 }, { "epoch": 2.06, "learning_rate": 1.59133111685753e-05, "loss": 0.0099, "step": 83970 }, { "epoch": 2.06, "learning_rate": 1.591281861839675e-05, "loss": 0.0107, "step": 83980 }, { "epoch": 2.06, "learning_rate": 1.5912326068218202e-05, "loss": 0.015, "step": 83990 }, { "epoch": 2.06, "learning_rate": 1.591183351803965e-05, "loss": 0.0159, "step": 84000 }, { "epoch": 2.06, "learning_rate": 1.5911340967861104e-05, "loss": 0.0106, "step": 84010 }, { "epoch": 2.06, "learning_rate": 1.5910848417682553e-05, "loss": 0.015, "step": 84020 }, { "epoch": 2.06, "learning_rate": 1.5910355867504005e-05, "loss": 0.0141, "step": 84030 }, { "epoch": 2.06, "learning_rate": 1.5909863317325454e-05, "loss": 0.012, "step": 84040 }, { "epoch": 2.06, "learning_rate": 1.5909370767146904e-05, "loss": 0.0143, "step": 84050 }, { "epoch": 2.07, "learning_rate": 1.5908878216968356e-05, "loss": 0.0089, "step": 84060 }, { "epoch": 2.07, "learning_rate": 1.5908385666789805e-05, "loss": 0.0218, "step": 84070 }, { "epoch": 2.07, "learning_rate": 1.5907893116611258e-05, "loss": 0.0093, "step": 84080 }, { "epoch": 2.07, "learning_rate": 1.5907400566432707e-05, "loss": 0.018, "step": 84090 }, { "epoch": 2.07, "learning_rate": 1.5906908016254156e-05, "loss": 0.011, "step": 84100 }, { "epoch": 2.07, "learning_rate": 1.590641546607561e-05, "loss": 0.0092, "step": 84110 }, { "epoch": 2.07, "learning_rate": 1.5905922915897058e-05, "loss": 0.0119, "step": 84120 }, { "epoch": 2.07, "learning_rate": 1.5905430365718507e-05, "loss": 0.0107, "step": 84130 }, { "epoch": 2.07, "learning_rate": 1.590493781553996e-05, "loss": 0.0104, "step": 84140 }, { "epoch": 2.07, "learning_rate": 1.590444526536141e-05, "loss": 0.0127, "step": 84150 }, { "epoch": 2.07, "learning_rate": 1.590395271518286e-05, "loss": 0.0133, "step": 84160 }, { "epoch": 2.07, "learning_rate": 1.590346016500431e-05, "loss": 0.0106, "step": 84170 }, { "epoch": 2.07, "learning_rate": 1.5902967614825763e-05, "loss": 0.011, "step": 84180 }, { "epoch": 2.07, "learning_rate": 1.5902475064647212e-05, "loss": 0.0221, "step": 84190 }, { "epoch": 2.07, "learning_rate": 1.5901982514468664e-05, "loss": 0.0098, "step": 84200 }, { "epoch": 2.07, "learning_rate": 1.5901489964290114e-05, "loss": 0.0104, "step": 84210 }, { "epoch": 2.07, "learning_rate": 1.5900997414111563e-05, "loss": 0.0088, "step": 84220 }, { "epoch": 2.07, "learning_rate": 1.5900504863933015e-05, "loss": 0.0095, "step": 84230 }, { "epoch": 2.07, "learning_rate": 1.5900012313754464e-05, "loss": 0.0137, "step": 84240 }, { "epoch": 2.07, "learning_rate": 1.5899519763575914e-05, "loss": 0.0113, "step": 84250 }, { "epoch": 2.07, "learning_rate": 1.5899027213397366e-05, "loss": 0.0097, "step": 84260 }, { "epoch": 2.07, "learning_rate": 1.5898534663218815e-05, "loss": 0.0089, "step": 84270 }, { "epoch": 2.07, "learning_rate": 1.5898042113040268e-05, "loss": 0.0107, "step": 84280 }, { "epoch": 2.07, "learning_rate": 1.5897549562861717e-05, "loss": 0.0115, "step": 84290 }, { "epoch": 2.07, "learning_rate": 1.589705701268317e-05, "loss": 0.0096, "step": 84300 }, { "epoch": 2.07, "learning_rate": 1.589656446250462e-05, "loss": 0.008, "step": 84310 }, { "epoch": 2.07, "learning_rate": 1.589607191232607e-05, "loss": 0.0098, "step": 84320 }, { "epoch": 2.07, "learning_rate": 1.589557936214752e-05, "loss": 0.0176, "step": 84330 }, { "epoch": 2.07, "learning_rate": 1.5895086811968973e-05, "loss": 0.0099, "step": 84340 }, { "epoch": 2.07, "learning_rate": 1.5894594261790422e-05, "loss": 0.0111, "step": 84350 }, { "epoch": 2.07, "learning_rate": 1.589410171161187e-05, "loss": 0.0087, "step": 84360 }, { "epoch": 2.07, "learning_rate": 1.5893609161433324e-05, "loss": 0.0115, "step": 84370 }, { "epoch": 2.07, "learning_rate": 1.5893116611254773e-05, "loss": 0.0111, "step": 84380 }, { "epoch": 2.07, "learning_rate": 1.5892624061076222e-05, "loss": 0.0126, "step": 84390 }, { "epoch": 2.07, "learning_rate": 1.5892131510897675e-05, "loss": 0.0096, "step": 84400 }, { "epoch": 2.07, "learning_rate": 1.5891638960719124e-05, "loss": 0.0088, "step": 84410 }, { "epoch": 2.07, "learning_rate": 1.5891146410540573e-05, "loss": 0.0166, "step": 84420 }, { "epoch": 2.07, "learning_rate": 1.5890653860362025e-05, "loss": 0.0095, "step": 84430 }, { "epoch": 2.07, "learning_rate": 1.5890161310183475e-05, "loss": 0.0101, "step": 84440 }, { "epoch": 2.07, "learning_rate": 1.5889668760004927e-05, "loss": 0.0114, "step": 84450 }, { "epoch": 2.07, "learning_rate": 1.588917620982638e-05, "loss": 0.0106, "step": 84460 }, { "epoch": 2.08, "learning_rate": 1.588868365964783e-05, "loss": 0.0151, "step": 84470 }, { "epoch": 2.08, "learning_rate": 1.5888191109469278e-05, "loss": 0.011, "step": 84480 }, { "epoch": 2.08, "learning_rate": 1.588769855929073e-05, "loss": 0.0109, "step": 84490 }, { "epoch": 2.08, "learning_rate": 1.588720600911218e-05, "loss": 0.0107, "step": 84500 }, { "epoch": 2.08, "learning_rate": 1.588671345893363e-05, "loss": 0.0087, "step": 84510 }, { "epoch": 2.08, "learning_rate": 1.588622090875508e-05, "loss": 0.0082, "step": 84520 }, { "epoch": 2.08, "learning_rate": 1.588572835857653e-05, "loss": 0.0184, "step": 84530 }, { "epoch": 2.08, "learning_rate": 1.588523580839798e-05, "loss": 0.0088, "step": 84540 }, { "epoch": 2.08, "learning_rate": 1.5884743258219432e-05, "loss": 0.0103, "step": 84550 }, { "epoch": 2.08, "learning_rate": 1.588425070804088e-05, "loss": 0.0084, "step": 84560 }, { "epoch": 2.08, "learning_rate": 1.5883758157862334e-05, "loss": 0.0192, "step": 84570 }, { "epoch": 2.08, "learning_rate": 1.5883265607683783e-05, "loss": 0.0097, "step": 84580 }, { "epoch": 2.08, "learning_rate": 1.5882773057505235e-05, "loss": 0.0104, "step": 84590 }, { "epoch": 2.08, "learning_rate": 1.5882280507326685e-05, "loss": 0.009, "step": 84600 }, { "epoch": 2.08, "learning_rate": 1.5881787957148137e-05, "loss": 0.0103, "step": 84610 }, { "epoch": 2.08, "learning_rate": 1.5881295406969586e-05, "loss": 0.012, "step": 84620 }, { "epoch": 2.08, "learning_rate": 1.588080285679104e-05, "loss": 0.0128, "step": 84630 }, { "epoch": 2.08, "learning_rate": 1.5880310306612488e-05, "loss": 0.0169, "step": 84640 }, { "epoch": 2.08, "learning_rate": 1.5879817756433937e-05, "loss": 0.0096, "step": 84650 }, { "epoch": 2.08, "learning_rate": 1.587932520625539e-05, "loss": 0.0299, "step": 84660 }, { "epoch": 2.08, "learning_rate": 1.587883265607684e-05, "loss": 0.0096, "step": 84670 }, { "epoch": 2.08, "learning_rate": 1.5878340105898288e-05, "loss": 0.0092, "step": 84680 }, { "epoch": 2.08, "learning_rate": 1.587784755571974e-05, "loss": 0.0209, "step": 84690 }, { "epoch": 2.08, "learning_rate": 1.587735500554119e-05, "loss": 0.0092, "step": 84700 }, { "epoch": 2.08, "learning_rate": 1.5876862455362642e-05, "loss": 0.0107, "step": 84710 }, { "epoch": 2.08, "learning_rate": 1.587636990518409e-05, "loss": 0.0166, "step": 84720 }, { "epoch": 2.08, "learning_rate": 1.5875877355005544e-05, "loss": 0.0141, "step": 84730 }, { "epoch": 2.08, "learning_rate": 1.5875384804826993e-05, "loss": 0.0093, "step": 84740 }, { "epoch": 2.08, "learning_rate": 1.5874892254648446e-05, "loss": 0.0123, "step": 84750 }, { "epoch": 2.08, "learning_rate": 1.5874399704469895e-05, "loss": 0.01, "step": 84760 }, { "epoch": 2.08, "learning_rate": 1.5873907154291344e-05, "loss": 0.0086, "step": 84770 }, { "epoch": 2.08, "learning_rate": 1.5873414604112796e-05, "loss": 0.0104, "step": 84780 }, { "epoch": 2.08, "learning_rate": 1.5872922053934246e-05, "loss": 0.0082, "step": 84790 }, { "epoch": 2.08, "learning_rate": 1.5872429503755698e-05, "loss": 0.0179, "step": 84800 }, { "epoch": 2.08, "learning_rate": 1.5871936953577147e-05, "loss": 0.0111, "step": 84810 }, { "epoch": 2.08, "learning_rate": 1.5871444403398596e-05, "loss": 0.01, "step": 84820 }, { "epoch": 2.08, "learning_rate": 1.587095185322005e-05, "loss": 0.0156, "step": 84830 }, { "epoch": 2.08, "learning_rate": 1.5870459303041498e-05, "loss": 0.0109, "step": 84840 }, { "epoch": 2.08, "learning_rate": 1.5869966752862947e-05, "loss": 0.0167, "step": 84850 }, { "epoch": 2.08, "learning_rate": 1.58694742026844e-05, "loss": 0.0108, "step": 84860 }, { "epoch": 2.09, "learning_rate": 1.586898165250585e-05, "loss": 0.0106, "step": 84870 }, { "epoch": 2.09, "learning_rate": 1.58684891023273e-05, "loss": 0.0113, "step": 84880 }, { "epoch": 2.09, "learning_rate": 1.586799655214875e-05, "loss": 0.0091, "step": 84890 }, { "epoch": 2.09, "learning_rate": 1.5867504001970203e-05, "loss": 0.0109, "step": 84900 }, { "epoch": 2.09, "learning_rate": 1.5867011451791652e-05, "loss": 0.0112, "step": 84910 }, { "epoch": 2.09, "learning_rate": 1.5866518901613105e-05, "loss": 0.0081, "step": 84920 }, { "epoch": 2.09, "learning_rate": 1.5866026351434554e-05, "loss": 0.0106, "step": 84930 }, { "epoch": 2.09, "learning_rate": 1.5865533801256003e-05, "loss": 0.0101, "step": 84940 }, { "epoch": 2.09, "learning_rate": 1.5865041251077456e-05, "loss": 0.0096, "step": 84950 }, { "epoch": 2.09, "learning_rate": 1.5864548700898905e-05, "loss": 0.0082, "step": 84960 }, { "epoch": 2.09, "learning_rate": 1.5864056150720354e-05, "loss": 0.0121, "step": 84970 }, { "epoch": 2.09, "learning_rate": 1.5863563600541806e-05, "loss": 0.0121, "step": 84980 }, { "epoch": 2.09, "learning_rate": 1.5863071050363256e-05, "loss": 0.0099, "step": 84990 }, { "epoch": 2.09, "learning_rate": 1.5862578500184708e-05, "loss": 0.013, "step": 85000 }, { "epoch": 2.09, "eval_bleu": 98.1758, "eval_gen_len_ratio": 1.0005, "eval_loss": 0.009259458631277084, "eval_runtime": 254.316, "eval_samples_per_second": 7.337, "eval_steps_per_second": 0.232, "step": 85000 } ], "max_steps": 407050, "num_train_epochs": 10, "total_flos": 5.847783034551534e+18, "trial_name": null, "trial_params": null }