{ "best_metric": 0.2915886640548706, "best_model_checkpoint": "flan-t5-xl-SQuAD/checkpoint-87599", "epoch": 1.0, "global_step": 87599, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999771686891403e-05, "loss": 0.4146, "step": 20 }, { "epoch": 0.0, "learning_rate": 4.999543373782806e-05, "loss": 0.3988, "step": 40 }, { "epoch": 0.0, "learning_rate": 4.999315060674209e-05, "loss": 0.4816, "step": 60 }, { "epoch": 0.0, "learning_rate": 4.9990867475656114e-05, "loss": 0.3511, "step": 80 }, { "epoch": 0.0, "learning_rate": 4.9988584344570144e-05, "loss": 0.2744, "step": 100 }, { "epoch": 0.0, "learning_rate": 4.9986301213484174e-05, "loss": 0.3128, "step": 120 }, { "epoch": 0.0, "learning_rate": 4.9984018082398203e-05, "loss": 0.3208, "step": 140 }, { "epoch": 0.0, "learning_rate": 4.998173495131223e-05, "loss": 0.2313, "step": 160 }, { "epoch": 0.0, "learning_rate": 4.9979451820226256e-05, "loss": 0.3397, "step": 180 }, { "epoch": 0.0, "learning_rate": 4.997716868914029e-05, "loss": 0.1869, "step": 200 }, { "epoch": 0.0, "learning_rate": 4.9974885558054315e-05, "loss": 0.3035, "step": 220 }, { "epoch": 0.0, "learning_rate": 4.9972602426968345e-05, "loss": 0.2194, "step": 240 }, { "epoch": 0.0, "learning_rate": 4.9970319295882375e-05, "loss": 0.3556, "step": 260 }, { "epoch": 0.0, "learning_rate": 4.9968036164796404e-05, "loss": 0.3865, "step": 280 }, { "epoch": 0.0, "learning_rate": 4.9965753033710434e-05, "loss": 0.089, "step": 300 }, { "epoch": 0.0, "learning_rate": 4.996346990262446e-05, "loss": 0.3195, "step": 320 }, { "epoch": 0.0, "learning_rate": 4.9961186771538493e-05, "loss": 0.2291, "step": 340 }, { "epoch": 0.0, "learning_rate": 4.995890364045252e-05, "loss": 0.1328, "step": 360 }, { "epoch": 0.0, "learning_rate": 4.9956620509366546e-05, "loss": 0.256, "step": 380 }, { "epoch": 0.0, "learning_rate": 4.9954337378280576e-05, "loss": 0.24, "step": 400 }, { "epoch": 0.0, "learning_rate": 4.9952054247194605e-05, "loss": 0.403, "step": 420 }, { "epoch": 0.01, "learning_rate": 4.9949771116108635e-05, "loss": 0.3736, "step": 440 }, { "epoch": 0.01, "learning_rate": 4.9947487985022665e-05, "loss": 0.2076, "step": 460 }, { "epoch": 0.01, "learning_rate": 4.994520485393669e-05, "loss": 0.3819, "step": 480 }, { "epoch": 0.01, "learning_rate": 4.9942921722850724e-05, "loss": 0.2608, "step": 500 }, { "epoch": 0.01, "learning_rate": 4.994063859176475e-05, "loss": 0.1728, "step": 520 }, { "epoch": 0.01, "learning_rate": 4.993835546067878e-05, "loss": 0.2749, "step": 540 }, { "epoch": 0.01, "learning_rate": 4.9936072329592806e-05, "loss": 0.1894, "step": 560 }, { "epoch": 0.01, "learning_rate": 4.9933789198506836e-05, "loss": 0.2569, "step": 580 }, { "epoch": 0.01, "learning_rate": 4.9931506067420866e-05, "loss": 0.2383, "step": 600 }, { "epoch": 0.01, "learning_rate": 4.992922293633489e-05, "loss": 0.0852, "step": 620 }, { "epoch": 0.01, "learning_rate": 4.9926939805248925e-05, "loss": 0.2445, "step": 640 }, { "epoch": 0.01, "learning_rate": 4.992465667416295e-05, "loss": 0.2401, "step": 660 }, { "epoch": 0.01, "learning_rate": 4.992237354307698e-05, "loss": 0.3975, "step": 680 }, { "epoch": 0.01, "learning_rate": 4.992009041199101e-05, "loss": 0.2715, "step": 700 }, { "epoch": 0.01, "learning_rate": 4.991780728090504e-05, "loss": 0.2373, "step": 720 }, { "epoch": 0.01, "learning_rate": 4.991552414981907e-05, "loss": 0.3071, "step": 740 }, { "epoch": 0.01, "learning_rate": 4.991324101873309e-05, "loss": 0.3325, "step": 760 }, { "epoch": 0.01, "learning_rate": 4.991095788764712e-05, "loss": 0.1126, "step": 780 }, { "epoch": 0.01, "learning_rate": 4.990867475656115e-05, "loss": 0.3092, "step": 800 }, { "epoch": 0.01, "learning_rate": 4.990639162547518e-05, "loss": 0.4497, "step": 820 }, { "epoch": 0.01, "learning_rate": 4.990410849438921e-05, "loss": 0.2964, "step": 840 }, { "epoch": 0.01, "learning_rate": 4.990182536330323e-05, "loss": 0.3013, "step": 860 }, { "epoch": 0.01, "learning_rate": 4.989954223221727e-05, "loss": 0.9065, "step": 880 }, { "epoch": 0.01, "learning_rate": 4.989725910113129e-05, "loss": 0.1934, "step": 900 }, { "epoch": 0.01, "learning_rate": 4.989497597004532e-05, "loss": 0.3569, "step": 920 }, { "epoch": 0.01, "learning_rate": 4.989269283895936e-05, "loss": 0.2081, "step": 940 }, { "epoch": 0.01, "learning_rate": 4.989040970787338e-05, "loss": 0.116, "step": 960 }, { "epoch": 0.01, "learning_rate": 4.988812657678741e-05, "loss": 0.1229, "step": 980 }, { "epoch": 0.01, "learning_rate": 4.988584344570143e-05, "loss": 0.3488, "step": 1000 }, { "epoch": 0.01, "learning_rate": 4.988356031461547e-05, "loss": 0.1855, "step": 1020 }, { "epoch": 0.01, "learning_rate": 4.98812771835295e-05, "loss": 0.2335, "step": 1040 }, { "epoch": 0.01, "learning_rate": 4.987899405244352e-05, "loss": 0.2094, "step": 1060 }, { "epoch": 0.01, "learning_rate": 4.987671092135755e-05, "loss": 0.1951, "step": 1080 }, { "epoch": 0.01, "learning_rate": 4.987442779027158e-05, "loss": 0.3398, "step": 1100 }, { "epoch": 0.01, "learning_rate": 4.987214465918561e-05, "loss": 0.1308, "step": 1120 }, { "epoch": 0.01, "learning_rate": 4.986986152809964e-05, "loss": 0.2623, "step": 1140 }, { "epoch": 0.01, "learning_rate": 4.986757839701366e-05, "loss": 0.2309, "step": 1160 }, { "epoch": 0.01, "learning_rate": 4.98652952659277e-05, "loss": 0.4803, "step": 1180 }, { "epoch": 0.01, "learning_rate": 4.986301213484172e-05, "loss": 0.3203, "step": 1200 }, { "epoch": 0.01, "learning_rate": 4.986072900375575e-05, "loss": 0.2332, "step": 1220 }, { "epoch": 0.01, "learning_rate": 4.985844587266978e-05, "loss": 0.2645, "step": 1240 }, { "epoch": 0.01, "learning_rate": 4.985616274158381e-05, "loss": 0.1989, "step": 1260 }, { "epoch": 0.01, "learning_rate": 4.985387961049784e-05, "loss": 0.1773, "step": 1280 }, { "epoch": 0.01, "learning_rate": 4.9851596479411864e-05, "loss": 0.1135, "step": 1300 }, { "epoch": 0.02, "learning_rate": 4.98493133483259e-05, "loss": 0.4032, "step": 1320 }, { "epoch": 0.02, "learning_rate": 4.984703021723992e-05, "loss": 0.2179, "step": 1340 }, { "epoch": 0.02, "learning_rate": 4.984474708615395e-05, "loss": 0.3003, "step": 1360 }, { "epoch": 0.02, "learning_rate": 4.984246395506798e-05, "loss": 0.2851, "step": 1380 }, { "epoch": 0.02, "learning_rate": 4.984018082398201e-05, "loss": 0.5492, "step": 1400 }, { "epoch": 0.02, "learning_rate": 4.983789769289604e-05, "loss": 0.2551, "step": 1420 }, { "epoch": 0.02, "learning_rate": 4.9835614561810065e-05, "loss": 0.1204, "step": 1440 }, { "epoch": 0.02, "learning_rate": 4.9833331430724095e-05, "loss": 0.3419, "step": 1460 }, { "epoch": 0.02, "learning_rate": 4.983104829963813e-05, "loss": 0.2504, "step": 1480 }, { "epoch": 0.02, "learning_rate": 4.9828765168552154e-05, "loss": 0.1644, "step": 1500 }, { "epoch": 0.02, "learning_rate": 4.9826482037466184e-05, "loss": 0.314, "step": 1520 }, { "epoch": 0.02, "learning_rate": 4.982419890638021e-05, "loss": 0.4252, "step": 1540 }, { "epoch": 0.02, "learning_rate": 4.982191577529424e-05, "loss": 0.1927, "step": 1560 }, { "epoch": 0.02, "learning_rate": 4.981963264420827e-05, "loss": 0.3194, "step": 1580 }, { "epoch": 0.02, "learning_rate": 4.9817349513122296e-05, "loss": 0.4047, "step": 1600 }, { "epoch": 0.02, "learning_rate": 4.981506638203633e-05, "loss": 0.3106, "step": 1620 }, { "epoch": 0.02, "learning_rate": 4.9812783250950355e-05, "loss": 0.5015, "step": 1640 }, { "epoch": 0.02, "learning_rate": 4.9810500119864385e-05, "loss": 0.1732, "step": 1660 }, { "epoch": 0.02, "learning_rate": 4.9808216988778414e-05, "loss": 0.2418, "step": 1680 }, { "epoch": 0.02, "learning_rate": 4.9805933857692444e-05, "loss": 0.1945, "step": 1700 }, { "epoch": 0.02, "learning_rate": 4.9803650726606474e-05, "loss": 0.2372, "step": 1720 }, { "epoch": 0.02, "learning_rate": 4.98013675955205e-05, "loss": 0.2333, "step": 1740 }, { "epoch": 0.02, "learning_rate": 4.9799084464434526e-05, "loss": 0.195, "step": 1760 }, { "epoch": 0.02, "learning_rate": 4.9796801333348556e-05, "loss": 0.4325, "step": 1780 }, { "epoch": 0.02, "learning_rate": 4.9794518202262586e-05, "loss": 0.5061, "step": 1800 }, { "epoch": 0.02, "learning_rate": 4.9792235071176615e-05, "loss": 0.3211, "step": 1820 }, { "epoch": 0.02, "learning_rate": 4.978995194009064e-05, "loss": 0.1807, "step": 1840 }, { "epoch": 0.02, "learning_rate": 4.9787668809004675e-05, "loss": 0.1973, "step": 1860 }, { "epoch": 0.02, "learning_rate": 4.97853856779187e-05, "loss": 0.1652, "step": 1880 }, { "epoch": 0.02, "learning_rate": 4.978310254683273e-05, "loss": 0.1362, "step": 1900 }, { "epoch": 0.02, "learning_rate": 4.978081941574676e-05, "loss": 0.4467, "step": 1920 }, { "epoch": 0.02, "learning_rate": 4.977853628466079e-05, "loss": 0.1253, "step": 1940 }, { "epoch": 0.02, "learning_rate": 4.9776253153574816e-05, "loss": 0.3409, "step": 1960 }, { "epoch": 0.02, "learning_rate": 4.977397002248884e-05, "loss": 0.3037, "step": 1980 }, { "epoch": 0.02, "learning_rate": 4.9771686891402876e-05, "loss": 0.2382, "step": 2000 }, { "epoch": 0.02, "learning_rate": 4.97694037603169e-05, "loss": 0.2266, "step": 2020 }, { "epoch": 0.02, "learning_rate": 4.976712062923093e-05, "loss": 0.1674, "step": 2040 }, { "epoch": 0.02, "learning_rate": 4.976483749814496e-05, "loss": 0.3653, "step": 2060 }, { "epoch": 0.02, "learning_rate": 4.976255436705899e-05, "loss": 0.3434, "step": 2080 }, { "epoch": 0.02, "learning_rate": 4.976027123597302e-05, "loss": 0.2952, "step": 2100 }, { "epoch": 0.02, "learning_rate": 4.975798810488704e-05, "loss": 0.1688, "step": 2120 }, { "epoch": 0.02, "learning_rate": 4.975570497380107e-05, "loss": 0.0581, "step": 2140 }, { "epoch": 0.02, "learning_rate": 4.9753421842715107e-05, "loss": 0.1459, "step": 2160 }, { "epoch": 0.02, "learning_rate": 4.975113871162913e-05, "loss": 0.1525, "step": 2180 }, { "epoch": 0.03, "learning_rate": 4.974885558054316e-05, "loss": 0.1397, "step": 2200 }, { "epoch": 0.03, "learning_rate": 4.974657244945719e-05, "loss": 0.309, "step": 2220 }, { "epoch": 0.03, "learning_rate": 4.974428931837122e-05, "loss": 0.1207, "step": 2240 }, { "epoch": 0.03, "learning_rate": 4.974200618728525e-05, "loss": 0.221, "step": 2260 }, { "epoch": 0.03, "learning_rate": 4.973972305619927e-05, "loss": 0.4353, "step": 2280 }, { "epoch": 0.03, "learning_rate": 4.973743992511331e-05, "loss": 0.3289, "step": 2300 }, { "epoch": 0.03, "learning_rate": 4.973515679402733e-05, "loss": 0.1061, "step": 2320 }, { "epoch": 0.03, "learning_rate": 4.973287366294136e-05, "loss": 0.1281, "step": 2340 }, { "epoch": 0.03, "learning_rate": 4.973059053185539e-05, "loss": 0.3235, "step": 2360 }, { "epoch": 0.03, "learning_rate": 4.972830740076942e-05, "loss": 0.2174, "step": 2380 }, { "epoch": 0.03, "learning_rate": 4.972602426968345e-05, "loss": 0.2234, "step": 2400 }, { "epoch": 0.03, "learning_rate": 4.972374113859747e-05, "loss": 0.3072, "step": 2420 }, { "epoch": 0.03, "learning_rate": 4.97214580075115e-05, "loss": 0.3153, "step": 2440 }, { "epoch": 0.03, "learning_rate": 4.971917487642553e-05, "loss": 0.3259, "step": 2460 }, { "epoch": 0.03, "learning_rate": 4.971689174533956e-05, "loss": 0.083, "step": 2480 }, { "epoch": 0.03, "learning_rate": 4.971460861425359e-05, "loss": 0.2692, "step": 2500 }, { "epoch": 0.03, "learning_rate": 4.971232548316762e-05, "loss": 0.0583, "step": 2520 }, { "epoch": 0.03, "learning_rate": 4.971004235208165e-05, "loss": 0.1481, "step": 2540 }, { "epoch": 0.03, "learning_rate": 4.970775922099567e-05, "loss": 0.3589, "step": 2560 }, { "epoch": 0.03, "learning_rate": 4.97054760899097e-05, "loss": 0.1461, "step": 2580 }, { "epoch": 0.03, "learning_rate": 4.970319295882373e-05, "loss": 0.3339, "step": 2600 }, { "epoch": 0.03, "learning_rate": 4.970090982773776e-05, "loss": 0.2771, "step": 2620 }, { "epoch": 0.03, "learning_rate": 4.969862669665179e-05, "loss": 0.6283, "step": 2640 }, { "epoch": 0.03, "learning_rate": 4.9696343565565815e-05, "loss": 0.6704, "step": 2660 }, { "epoch": 0.03, "learning_rate": 4.969406043447985e-05, "loss": 0.6634, "step": 2680 }, { "epoch": 0.03, "learning_rate": 4.9691777303393874e-05, "loss": 0.3571, "step": 2700 }, { "epoch": 0.03, "learning_rate": 4.9689494172307904e-05, "loss": 0.2565, "step": 2720 }, { "epoch": 0.03, "learning_rate": 4.9687211041221933e-05, "loss": 0.3001, "step": 2740 }, { "epoch": 0.03, "learning_rate": 4.968492791013596e-05, "loss": 0.3792, "step": 2760 }, { "epoch": 0.03, "learning_rate": 4.968264477904999e-05, "loss": 0.1974, "step": 2780 }, { "epoch": 0.03, "learning_rate": 4.9680361647964016e-05, "loss": 0.2047, "step": 2800 }, { "epoch": 0.03, "learning_rate": 4.967807851687805e-05, "loss": 0.3492, "step": 2820 }, { "epoch": 0.03, "learning_rate": 4.967579538579208e-05, "loss": 0.1766, "step": 2840 }, { "epoch": 0.03, "learning_rate": 4.9673512254706105e-05, "loss": 0.3413, "step": 2860 }, { "epoch": 0.03, "learning_rate": 4.9671229123620134e-05, "loss": 0.4132, "step": 2880 }, { "epoch": 0.03, "learning_rate": 4.9668945992534164e-05, "loss": 0.3374, "step": 2900 }, { "epoch": 0.03, "learning_rate": 4.9666662861448194e-05, "loss": 0.3828, "step": 2920 }, { "epoch": 0.03, "learning_rate": 4.9664379730362223e-05, "loss": 0.1999, "step": 2940 }, { "epoch": 0.03, "learning_rate": 4.9662096599276246e-05, "loss": 0.2789, "step": 2960 }, { "epoch": 0.03, "learning_rate": 4.965981346819028e-05, "loss": 0.4507, "step": 2980 }, { "epoch": 0.03, "learning_rate": 4.9657530337104306e-05, "loss": 0.1836, "step": 3000 }, { "epoch": 0.03, "learning_rate": 4.9655247206018335e-05, "loss": 0.4692, "step": 3020 }, { "epoch": 0.03, "learning_rate": 4.9652964074932365e-05, "loss": 0.3735, "step": 3040 }, { "epoch": 0.03, "learning_rate": 4.9650680943846395e-05, "loss": 0.3511, "step": 3060 }, { "epoch": 0.04, "learning_rate": 4.9648397812760424e-05, "loss": 0.3194, "step": 3080 }, { "epoch": 0.04, "learning_rate": 4.964611468167445e-05, "loss": 0.3684, "step": 3100 }, { "epoch": 0.04, "learning_rate": 4.9643831550588484e-05, "loss": 0.2248, "step": 3120 }, { "epoch": 0.04, "learning_rate": 4.964154841950251e-05, "loss": 0.2539, "step": 3140 }, { "epoch": 0.04, "learning_rate": 4.9639265288416536e-05, "loss": 0.1804, "step": 3160 }, { "epoch": 0.04, "learning_rate": 4.9636982157330566e-05, "loss": 0.1204, "step": 3180 }, { "epoch": 0.04, "learning_rate": 4.9634699026244596e-05, "loss": 0.2529, "step": 3200 }, { "epoch": 0.04, "learning_rate": 4.9632415895158625e-05, "loss": 0.1288, "step": 3220 }, { "epoch": 0.04, "learning_rate": 4.963013276407265e-05, "loss": 0.16, "step": 3240 }, { "epoch": 0.04, "learning_rate": 4.962784963298668e-05, "loss": 0.2517, "step": 3260 }, { "epoch": 0.04, "learning_rate": 4.962556650190071e-05, "loss": 0.2468, "step": 3280 }, { "epoch": 0.04, "learning_rate": 4.962328337081474e-05, "loss": 0.207, "step": 3300 }, { "epoch": 0.04, "learning_rate": 4.962100023972877e-05, "loss": 0.4666, "step": 3320 }, { "epoch": 0.04, "learning_rate": 4.961871710864279e-05, "loss": 0.1697, "step": 3340 }, { "epoch": 0.04, "learning_rate": 4.9616433977556826e-05, "loss": 0.3159, "step": 3360 }, { "epoch": 0.04, "learning_rate": 4.961415084647085e-05, "loss": 0.2075, "step": 3380 }, { "epoch": 0.04, "learning_rate": 4.961186771538488e-05, "loss": 0.092, "step": 3400 }, { "epoch": 0.04, "learning_rate": 4.9609584584298916e-05, "loss": 0.4205, "step": 3420 }, { "epoch": 0.04, "learning_rate": 4.960730145321294e-05, "loss": 0.4137, "step": 3440 }, { "epoch": 0.04, "learning_rate": 4.960501832212697e-05, "loss": 0.1671, "step": 3460 }, { "epoch": 0.04, "learning_rate": 4.960273519104099e-05, "loss": 0.1885, "step": 3480 }, { "epoch": 0.04, "learning_rate": 4.960045205995503e-05, "loss": 0.1843, "step": 3500 }, { "epoch": 0.04, "learning_rate": 4.959816892886906e-05, "loss": 0.2509, "step": 3520 }, { "epoch": 0.04, "learning_rate": 4.959588579778308e-05, "loss": 0.3763, "step": 3540 }, { "epoch": 0.04, "learning_rate": 4.959360266669711e-05, "loss": 0.3572, "step": 3560 }, { "epoch": 0.04, "learning_rate": 4.959131953561114e-05, "loss": 0.218, "step": 3580 }, { "epoch": 0.04, "learning_rate": 4.958903640452517e-05, "loss": 0.1181, "step": 3600 }, { "epoch": 0.04, "learning_rate": 4.95867532734392e-05, "loss": 0.2259, "step": 3620 }, { "epoch": 0.04, "learning_rate": 4.958447014235322e-05, "loss": 0.2387, "step": 3640 }, { "epoch": 0.04, "learning_rate": 4.958218701126726e-05, "loss": 0.2646, "step": 3660 }, { "epoch": 0.04, "learning_rate": 4.957990388018128e-05, "loss": 0.198, "step": 3680 }, { "epoch": 0.04, "learning_rate": 4.957762074909531e-05, "loss": 0.2738, "step": 3700 }, { "epoch": 0.04, "learning_rate": 4.957533761800934e-05, "loss": 0.3568, "step": 3720 }, { "epoch": 0.04, "learning_rate": 4.957305448692337e-05, "loss": 0.1046, "step": 3740 }, { "epoch": 0.04, "learning_rate": 4.95707713558374e-05, "loss": 0.1474, "step": 3760 }, { "epoch": 0.04, "learning_rate": 4.956848822475142e-05, "loss": 0.4018, "step": 3780 }, { "epoch": 0.04, "learning_rate": 4.956620509366546e-05, "loss": 0.2474, "step": 3800 }, { "epoch": 0.04, "learning_rate": 4.956392196257948e-05, "loss": 0.3195, "step": 3820 }, { "epoch": 0.04, "learning_rate": 4.956163883149351e-05, "loss": 0.105, "step": 3840 }, { "epoch": 0.04, "learning_rate": 4.955935570040754e-05, "loss": 0.3242, "step": 3860 }, { "epoch": 0.04, "learning_rate": 4.955707256932157e-05, "loss": 0.2215, "step": 3880 }, { "epoch": 0.04, "learning_rate": 4.95547894382356e-05, "loss": 0.2572, "step": 3900 }, { "epoch": 0.04, "learning_rate": 4.9552506307149624e-05, "loss": 0.1422, "step": 3920 }, { "epoch": 0.04, "learning_rate": 4.9550223176063653e-05, "loss": 0.236, "step": 3940 }, { "epoch": 0.05, "learning_rate": 4.954794004497769e-05, "loss": 0.2304, "step": 3960 }, { "epoch": 0.05, "learning_rate": 4.954565691389171e-05, "loss": 0.1552, "step": 3980 }, { "epoch": 0.05, "learning_rate": 4.954337378280574e-05, "loss": 0.1963, "step": 4000 }, { "epoch": 0.05, "learning_rate": 4.9541090651719765e-05, "loss": 0.1041, "step": 4020 }, { "epoch": 0.05, "learning_rate": 4.95388075206338e-05, "loss": 0.1397, "step": 4040 }, { "epoch": 0.05, "learning_rate": 4.953652438954783e-05, "loss": 0.3273, "step": 4060 }, { "epoch": 0.05, "learning_rate": 4.9534241258461854e-05, "loss": 0.5176, "step": 4080 }, { "epoch": 0.05, "learning_rate": 4.953195812737589e-05, "loss": 0.2329, "step": 4100 }, { "epoch": 0.05, "learning_rate": 4.9529674996289914e-05, "loss": 0.3238, "step": 4120 }, { "epoch": 0.05, "learning_rate": 4.9527391865203943e-05, "loss": 0.3704, "step": 4140 }, { "epoch": 0.05, "learning_rate": 4.952510873411797e-05, "loss": 0.3404, "step": 4160 }, { "epoch": 0.05, "learning_rate": 4.9522825603032e-05, "loss": 0.4258, "step": 4180 }, { "epoch": 0.05, "learning_rate": 4.952054247194603e-05, "loss": 0.2323, "step": 4200 }, { "epoch": 0.05, "learning_rate": 4.9518259340860055e-05, "loss": 0.1458, "step": 4220 }, { "epoch": 0.05, "learning_rate": 4.9515976209774085e-05, "loss": 0.1564, "step": 4240 }, { "epoch": 0.05, "learning_rate": 4.9513693078688115e-05, "loss": 0.1343, "step": 4260 }, { "epoch": 0.05, "learning_rate": 4.9511409947602144e-05, "loss": 0.159, "step": 4280 }, { "epoch": 0.05, "learning_rate": 4.9509126816516174e-05, "loss": 0.0791, "step": 4300 }, { "epoch": 0.05, "learning_rate": 4.95068436854302e-05, "loss": 0.2465, "step": 4320 }, { "epoch": 0.05, "learning_rate": 4.9504560554344234e-05, "loss": 0.2158, "step": 4340 }, { "epoch": 0.05, "learning_rate": 4.9502277423258256e-05, "loss": 0.2615, "step": 4360 }, { "epoch": 0.05, "learning_rate": 4.9499994292172286e-05, "loss": 0.5277, "step": 4380 }, { "epoch": 0.05, "learning_rate": 4.9497711161086316e-05, "loss": 0.1444, "step": 4400 }, { "epoch": 0.05, "learning_rate": 4.9495428030000345e-05, "loss": 0.1341, "step": 4420 }, { "epoch": 0.05, "learning_rate": 4.9493144898914375e-05, "loss": 0.3018, "step": 4440 }, { "epoch": 0.05, "learning_rate": 4.94908617678284e-05, "loss": 0.3541, "step": 4460 }, { "epoch": 0.05, "learning_rate": 4.9488578636742435e-05, "loss": 0.1448, "step": 4480 }, { "epoch": 0.05, "learning_rate": 4.948629550565646e-05, "loss": 0.261, "step": 4500 }, { "epoch": 0.05, "learning_rate": 4.948401237457049e-05, "loss": 0.5355, "step": 4520 }, { "epoch": 0.05, "learning_rate": 4.948172924348452e-05, "loss": 0.1527, "step": 4540 }, { "epoch": 0.05, "learning_rate": 4.9479446112398546e-05, "loss": 0.2409, "step": 4560 }, { "epoch": 0.05, "learning_rate": 4.9477162981312576e-05, "loss": 0.2657, "step": 4580 }, { "epoch": 0.05, "learning_rate": 4.94748798502266e-05, "loss": 0.1387, "step": 4600 }, { "epoch": 0.05, "learning_rate": 4.947259671914063e-05, "loss": 0.6281, "step": 4620 }, { "epoch": 0.05, "learning_rate": 4.9470313588054665e-05, "loss": 0.1952, "step": 4640 }, { "epoch": 0.05, "learning_rate": 4.946803045696869e-05, "loss": 0.3314, "step": 4660 }, { "epoch": 0.05, "learning_rate": 4.946574732588272e-05, "loss": 0.3172, "step": 4680 }, { "epoch": 0.05, "learning_rate": 4.946346419479675e-05, "loss": 0.2003, "step": 4700 }, { "epoch": 0.05, "learning_rate": 4.946118106371078e-05, "loss": 0.1398, "step": 4720 }, { "epoch": 0.05, "learning_rate": 4.945889793262481e-05, "loss": 0.3771, "step": 4740 }, { "epoch": 0.05, "learning_rate": 4.945661480153883e-05, "loss": 0.1859, "step": 4760 }, { "epoch": 0.05, "learning_rate": 4.9454331670452866e-05, "loss": 0.2468, "step": 4780 }, { "epoch": 0.05, "learning_rate": 4.945204853936689e-05, "loss": 0.1509, "step": 4800 }, { "epoch": 0.06, "learning_rate": 4.944976540828092e-05, "loss": 0.4088, "step": 4820 }, { "epoch": 0.06, "learning_rate": 4.944748227719495e-05, "loss": 0.236, "step": 4840 }, { "epoch": 0.06, "learning_rate": 4.944519914610898e-05, "loss": 0.1133, "step": 4860 }, { "epoch": 0.06, "learning_rate": 4.944291601502301e-05, "loss": 0.3669, "step": 4880 }, { "epoch": 0.06, "learning_rate": 4.944063288393703e-05, "loss": 0.1687, "step": 4900 }, { "epoch": 0.06, "learning_rate": 4.943834975285106e-05, "loss": 0.32, "step": 4920 }, { "epoch": 0.06, "learning_rate": 4.943606662176509e-05, "loss": 0.1958, "step": 4940 }, { "epoch": 0.06, "learning_rate": 4.943378349067912e-05, "loss": 0.3438, "step": 4960 }, { "epoch": 0.06, "learning_rate": 4.943150035959315e-05, "loss": 0.3389, "step": 4980 }, { "epoch": 0.06, "learning_rate": 4.942921722850718e-05, "loss": 0.261, "step": 5000 }, { "epoch": 0.06, "learning_rate": 4.942693409742121e-05, "loss": 0.2358, "step": 5020 }, { "epoch": 0.06, "learning_rate": 4.942465096633523e-05, "loss": 0.3478, "step": 5040 }, { "epoch": 0.06, "learning_rate": 4.942236783524926e-05, "loss": 0.1177, "step": 5060 }, { "epoch": 0.06, "learning_rate": 4.942008470416329e-05, "loss": 0.1768, "step": 5080 }, { "epoch": 0.06, "learning_rate": 4.941780157307732e-05, "loss": 0.2052, "step": 5100 }, { "epoch": 0.06, "learning_rate": 4.941551844199135e-05, "loss": 0.0377, "step": 5120 }, { "epoch": 0.06, "learning_rate": 4.9413235310905373e-05, "loss": 0.1166, "step": 5140 }, { "epoch": 0.06, "learning_rate": 4.941095217981941e-05, "loss": 0.4004, "step": 5160 }, { "epoch": 0.06, "learning_rate": 4.940866904873343e-05, "loss": 0.0774, "step": 5180 }, { "epoch": 0.06, "learning_rate": 4.940638591764746e-05, "loss": 0.3109, "step": 5200 }, { "epoch": 0.06, "learning_rate": 4.940410278656149e-05, "loss": 0.2865, "step": 5220 }, { "epoch": 0.06, "learning_rate": 4.940181965547552e-05, "loss": 0.2472, "step": 5240 }, { "epoch": 0.06, "learning_rate": 4.939953652438955e-05, "loss": 0.3574, "step": 5260 }, { "epoch": 0.06, "learning_rate": 4.9397253393303574e-05, "loss": 0.1711, "step": 5280 }, { "epoch": 0.06, "learning_rate": 4.939497026221761e-05, "loss": 0.2148, "step": 5300 }, { "epoch": 0.06, "learning_rate": 4.939268713113164e-05, "loss": 0.2056, "step": 5320 }, { "epoch": 0.06, "learning_rate": 4.9390404000045663e-05, "loss": 0.1655, "step": 5340 }, { "epoch": 0.06, "learning_rate": 4.938812086895969e-05, "loss": 0.1228, "step": 5360 }, { "epoch": 0.06, "learning_rate": 4.938583773787372e-05, "loss": 0.2475, "step": 5380 }, { "epoch": 0.06, "learning_rate": 4.938355460678775e-05, "loss": 0.2276, "step": 5400 }, { "epoch": 0.06, "learning_rate": 4.938127147570178e-05, "loss": 0.1785, "step": 5420 }, { "epoch": 0.06, "learning_rate": 4.9378988344615805e-05, "loss": 0.4938, "step": 5440 }, { "epoch": 0.06, "learning_rate": 4.937670521352984e-05, "loss": 0.45, "step": 5460 }, { "epoch": 0.06, "learning_rate": 4.9374422082443864e-05, "loss": 0.1027, "step": 5480 }, { "epoch": 0.06, "learning_rate": 4.9372138951357894e-05, "loss": 0.2185, "step": 5500 }, { "epoch": 0.06, "learning_rate": 4.9369855820271924e-05, "loss": 0.1676, "step": 5520 }, { "epoch": 0.06, "learning_rate": 4.9367572689185954e-05, "loss": 0.1701, "step": 5540 }, { "epoch": 0.06, "learning_rate": 4.936528955809998e-05, "loss": 0.4243, "step": 5560 }, { "epoch": 0.06, "learning_rate": 4.9363006427014006e-05, "loss": 0.2153, "step": 5580 }, { "epoch": 0.06, "learning_rate": 4.936072329592804e-05, "loss": 0.1694, "step": 5600 }, { "epoch": 0.06, "learning_rate": 4.9358440164842065e-05, "loss": 0.1297, "step": 5620 }, { "epoch": 0.06, "learning_rate": 4.9356157033756095e-05, "loss": 0.0859, "step": 5640 }, { "epoch": 0.06, "learning_rate": 4.9353873902670125e-05, "loss": 0.2388, "step": 5660 }, { "epoch": 0.06, "learning_rate": 4.9351590771584155e-05, "loss": 0.1043, "step": 5680 }, { "epoch": 0.07, "learning_rate": 4.9349307640498184e-05, "loss": 0.2286, "step": 5700 }, { "epoch": 0.07, "learning_rate": 4.934702450941221e-05, "loss": 0.0713, "step": 5720 }, { "epoch": 0.07, "learning_rate": 4.934474137832624e-05, "loss": 0.2516, "step": 5740 }, { "epoch": 0.07, "learning_rate": 4.9342458247240266e-05, "loss": 0.1871, "step": 5760 }, { "epoch": 0.07, "learning_rate": 4.9340175116154296e-05, "loss": 0.0978, "step": 5780 }, { "epoch": 0.07, "learning_rate": 4.9337891985068326e-05, "loss": 0.2195, "step": 5800 }, { "epoch": 0.07, "learning_rate": 4.933560885398235e-05, "loss": 0.3066, "step": 5820 }, { "epoch": 0.07, "learning_rate": 4.9333325722896385e-05, "loss": 0.3537, "step": 5840 }, { "epoch": 0.07, "learning_rate": 4.933104259181041e-05, "loss": 0.2534, "step": 5860 }, { "epoch": 0.07, "learning_rate": 4.932875946072444e-05, "loss": 0.2646, "step": 5880 }, { "epoch": 0.07, "learning_rate": 4.9326476329638474e-05, "loss": 0.1812, "step": 5900 }, { "epoch": 0.07, "learning_rate": 4.93241931985525e-05, "loss": 0.3027, "step": 5920 }, { "epoch": 0.07, "learning_rate": 4.932191006746653e-05, "loss": 0.2395, "step": 5940 }, { "epoch": 0.07, "learning_rate": 4.931962693638055e-05, "loss": 0.1377, "step": 5960 }, { "epoch": 0.07, "learning_rate": 4.9317343805294586e-05, "loss": 0.4636, "step": 5980 }, { "epoch": 0.07, "learning_rate": 4.9315060674208616e-05, "loss": 0.1335, "step": 6000 }, { "epoch": 0.07, "learning_rate": 4.931277754312264e-05, "loss": 0.4473, "step": 6020 }, { "epoch": 0.07, "learning_rate": 4.931049441203667e-05, "loss": 0.1601, "step": 6040 }, { "epoch": 0.07, "learning_rate": 4.93082112809507e-05, "loss": 0.3551, "step": 6060 }, { "epoch": 0.07, "learning_rate": 4.930592814986473e-05, "loss": 0.2032, "step": 6080 }, { "epoch": 0.07, "learning_rate": 4.930364501877876e-05, "loss": 0.5486, "step": 6100 }, { "epoch": 0.07, "learning_rate": 4.930136188769278e-05, "loss": 0.1294, "step": 6120 }, { "epoch": 0.07, "learning_rate": 4.929907875660682e-05, "loss": 0.488, "step": 6140 }, { "epoch": 0.07, "learning_rate": 4.929679562552084e-05, "loss": 0.1128, "step": 6160 }, { "epoch": 0.07, "learning_rate": 4.929451249443487e-05, "loss": 0.3157, "step": 6180 }, { "epoch": 0.07, "learning_rate": 4.92922293633489e-05, "loss": 0.1536, "step": 6200 }, { "epoch": 0.07, "learning_rate": 4.928994623226293e-05, "loss": 0.4257, "step": 6220 }, { "epoch": 0.07, "learning_rate": 4.928766310117696e-05, "loss": 0.26, "step": 6240 }, { "epoch": 0.07, "learning_rate": 4.928537997009098e-05, "loss": 0.1886, "step": 6260 }, { "epoch": 0.07, "learning_rate": 4.928309683900502e-05, "loss": 0.1397, "step": 6280 }, { "epoch": 0.07, "learning_rate": 4.928081370791904e-05, "loss": 0.2481, "step": 6300 }, { "epoch": 0.07, "learning_rate": 4.927853057683307e-05, "loss": 0.2582, "step": 6320 }, { "epoch": 0.07, "learning_rate": 4.92762474457471e-05, "loss": 0.142, "step": 6340 }, { "epoch": 0.07, "learning_rate": 4.927396431466113e-05, "loss": 0.1036, "step": 6360 }, { "epoch": 0.07, "learning_rate": 4.927168118357516e-05, "loss": 0.3762, "step": 6380 }, { "epoch": 0.07, "learning_rate": 4.926939805248918e-05, "loss": 0.2052, "step": 6400 }, { "epoch": 0.07, "learning_rate": 4.926711492140321e-05, "loss": 0.2622, "step": 6420 }, { "epoch": 0.07, "learning_rate": 4.926483179031724e-05, "loss": 0.2793, "step": 6440 }, { "epoch": 0.07, "learning_rate": 4.926254865923127e-05, "loss": 0.2538, "step": 6460 }, { "epoch": 0.07, "learning_rate": 4.92602655281453e-05, "loss": 0.1243, "step": 6480 }, { "epoch": 0.07, "learning_rate": 4.9257982397059324e-05, "loss": 0.115, "step": 6500 }, { "epoch": 0.07, "learning_rate": 4.925569926597336e-05, "loss": 0.1897, "step": 6520 }, { "epoch": 0.07, "learning_rate": 4.9253416134887383e-05, "loss": 0.2655, "step": 6540 }, { "epoch": 0.07, "learning_rate": 4.925113300380141e-05, "loss": 0.5182, "step": 6560 }, { "epoch": 0.08, "learning_rate": 4.924884987271545e-05, "loss": 0.2506, "step": 6580 }, { "epoch": 0.08, "learning_rate": 4.924656674162947e-05, "loss": 0.3332, "step": 6600 }, { "epoch": 0.08, "learning_rate": 4.92442836105435e-05, "loss": 0.292, "step": 6620 }, { "epoch": 0.08, "learning_rate": 4.924200047945753e-05, "loss": 0.1046, "step": 6640 }, { "epoch": 0.08, "learning_rate": 4.923971734837156e-05, "loss": 0.1626, "step": 6660 }, { "epoch": 0.08, "learning_rate": 4.923743421728559e-05, "loss": 0.3643, "step": 6680 }, { "epoch": 0.08, "learning_rate": 4.9235151086199614e-05, "loss": 0.2524, "step": 6700 }, { "epoch": 0.08, "learning_rate": 4.9232867955113644e-05, "loss": 0.2231, "step": 6720 }, { "epoch": 0.08, "learning_rate": 4.9230584824027673e-05, "loss": 0.2231, "step": 6740 }, { "epoch": 0.08, "learning_rate": 4.92283016929417e-05, "loss": 0.1995, "step": 6760 }, { "epoch": 0.08, "learning_rate": 4.922601856185573e-05, "loss": 0.4126, "step": 6780 }, { "epoch": 0.08, "learning_rate": 4.9223735430769756e-05, "loss": 0.105, "step": 6800 }, { "epoch": 0.08, "learning_rate": 4.922145229968379e-05, "loss": 0.3149, "step": 6820 }, { "epoch": 0.08, "learning_rate": 4.9219169168597815e-05, "loss": 0.3647, "step": 6840 }, { "epoch": 0.08, "learning_rate": 4.9216886037511845e-05, "loss": 0.0917, "step": 6860 }, { "epoch": 0.08, "learning_rate": 4.9214602906425874e-05, "loss": 0.1775, "step": 6880 }, { "epoch": 0.08, "learning_rate": 4.9212319775339904e-05, "loss": 0.2351, "step": 6900 }, { "epoch": 0.08, "learning_rate": 4.9210036644253934e-05, "loss": 0.2305, "step": 6920 }, { "epoch": 0.08, "learning_rate": 4.920775351316796e-05, "loss": 0.1703, "step": 6940 }, { "epoch": 0.08, "learning_rate": 4.920547038208199e-05, "loss": 0.2927, "step": 6960 }, { "epoch": 0.08, "learning_rate": 4.9203187250996016e-05, "loss": 0.3377, "step": 6980 }, { "epoch": 0.08, "learning_rate": 4.9200904119910046e-05, "loss": 0.2706, "step": 7000 }, { "epoch": 0.08, "learning_rate": 4.9198620988824076e-05, "loss": 0.1029, "step": 7020 }, { "epoch": 0.08, "learning_rate": 4.9196337857738105e-05, "loss": 0.4802, "step": 7040 }, { "epoch": 0.08, "learning_rate": 4.9194054726652135e-05, "loss": 0.6685, "step": 7060 }, { "epoch": 0.08, "learning_rate": 4.919177159556616e-05, "loss": 0.2911, "step": 7080 }, { "epoch": 0.08, "learning_rate": 4.918948846448019e-05, "loss": 0.2216, "step": 7100 }, { "epoch": 0.08, "learning_rate": 4.9187205333394224e-05, "loss": 0.2336, "step": 7120 }, { "epoch": 0.08, "learning_rate": 4.918492220230825e-05, "loss": 0.2078, "step": 7140 }, { "epoch": 0.08, "learning_rate": 4.9182639071222277e-05, "loss": 0.3959, "step": 7160 }, { "epoch": 0.08, "learning_rate": 4.9180355940136306e-05, "loss": 0.2874, "step": 7180 }, { "epoch": 0.08, "learning_rate": 4.9178072809050336e-05, "loss": 0.4349, "step": 7200 }, { "epoch": 0.08, "learning_rate": 4.9175789677964366e-05, "loss": 0.2089, "step": 7220 }, { "epoch": 0.08, "learning_rate": 4.917350654687839e-05, "loss": 0.1043, "step": 7240 }, { "epoch": 0.08, "learning_rate": 4.9171223415792425e-05, "loss": 0.2715, "step": 7260 }, { "epoch": 0.08, "learning_rate": 4.916894028470645e-05, "loss": 0.2515, "step": 7280 }, { "epoch": 0.08, "learning_rate": 4.916665715362048e-05, "loss": 0.1364, "step": 7300 }, { "epoch": 0.08, "learning_rate": 4.916437402253451e-05, "loss": 0.1833, "step": 7320 }, { "epoch": 0.08, "learning_rate": 4.916209089144854e-05, "loss": 0.3686, "step": 7340 }, { "epoch": 0.08, "learning_rate": 4.9159807760362567e-05, "loss": 0.1231, "step": 7360 }, { "epoch": 0.08, "learning_rate": 4.915752462927659e-05, "loss": 0.3674, "step": 7380 }, { "epoch": 0.08, "learning_rate": 4.915524149819062e-05, "loss": 0.2237, "step": 7400 }, { "epoch": 0.08, "learning_rate": 4.915295836710465e-05, "loss": 0.5725, "step": 7420 }, { "epoch": 0.08, "learning_rate": 4.915067523601868e-05, "loss": 0.1593, "step": 7440 }, { "epoch": 0.09, "learning_rate": 4.914839210493271e-05, "loss": 0.155, "step": 7460 }, { "epoch": 0.09, "learning_rate": 4.914610897384674e-05, "loss": 0.3645, "step": 7480 }, { "epoch": 0.09, "learning_rate": 4.914382584276077e-05, "loss": 0.1089, "step": 7500 }, { "epoch": 0.09, "learning_rate": 4.914154271167479e-05, "loss": 0.1343, "step": 7520 }, { "epoch": 0.09, "learning_rate": 4.913925958058882e-05, "loss": 0.1967, "step": 7540 }, { "epoch": 0.09, "learning_rate": 4.913697644950285e-05, "loss": 0.0731, "step": 7560 }, { "epoch": 0.09, "learning_rate": 4.913469331841688e-05, "loss": 0.3045, "step": 7580 }, { "epoch": 0.09, "learning_rate": 4.913241018733091e-05, "loss": 0.2328, "step": 7600 }, { "epoch": 0.09, "learning_rate": 4.913012705624493e-05, "loss": 0.2044, "step": 7620 }, { "epoch": 0.09, "learning_rate": 4.912784392515897e-05, "loss": 0.1483, "step": 7640 }, { "epoch": 0.09, "learning_rate": 4.912556079407299e-05, "loss": 0.1499, "step": 7660 }, { "epoch": 0.09, "learning_rate": 4.912327766298702e-05, "loss": 0.258, "step": 7680 }, { "epoch": 0.09, "learning_rate": 4.912099453190105e-05, "loss": 0.2986, "step": 7700 }, { "epoch": 0.09, "learning_rate": 4.911871140081508e-05, "loss": 0.2889, "step": 7720 }, { "epoch": 0.09, "learning_rate": 4.911642826972911e-05, "loss": 0.2169, "step": 7740 }, { "epoch": 0.09, "learning_rate": 4.911414513864313e-05, "loss": 0.2166, "step": 7760 }, { "epoch": 0.09, "learning_rate": 4.911186200755717e-05, "loss": 0.1061, "step": 7780 }, { "epoch": 0.09, "learning_rate": 4.91095788764712e-05, "loss": 0.0693, "step": 7800 }, { "epoch": 0.09, "learning_rate": 4.910729574538522e-05, "loss": 0.1802, "step": 7820 }, { "epoch": 0.09, "learning_rate": 4.910501261429925e-05, "loss": 0.1243, "step": 7840 }, { "epoch": 0.09, "learning_rate": 4.910272948321328e-05, "loss": 0.1884, "step": 7860 }, { "epoch": 0.09, "learning_rate": 4.910044635212731e-05, "loss": 0.0823, "step": 7880 }, { "epoch": 0.09, "learning_rate": 4.909816322104134e-05, "loss": 0.2473, "step": 7900 }, { "epoch": 0.09, "learning_rate": 4.9095880089955364e-05, "loss": 0.2563, "step": 7920 }, { "epoch": 0.09, "learning_rate": 4.90935969588694e-05, "loss": 0.1227, "step": 7940 }, { "epoch": 0.09, "learning_rate": 4.909131382778342e-05, "loss": 0.436, "step": 7960 }, { "epoch": 0.09, "learning_rate": 4.908903069669745e-05, "loss": 0.241, "step": 7980 }, { "epoch": 0.09, "learning_rate": 4.908674756561148e-05, "loss": 0.1888, "step": 8000 }, { "epoch": 0.09, "learning_rate": 4.908446443452551e-05, "loss": 0.1191, "step": 8020 }, { "epoch": 0.09, "learning_rate": 4.908218130343954e-05, "loss": 0.38, "step": 8040 }, { "epoch": 0.09, "learning_rate": 4.9079898172353565e-05, "loss": 0.1595, "step": 8060 }, { "epoch": 0.09, "learning_rate": 4.90776150412676e-05, "loss": 0.4135, "step": 8080 }, { "epoch": 0.09, "learning_rate": 4.9075331910181624e-05, "loss": 0.2936, "step": 8100 }, { "epoch": 0.09, "learning_rate": 4.9073048779095654e-05, "loss": 0.3264, "step": 8120 }, { "epoch": 0.09, "learning_rate": 4.9070765648009684e-05, "loss": 0.0972, "step": 8140 }, { "epoch": 0.09, "learning_rate": 4.906848251692371e-05, "loss": 0.2992, "step": 8160 }, { "epoch": 0.09, "learning_rate": 4.906619938583774e-05, "loss": 0.2309, "step": 8180 }, { "epoch": 0.09, "learning_rate": 4.9063916254751766e-05, "loss": 0.1703, "step": 8200 }, { "epoch": 0.09, "learning_rate": 4.9061633123665795e-05, "loss": 0.1534, "step": 8220 }, { "epoch": 0.09, "learning_rate": 4.9059349992579825e-05, "loss": 0.1855, "step": 8240 }, { "epoch": 0.09, "learning_rate": 4.9057066861493855e-05, "loss": 0.1117, "step": 8260 }, { "epoch": 0.09, "learning_rate": 4.9054783730407885e-05, "loss": 0.1985, "step": 8280 }, { "epoch": 0.09, "learning_rate": 4.905250059932191e-05, "loss": 0.5225, "step": 8300 }, { "epoch": 0.09, "learning_rate": 4.9050217468235944e-05, "loss": 0.1704, "step": 8320 }, { "epoch": 0.1, "learning_rate": 4.904793433714997e-05, "loss": 0.2136, "step": 8340 }, { "epoch": 0.1, "learning_rate": 4.9045651206063996e-05, "loss": 0.3556, "step": 8360 }, { "epoch": 0.1, "learning_rate": 4.904336807497803e-05, "loss": 0.2737, "step": 8380 }, { "epoch": 0.1, "learning_rate": 4.9041084943892056e-05, "loss": 0.2719, "step": 8400 }, { "epoch": 0.1, "learning_rate": 4.9038801812806086e-05, "loss": 0.2605, "step": 8420 }, { "epoch": 0.1, "learning_rate": 4.903651868172011e-05, "loss": 0.1539, "step": 8440 }, { "epoch": 0.1, "learning_rate": 4.9034235550634145e-05, "loss": 0.1071, "step": 8460 }, { "epoch": 0.1, "learning_rate": 4.9031952419548175e-05, "loss": 0.12, "step": 8480 }, { "epoch": 0.1, "learning_rate": 4.90296692884622e-05, "loss": 0.1599, "step": 8500 }, { "epoch": 0.1, "learning_rate": 4.902738615737623e-05, "loss": 0.2021, "step": 8520 }, { "epoch": 0.1, "learning_rate": 4.902510302629026e-05, "loss": 0.3034, "step": 8540 }, { "epoch": 0.1, "learning_rate": 4.9022819895204287e-05, "loss": 0.3612, "step": 8560 }, { "epoch": 0.1, "learning_rate": 4.9020536764118316e-05, "loss": 0.3148, "step": 8580 }, { "epoch": 0.1, "learning_rate": 4.901825363303234e-05, "loss": 0.253, "step": 8600 }, { "epoch": 0.1, "learning_rate": 4.9015970501946376e-05, "loss": 0.1389, "step": 8620 }, { "epoch": 0.1, "learning_rate": 4.90136873708604e-05, "loss": 0.115, "step": 8640 }, { "epoch": 0.1, "learning_rate": 4.901140423977443e-05, "loss": 0.5993, "step": 8660 }, { "epoch": 0.1, "learning_rate": 4.900912110868846e-05, "loss": 0.3669, "step": 8680 }, { "epoch": 0.1, "learning_rate": 4.900683797760249e-05, "loss": 0.8752, "step": 8700 }, { "epoch": 0.1, "learning_rate": 4.900455484651652e-05, "loss": 0.1658, "step": 8720 }, { "epoch": 0.1, "learning_rate": 4.900227171543054e-05, "loss": 0.1777, "step": 8740 }, { "epoch": 0.1, "learning_rate": 4.8999988584344577e-05, "loss": 0.3001, "step": 8760 }, { "epoch": 0.1, "learning_rate": 4.89977054532586e-05, "loss": 0.3318, "step": 8780 }, { "epoch": 0.1, "learning_rate": 4.899542232217263e-05, "loss": 0.3603, "step": 8800 }, { "epoch": 0.1, "learning_rate": 4.899313919108666e-05, "loss": 0.1865, "step": 8820 }, { "epoch": 0.1, "learning_rate": 4.899085606000069e-05, "loss": 0.2718, "step": 8840 }, { "epoch": 0.1, "learning_rate": 4.898857292891472e-05, "loss": 0.3973, "step": 8860 }, { "epoch": 0.1, "learning_rate": 4.898628979782874e-05, "loss": 0.2394, "step": 8880 }, { "epoch": 0.1, "learning_rate": 4.898400666674277e-05, "loss": 0.1857, "step": 8900 }, { "epoch": 0.1, "learning_rate": 4.89817235356568e-05, "loss": 0.1308, "step": 8920 }, { "epoch": 0.1, "learning_rate": 4.897944040457083e-05, "loss": 0.2541, "step": 8940 }, { "epoch": 0.1, "learning_rate": 4.897715727348486e-05, "loss": 0.2427, "step": 8960 }, { "epoch": 0.1, "learning_rate": 4.897487414239888e-05, "loss": 0.1641, "step": 8980 }, { "epoch": 0.1, "learning_rate": 4.897259101131292e-05, "loss": 0.4682, "step": 9000 }, { "epoch": 0.1, "learning_rate": 4.897030788022694e-05, "loss": 0.1832, "step": 9020 }, { "epoch": 0.1, "learning_rate": 4.896802474914097e-05, "loss": 0.111, "step": 9040 }, { "epoch": 0.1, "learning_rate": 4.896574161805501e-05, "loss": 0.2369, "step": 9060 }, { "epoch": 0.1, "learning_rate": 4.896345848696903e-05, "loss": 0.0907, "step": 9080 }, { "epoch": 0.1, "learning_rate": 4.896117535588306e-05, "loss": 0.2273, "step": 9100 }, { "epoch": 0.1, "learning_rate": 4.8958892224797084e-05, "loss": 0.2367, "step": 9120 }, { "epoch": 0.1, "learning_rate": 4.895660909371112e-05, "loss": 0.2691, "step": 9140 }, { "epoch": 0.1, "learning_rate": 4.895432596262515e-05, "loss": 0.2016, "step": 9160 }, { "epoch": 0.1, "learning_rate": 4.895204283153917e-05, "loss": 0.5513, "step": 9180 }, { "epoch": 0.11, "learning_rate": 4.89497597004532e-05, "loss": 0.3171, "step": 9200 }, { "epoch": 0.11, "learning_rate": 4.894747656936723e-05, "loss": 0.1473, "step": 9220 }, { "epoch": 0.11, "learning_rate": 4.894519343828126e-05, "loss": 0.2307, "step": 9240 }, { "epoch": 0.11, "learning_rate": 4.894291030719529e-05, "loss": 0.1461, "step": 9260 }, { "epoch": 0.11, "learning_rate": 4.8940627176109314e-05, "loss": 0.3376, "step": 9280 }, { "epoch": 0.11, "learning_rate": 4.893834404502335e-05, "loss": 0.3701, "step": 9300 }, { "epoch": 0.11, "learning_rate": 4.8936060913937374e-05, "loss": 0.1637, "step": 9320 }, { "epoch": 0.11, "learning_rate": 4.8933777782851404e-05, "loss": 0.3087, "step": 9340 }, { "epoch": 0.11, "learning_rate": 4.893149465176543e-05, "loss": 0.3708, "step": 9360 }, { "epoch": 0.11, "learning_rate": 4.892921152067946e-05, "loss": 0.4756, "step": 9380 }, { "epoch": 0.11, "learning_rate": 4.892692838959349e-05, "loss": 0.2095, "step": 9400 }, { "epoch": 0.11, "learning_rate": 4.8924645258507515e-05, "loss": 0.1986, "step": 9420 }, { "epoch": 0.11, "learning_rate": 4.892236212742155e-05, "loss": 0.2071, "step": 9440 }, { "epoch": 0.11, "learning_rate": 4.8920078996335575e-05, "loss": 0.2074, "step": 9460 }, { "epoch": 0.11, "learning_rate": 4.8917795865249605e-05, "loss": 0.1595, "step": 9480 }, { "epoch": 0.11, "learning_rate": 4.8915512734163634e-05, "loss": 0.354, "step": 9500 }, { "epoch": 0.11, "learning_rate": 4.8913229603077664e-05, "loss": 0.1875, "step": 9520 }, { "epoch": 0.11, "learning_rate": 4.8910946471991694e-05, "loss": 0.2602, "step": 9540 }, { "epoch": 0.11, "learning_rate": 4.8908663340905716e-05, "loss": 0.2816, "step": 9560 }, { "epoch": 0.11, "learning_rate": 4.8906380209819746e-05, "loss": 0.1041, "step": 9580 }, { "epoch": 0.11, "learning_rate": 4.890409707873378e-05, "loss": 0.1686, "step": 9600 }, { "epoch": 0.11, "learning_rate": 4.8901813947647806e-05, "loss": 0.1633, "step": 9620 }, { "epoch": 0.11, "learning_rate": 4.8899530816561835e-05, "loss": 0.1938, "step": 9640 }, { "epoch": 0.11, "learning_rate": 4.8897247685475865e-05, "loss": 0.2541, "step": 9660 }, { "epoch": 0.11, "learning_rate": 4.8894964554389895e-05, "loss": 0.129, "step": 9680 }, { "epoch": 0.11, "learning_rate": 4.8892681423303924e-05, "loss": 0.2427, "step": 9700 }, { "epoch": 0.11, "learning_rate": 4.889039829221795e-05, "loss": 0.3241, "step": 9720 }, { "epoch": 0.11, "learning_rate": 4.8888115161131984e-05, "loss": 0.1931, "step": 9740 }, { "epoch": 0.11, "learning_rate": 4.8885832030046007e-05, "loss": 0.3126, "step": 9760 }, { "epoch": 0.11, "learning_rate": 4.8883548898960036e-05, "loss": 0.0473, "step": 9780 }, { "epoch": 0.11, "learning_rate": 4.8881265767874066e-05, "loss": 0.2712, "step": 9800 }, { "epoch": 0.11, "learning_rate": 4.8878982636788096e-05, "loss": 0.3779, "step": 9820 }, { "epoch": 0.11, "learning_rate": 4.8876699505702125e-05, "loss": 0.1784, "step": 9840 }, { "epoch": 0.11, "learning_rate": 4.887441637461615e-05, "loss": 0.1524, "step": 9860 }, { "epoch": 0.11, "learning_rate": 4.887213324353018e-05, "loss": 0.2573, "step": 9880 }, { "epoch": 0.11, "learning_rate": 4.886985011244421e-05, "loss": 0.5011, "step": 9900 }, { "epoch": 0.11, "learning_rate": 4.886756698135824e-05, "loss": 0.1882, "step": 9920 }, { "epoch": 0.11, "learning_rate": 4.886528385027227e-05, "loss": 0.2276, "step": 9940 }, { "epoch": 0.11, "learning_rate": 4.8863000719186297e-05, "loss": 0.3534, "step": 9960 }, { "epoch": 0.11, "learning_rate": 4.8860717588100326e-05, "loss": 0.2395, "step": 9980 }, { "epoch": 0.11, "learning_rate": 4.885843445701435e-05, "loss": 0.2236, "step": 10000 }, { "epoch": 0.11, "learning_rate": 4.885615132592838e-05, "loss": 0.1945, "step": 10020 }, { "epoch": 0.11, "learning_rate": 4.885386819484241e-05, "loss": 0.2038, "step": 10040 }, { "epoch": 0.11, "learning_rate": 4.885158506375644e-05, "loss": 0.2955, "step": 10060 }, { "epoch": 0.12, "learning_rate": 4.884930193267047e-05, "loss": 0.1873, "step": 10080 }, { "epoch": 0.12, "learning_rate": 4.884701880158449e-05, "loss": 0.3311, "step": 10100 }, { "epoch": 0.12, "learning_rate": 4.884473567049853e-05, "loss": 0.7022, "step": 10120 }, { "epoch": 0.12, "learning_rate": 4.884245253941255e-05, "loss": 0.1717, "step": 10140 }, { "epoch": 0.12, "learning_rate": 4.884016940832658e-05, "loss": 0.2985, "step": 10160 }, { "epoch": 0.12, "learning_rate": 4.883788627724061e-05, "loss": 0.1803, "step": 10180 }, { "epoch": 0.12, "learning_rate": 4.883560314615464e-05, "loss": 0.1635, "step": 10200 }, { "epoch": 0.12, "learning_rate": 4.883332001506867e-05, "loss": 0.2478, "step": 10220 }, { "epoch": 0.12, "learning_rate": 4.883103688398269e-05, "loss": 0.1534, "step": 10240 }, { "epoch": 0.12, "learning_rate": 4.882875375289673e-05, "loss": 0.2337, "step": 10260 }, { "epoch": 0.12, "learning_rate": 4.882647062181076e-05, "loss": 0.1705, "step": 10280 }, { "epoch": 0.12, "learning_rate": 4.882418749072478e-05, "loss": 0.1549, "step": 10300 }, { "epoch": 0.12, "learning_rate": 4.882190435963881e-05, "loss": 0.3257, "step": 10320 }, { "epoch": 0.12, "learning_rate": 4.881962122855284e-05, "loss": 0.366, "step": 10340 }, { "epoch": 0.12, "learning_rate": 4.881733809746687e-05, "loss": 0.1398, "step": 10360 }, { "epoch": 0.12, "learning_rate": 4.88150549663809e-05, "loss": 0.1249, "step": 10380 }, { "epoch": 0.12, "learning_rate": 4.881277183529492e-05, "loss": 0.1842, "step": 10400 }, { "epoch": 0.12, "learning_rate": 4.881048870420896e-05, "loss": 0.41, "step": 10420 }, { "epoch": 0.12, "learning_rate": 4.880820557312298e-05, "loss": 0.3148, "step": 10440 }, { "epoch": 0.12, "learning_rate": 4.880592244203701e-05, "loss": 0.3722, "step": 10460 }, { "epoch": 0.12, "learning_rate": 4.880363931095104e-05, "loss": 0.8704, "step": 10480 }, { "epoch": 0.12, "learning_rate": 4.880135617986507e-05, "loss": 0.3074, "step": 10500 }, { "epoch": 0.12, "learning_rate": 4.87990730487791e-05, "loss": 0.1445, "step": 10520 }, { "epoch": 0.12, "learning_rate": 4.8796789917693124e-05, "loss": 0.1543, "step": 10540 }, { "epoch": 0.12, "learning_rate": 4.879450678660716e-05, "loss": 0.2525, "step": 10560 }, { "epoch": 0.12, "learning_rate": 4.879222365552118e-05, "loss": 0.2345, "step": 10580 }, { "epoch": 0.12, "learning_rate": 4.878994052443521e-05, "loss": 0.3308, "step": 10600 }, { "epoch": 0.12, "learning_rate": 4.878765739334924e-05, "loss": 0.1445, "step": 10620 }, { "epoch": 0.12, "learning_rate": 4.878537426226327e-05, "loss": 0.1212, "step": 10640 }, { "epoch": 0.12, "learning_rate": 4.87830911311773e-05, "loss": 0.1523, "step": 10660 }, { "epoch": 0.12, "learning_rate": 4.8780808000091325e-05, "loss": 0.1546, "step": 10680 }, { "epoch": 0.12, "learning_rate": 4.8778524869005354e-05, "loss": 0.2133, "step": 10700 }, { "epoch": 0.12, "learning_rate": 4.8776241737919384e-05, "loss": 0.3438, "step": 10720 }, { "epoch": 0.12, "learning_rate": 4.8773958606833414e-05, "loss": 0.2877, "step": 10740 }, { "epoch": 0.12, "learning_rate": 4.877167547574744e-05, "loss": 0.5216, "step": 10760 }, { "epoch": 0.12, "learning_rate": 4.8769392344661466e-05, "loss": 0.1525, "step": 10780 }, { "epoch": 0.12, "learning_rate": 4.87671092135755e-05, "loss": 0.128, "step": 10800 }, { "epoch": 0.12, "learning_rate": 4.8764826082489526e-05, "loss": 0.2528, "step": 10820 }, { "epoch": 0.12, "learning_rate": 4.8762542951403555e-05, "loss": 0.5395, "step": 10840 }, { "epoch": 0.12, "learning_rate": 4.8760259820317585e-05, "loss": 0.4077, "step": 10860 }, { "epoch": 0.12, "learning_rate": 4.8757976689231615e-05, "loss": 0.2215, "step": 10880 }, { "epoch": 0.12, "learning_rate": 4.8755693558145644e-05, "loss": 0.148, "step": 10900 }, { "epoch": 0.12, "learning_rate": 4.875341042705967e-05, "loss": 0.0478, "step": 10920 }, { "epoch": 0.12, "learning_rate": 4.8751127295973704e-05, "loss": 0.0957, "step": 10940 }, { "epoch": 0.13, "learning_rate": 4.874884416488773e-05, "loss": 0.1211, "step": 10960 }, { "epoch": 0.13, "learning_rate": 4.8746561033801756e-05, "loss": 0.2425, "step": 10980 }, { "epoch": 0.13, "learning_rate": 4.8744277902715786e-05, "loss": 0.0478, "step": 11000 }, { "epoch": 0.13, "learning_rate": 4.8741994771629816e-05, "loss": 0.3812, "step": 11020 }, { "epoch": 0.13, "learning_rate": 4.8739711640543845e-05, "loss": 0.1, "step": 11040 }, { "epoch": 0.13, "learning_rate": 4.8737428509457875e-05, "loss": 0.3801, "step": 11060 }, { "epoch": 0.13, "learning_rate": 4.87351453783719e-05, "loss": 0.108, "step": 11080 }, { "epoch": 0.13, "learning_rate": 4.8732862247285934e-05, "loss": 0.1345, "step": 11100 }, { "epoch": 0.13, "learning_rate": 4.873057911619996e-05, "loss": 0.3411, "step": 11120 }, { "epoch": 0.13, "learning_rate": 4.872829598511399e-05, "loss": 0.1611, "step": 11140 }, { "epoch": 0.13, "learning_rate": 4.8726012854028017e-05, "loss": 0.3871, "step": 11160 }, { "epoch": 0.13, "learning_rate": 4.8723729722942046e-05, "loss": 0.1451, "step": 11180 }, { "epoch": 0.13, "learning_rate": 4.8721446591856076e-05, "loss": 0.3003, "step": 11200 }, { "epoch": 0.13, "learning_rate": 4.87191634607701e-05, "loss": 0.0634, "step": 11220 }, { "epoch": 0.13, "learning_rate": 4.8716880329684135e-05, "loss": 0.181, "step": 11240 }, { "epoch": 0.13, "learning_rate": 4.871459719859816e-05, "loss": 0.0783, "step": 11260 }, { "epoch": 0.13, "learning_rate": 4.871231406751219e-05, "loss": 0.3145, "step": 11280 }, { "epoch": 0.13, "learning_rate": 4.871003093642622e-05, "loss": 0.6122, "step": 11300 }, { "epoch": 0.13, "learning_rate": 4.870774780534025e-05, "loss": 0.168, "step": 11320 }, { "epoch": 0.13, "learning_rate": 4.870546467425428e-05, "loss": 0.2246, "step": 11340 }, { "epoch": 0.13, "learning_rate": 4.87031815431683e-05, "loss": 0.2345, "step": 11360 }, { "epoch": 0.13, "learning_rate": 4.870089841208233e-05, "loss": 0.1764, "step": 11380 }, { "epoch": 0.13, "learning_rate": 4.869861528099636e-05, "loss": 0.3675, "step": 11400 }, { "epoch": 0.13, "learning_rate": 4.869633214991039e-05, "loss": 0.2518, "step": 11420 }, { "epoch": 0.13, "learning_rate": 4.869404901882442e-05, "loss": 0.1239, "step": 11440 }, { "epoch": 0.13, "learning_rate": 4.869176588773844e-05, "loss": 0.2104, "step": 11460 }, { "epoch": 0.13, "learning_rate": 4.868948275665248e-05, "loss": 0.204, "step": 11480 }, { "epoch": 0.13, "learning_rate": 4.86871996255665e-05, "loss": 0.252, "step": 11500 }, { "epoch": 0.13, "learning_rate": 4.868491649448053e-05, "loss": 0.2934, "step": 11520 }, { "epoch": 0.13, "learning_rate": 4.868263336339457e-05, "loss": 0.1357, "step": 11540 }, { "epoch": 0.13, "learning_rate": 4.868035023230859e-05, "loss": 0.3304, "step": 11560 }, { "epoch": 0.13, "learning_rate": 4.867806710122262e-05, "loss": 0.3659, "step": 11580 }, { "epoch": 0.13, "learning_rate": 4.867578397013664e-05, "loss": 0.2262, "step": 11600 }, { "epoch": 0.13, "learning_rate": 4.867350083905068e-05, "loss": 0.1561, "step": 11620 }, { "epoch": 0.13, "learning_rate": 4.867121770796471e-05, "loss": 0.1338, "step": 11640 }, { "epoch": 0.13, "learning_rate": 4.866893457687873e-05, "loss": 0.4416, "step": 11660 }, { "epoch": 0.13, "learning_rate": 4.866665144579276e-05, "loss": 0.2584, "step": 11680 }, { "epoch": 0.13, "learning_rate": 4.866436831470679e-05, "loss": 0.1846, "step": 11700 }, { "epoch": 0.13, "learning_rate": 4.866208518362082e-05, "loss": 0.3098, "step": 11720 }, { "epoch": 0.13, "learning_rate": 4.865980205253485e-05, "loss": 0.216, "step": 11740 }, { "epoch": 0.13, "learning_rate": 4.865751892144887e-05, "loss": 0.1826, "step": 11760 }, { "epoch": 0.13, "learning_rate": 4.865523579036291e-05, "loss": 0.2064, "step": 11780 }, { "epoch": 0.13, "learning_rate": 4.865295265927693e-05, "loss": 0.1948, "step": 11800 }, { "epoch": 0.13, "learning_rate": 4.865066952819096e-05, "loss": 0.1946, "step": 11820 }, { "epoch": 0.14, "learning_rate": 4.864838639710499e-05, "loss": 0.1117, "step": 11840 }, { "epoch": 0.14, "learning_rate": 4.864610326601902e-05, "loss": 0.0852, "step": 11860 }, { "epoch": 0.14, "learning_rate": 4.864382013493305e-05, "loss": 0.3345, "step": 11880 }, { "epoch": 0.14, "learning_rate": 4.8641537003847074e-05, "loss": 0.2607, "step": 11900 }, { "epoch": 0.14, "learning_rate": 4.863925387276111e-05, "loss": 0.1999, "step": 11920 }, { "epoch": 0.14, "learning_rate": 4.8636970741675134e-05, "loss": 0.0824, "step": 11940 }, { "epoch": 0.14, "learning_rate": 4.863468761058916e-05, "loss": 0.0885, "step": 11960 }, { "epoch": 0.14, "learning_rate": 4.863240447950319e-05, "loss": 0.2905, "step": 11980 }, { "epoch": 0.14, "learning_rate": 4.863012134841722e-05, "loss": 0.1862, "step": 12000 }, { "epoch": 0.14, "learning_rate": 4.862783821733125e-05, "loss": 0.2294, "step": 12020 }, { "epoch": 0.14, "learning_rate": 4.8625555086245275e-05, "loss": 0.2212, "step": 12040 }, { "epoch": 0.14, "learning_rate": 4.8623271955159305e-05, "loss": 0.4834, "step": 12060 }, { "epoch": 0.14, "learning_rate": 4.862098882407334e-05, "loss": 0.2955, "step": 12080 }, { "epoch": 0.14, "learning_rate": 4.8618705692987364e-05, "loss": 0.2482, "step": 12100 }, { "epoch": 0.14, "learning_rate": 4.8616422561901394e-05, "loss": 0.0969, "step": 12120 }, { "epoch": 0.14, "learning_rate": 4.8614139430815424e-05, "loss": 0.1488, "step": 12140 }, { "epoch": 0.14, "learning_rate": 4.861185629972945e-05, "loss": 0.5339, "step": 12160 }, { "epoch": 0.14, "learning_rate": 4.860957316864348e-05, "loss": 0.5189, "step": 12180 }, { "epoch": 0.14, "learning_rate": 4.8607290037557506e-05, "loss": 0.3042, "step": 12200 }, { "epoch": 0.14, "learning_rate": 4.860500690647154e-05, "loss": 0.2176, "step": 12220 }, { "epoch": 0.14, "learning_rate": 4.8602723775385565e-05, "loss": 0.1297, "step": 12240 }, { "epoch": 0.14, "learning_rate": 4.8600440644299595e-05, "loss": 0.2511, "step": 12260 }, { "epoch": 0.14, "learning_rate": 4.8598157513213625e-05, "loss": 0.2656, "step": 12280 }, { "epoch": 0.14, "learning_rate": 4.8595874382127654e-05, "loss": 0.1563, "step": 12300 }, { "epoch": 0.14, "learning_rate": 4.8593591251041684e-05, "loss": 0.1708, "step": 12320 }, { "epoch": 0.14, "learning_rate": 4.859130811995571e-05, "loss": 0.1062, "step": 12340 }, { "epoch": 0.14, "learning_rate": 4.8589024988869737e-05, "loss": 0.1796, "step": 12360 }, { "epoch": 0.14, "learning_rate": 4.8586741857783766e-05, "loss": 0.3131, "step": 12380 }, { "epoch": 0.14, "learning_rate": 4.8584458726697796e-05, "loss": 0.1894, "step": 12400 }, { "epoch": 0.14, "learning_rate": 4.8582175595611826e-05, "loss": 0.0882, "step": 12420 }, { "epoch": 0.14, "learning_rate": 4.8579892464525855e-05, "loss": 0.1958, "step": 12440 }, { "epoch": 0.14, "learning_rate": 4.8577609333439885e-05, "loss": 0.0885, "step": 12460 }, { "epoch": 0.14, "learning_rate": 4.857532620235391e-05, "loss": 0.1362, "step": 12480 }, { "epoch": 0.14, "learning_rate": 4.857304307126794e-05, "loss": 0.3417, "step": 12500 }, { "epoch": 0.14, "learning_rate": 4.857075994018197e-05, "loss": 0.1762, "step": 12520 }, { "epoch": 0.14, "learning_rate": 4.8568476809096e-05, "loss": 0.2546, "step": 12540 }, { "epoch": 0.14, "learning_rate": 4.8566193678010027e-05, "loss": 0.1303, "step": 12560 }, { "epoch": 0.14, "learning_rate": 4.856391054692405e-05, "loss": 0.1058, "step": 12580 }, { "epoch": 0.14, "learning_rate": 4.8561627415838086e-05, "loss": 0.1915, "step": 12600 }, { "epoch": 0.14, "learning_rate": 4.855934428475211e-05, "loss": 0.6457, "step": 12620 }, { "epoch": 0.14, "learning_rate": 4.855706115366614e-05, "loss": 0.2569, "step": 12640 }, { "epoch": 0.14, "learning_rate": 4.855477802258017e-05, "loss": 0.25, "step": 12660 }, { "epoch": 0.14, "learning_rate": 4.85524948914942e-05, "loss": 0.1808, "step": 12680 }, { "epoch": 0.14, "learning_rate": 4.855021176040823e-05, "loss": 0.1907, "step": 12700 }, { "epoch": 0.15, "learning_rate": 4.854792862932225e-05, "loss": 0.1927, "step": 12720 }, { "epoch": 0.15, "learning_rate": 4.854564549823629e-05, "loss": 0.0954, "step": 12740 }, { "epoch": 0.15, "learning_rate": 4.854336236715032e-05, "loss": 0.3867, "step": 12760 }, { "epoch": 0.15, "learning_rate": 4.854107923606434e-05, "loss": 0.1319, "step": 12780 }, { "epoch": 0.15, "learning_rate": 4.853879610497837e-05, "loss": 0.431, "step": 12800 }, { "epoch": 0.15, "learning_rate": 4.85365129738924e-05, "loss": 0.359, "step": 12820 }, { "epoch": 0.15, "learning_rate": 4.853422984280643e-05, "loss": 0.3174, "step": 12840 }, { "epoch": 0.15, "learning_rate": 4.853194671172046e-05, "loss": 0.2507, "step": 12860 }, { "epoch": 0.15, "learning_rate": 4.852966358063448e-05, "loss": 0.1217, "step": 12880 }, { "epoch": 0.15, "learning_rate": 4.852738044954852e-05, "loss": 0.2047, "step": 12900 }, { "epoch": 0.15, "learning_rate": 4.852509731846254e-05, "loss": 0.397, "step": 12920 }, { "epoch": 0.15, "learning_rate": 4.852281418737657e-05, "loss": 0.3172, "step": 12940 }, { "epoch": 0.15, "learning_rate": 4.85205310562906e-05, "loss": 0.104, "step": 12960 }, { "epoch": 0.15, "learning_rate": 4.851824792520463e-05, "loss": 0.4485, "step": 12980 }, { "epoch": 0.15, "learning_rate": 4.851596479411866e-05, "loss": 0.1551, "step": 13000 }, { "epoch": 0.15, "learning_rate": 4.851368166303268e-05, "loss": 0.2541, "step": 13020 }, { "epoch": 0.15, "learning_rate": 4.851139853194672e-05, "loss": 0.2653, "step": 13040 }, { "epoch": 0.15, "learning_rate": 4.850911540086074e-05, "loss": 0.2688, "step": 13060 }, { "epoch": 0.15, "learning_rate": 4.850683226977477e-05, "loss": 0.5488, "step": 13080 }, { "epoch": 0.15, "learning_rate": 4.85045491386888e-05, "loss": 0.0899, "step": 13100 }, { "epoch": 0.15, "learning_rate": 4.850226600760283e-05, "loss": 0.5316, "step": 13120 }, { "epoch": 0.15, "learning_rate": 4.849998287651686e-05, "loss": 0.0923, "step": 13140 }, { "epoch": 0.15, "learning_rate": 4.849769974543088e-05, "loss": 0.202, "step": 13160 }, { "epoch": 0.15, "learning_rate": 4.849541661434491e-05, "loss": 0.2372, "step": 13180 }, { "epoch": 0.15, "learning_rate": 4.849313348325894e-05, "loss": 0.106, "step": 13200 }, { "epoch": 0.15, "learning_rate": 4.849085035217297e-05, "loss": 0.3648, "step": 13220 }, { "epoch": 0.15, "learning_rate": 4.8488567221087e-05, "loss": 0.1676, "step": 13240 }, { "epoch": 0.15, "learning_rate": 4.8486284090001025e-05, "loss": 0.2009, "step": 13260 }, { "epoch": 0.15, "learning_rate": 4.848400095891506e-05, "loss": 0.2124, "step": 13280 }, { "epoch": 0.15, "learning_rate": 4.8481717827829084e-05, "loss": 0.3824, "step": 13300 }, { "epoch": 0.15, "learning_rate": 4.8479434696743114e-05, "loss": 0.1595, "step": 13320 }, { "epoch": 0.15, "learning_rate": 4.8477151565657144e-05, "loss": 0.2819, "step": 13340 }, { "epoch": 0.15, "learning_rate": 4.847486843457117e-05, "loss": 0.2054, "step": 13360 }, { "epoch": 0.15, "learning_rate": 4.84725853034852e-05, "loss": 0.2783, "step": 13380 }, { "epoch": 0.15, "learning_rate": 4.8470302172399226e-05, "loss": 0.2557, "step": 13400 }, { "epoch": 0.15, "learning_rate": 4.846801904131326e-05, "loss": 0.2069, "step": 13420 }, { "epoch": 0.15, "learning_rate": 4.846573591022729e-05, "loss": 0.2322, "step": 13440 }, { "epoch": 0.15, "learning_rate": 4.8463452779141315e-05, "loss": 0.0841, "step": 13460 }, { "epoch": 0.15, "learning_rate": 4.8461169648055345e-05, "loss": 0.4648, "step": 13480 }, { "epoch": 0.15, "learning_rate": 4.8458886516969374e-05, "loss": 0.2695, "step": 13500 }, { "epoch": 0.15, "learning_rate": 4.8456603385883404e-05, "loss": 0.2124, "step": 13520 }, { "epoch": 0.15, "learning_rate": 4.8454320254797434e-05, "loss": 0.2354, "step": 13540 }, { "epoch": 0.15, "learning_rate": 4.8452037123711457e-05, "loss": 0.6906, "step": 13560 }, { "epoch": 0.16, "learning_rate": 4.844975399262549e-05, "loss": 0.3555, "step": 13580 }, { "epoch": 0.16, "learning_rate": 4.8447470861539516e-05, "loss": 0.3433, "step": 13600 }, { "epoch": 0.16, "learning_rate": 4.8445187730453546e-05, "loss": 0.2153, "step": 13620 }, { "epoch": 0.16, "learning_rate": 4.8442904599367575e-05, "loss": 0.3262, "step": 13640 }, { "epoch": 0.16, "learning_rate": 4.8440621468281605e-05, "loss": 0.362, "step": 13660 }, { "epoch": 0.16, "learning_rate": 4.8438338337195635e-05, "loss": 0.1669, "step": 13680 }, { "epoch": 0.16, "learning_rate": 4.843605520610966e-05, "loss": 0.1521, "step": 13700 }, { "epoch": 0.16, "learning_rate": 4.8433772075023694e-05, "loss": 0.1775, "step": 13720 }, { "epoch": 0.16, "learning_rate": 4.843148894393772e-05, "loss": 0.4839, "step": 13740 }, { "epoch": 0.16, "learning_rate": 4.8429205812851747e-05, "loss": 0.1786, "step": 13760 }, { "epoch": 0.16, "learning_rate": 4.8426922681765776e-05, "loss": 0.3235, "step": 13780 }, { "epoch": 0.16, "learning_rate": 4.8424639550679806e-05, "loss": 0.303, "step": 13800 }, { "epoch": 0.16, "learning_rate": 4.8422356419593836e-05, "loss": 0.1942, "step": 13820 }, { "epoch": 0.16, "learning_rate": 4.842007328850786e-05, "loss": 0.2128, "step": 13840 }, { "epoch": 0.16, "learning_rate": 4.841779015742189e-05, "loss": 0.2079, "step": 13860 }, { "epoch": 0.16, "learning_rate": 4.841550702633592e-05, "loss": 0.2866, "step": 13880 }, { "epoch": 0.16, "learning_rate": 4.841322389524995e-05, "loss": 0.4447, "step": 13900 }, { "epoch": 0.16, "learning_rate": 4.841094076416398e-05, "loss": 0.2709, "step": 13920 }, { "epoch": 0.16, "learning_rate": 4.8408657633078e-05, "loss": 0.1317, "step": 13940 }, { "epoch": 0.16, "learning_rate": 4.840637450199204e-05, "loss": 0.2079, "step": 13960 }, { "epoch": 0.16, "learning_rate": 4.840409137090606e-05, "loss": 0.2531, "step": 13980 }, { "epoch": 0.16, "learning_rate": 4.840180823982009e-05, "loss": 0.199, "step": 14000 }, { "epoch": 0.16, "learning_rate": 4.8399525108734126e-05, "loss": 0.1124, "step": 14020 }, { "epoch": 0.16, "learning_rate": 4.839724197764815e-05, "loss": 0.1329, "step": 14040 }, { "epoch": 0.16, "learning_rate": 4.839495884656218e-05, "loss": 0.2992, "step": 14060 }, { "epoch": 0.16, "learning_rate": 4.83926757154762e-05, "loss": 0.091, "step": 14080 }, { "epoch": 0.16, "learning_rate": 4.839039258439024e-05, "loss": 0.1595, "step": 14100 }, { "epoch": 0.16, "learning_rate": 4.838810945330427e-05, "loss": 0.1703, "step": 14120 }, { "epoch": 0.16, "learning_rate": 4.838582632221829e-05, "loss": 0.1201, "step": 14140 }, { "epoch": 0.16, "learning_rate": 4.838354319113232e-05, "loss": 0.3395, "step": 14160 }, { "epoch": 0.16, "learning_rate": 4.838126006004635e-05, "loss": 0.2097, "step": 14180 }, { "epoch": 0.16, "learning_rate": 4.837897692896038e-05, "loss": 0.1463, "step": 14200 }, { "epoch": 0.16, "learning_rate": 4.837669379787441e-05, "loss": 0.3051, "step": 14220 }, { "epoch": 0.16, "learning_rate": 4.837441066678843e-05, "loss": 0.159, "step": 14240 }, { "epoch": 0.16, "learning_rate": 4.837212753570247e-05, "loss": 0.2217, "step": 14260 }, { "epoch": 0.16, "learning_rate": 4.836984440461649e-05, "loss": 0.3707, "step": 14280 }, { "epoch": 0.16, "learning_rate": 4.836756127353052e-05, "loss": 0.1621, "step": 14300 }, { "epoch": 0.16, "learning_rate": 4.836527814244455e-05, "loss": 0.3691, "step": 14320 }, { "epoch": 0.16, "learning_rate": 4.836299501135858e-05, "loss": 0.1145, "step": 14340 }, { "epoch": 0.16, "learning_rate": 4.836071188027261e-05, "loss": 0.3012, "step": 14360 }, { "epoch": 0.16, "learning_rate": 4.835842874918663e-05, "loss": 0.2286, "step": 14380 }, { "epoch": 0.16, "learning_rate": 4.835614561810067e-05, "loss": 0.1506, "step": 14400 }, { "epoch": 0.16, "learning_rate": 4.835386248701469e-05, "loss": 0.1939, "step": 14420 }, { "epoch": 0.16, "learning_rate": 4.835157935592872e-05, "loss": 0.2034, "step": 14440 }, { "epoch": 0.17, "learning_rate": 4.834929622484275e-05, "loss": 0.4145, "step": 14460 }, { "epoch": 0.17, "learning_rate": 4.834701309375678e-05, "loss": 0.1724, "step": 14480 }, { "epoch": 0.17, "learning_rate": 4.834472996267081e-05, "loss": 0.163, "step": 14500 }, { "epoch": 0.17, "learning_rate": 4.8342446831584834e-05, "loss": 0.0384, "step": 14520 }, { "epoch": 0.17, "learning_rate": 4.8340163700498864e-05, "loss": 0.1564, "step": 14540 }, { "epoch": 0.17, "learning_rate": 4.83378805694129e-05, "loss": 0.1974, "step": 14560 }, { "epoch": 0.17, "learning_rate": 4.833559743832692e-05, "loss": 0.3863, "step": 14580 }, { "epoch": 0.17, "learning_rate": 4.833331430724095e-05, "loss": 0.3867, "step": 14600 }, { "epoch": 0.17, "learning_rate": 4.833103117615498e-05, "loss": 0.101, "step": 14620 }, { "epoch": 0.17, "learning_rate": 4.832874804506901e-05, "loss": 0.1712, "step": 14640 }, { "epoch": 0.17, "learning_rate": 4.832646491398304e-05, "loss": 0.2116, "step": 14660 }, { "epoch": 0.17, "learning_rate": 4.8324181782897065e-05, "loss": 0.1428, "step": 14680 }, { "epoch": 0.17, "learning_rate": 4.83218986518111e-05, "loss": 0.2676, "step": 14700 }, { "epoch": 0.17, "learning_rate": 4.8319615520725124e-05, "loss": 0.1821, "step": 14720 }, { "epoch": 0.17, "learning_rate": 4.8317332389639154e-05, "loss": 0.2218, "step": 14740 }, { "epoch": 0.17, "learning_rate": 4.831504925855318e-05, "loss": 0.2123, "step": 14760 }, { "epoch": 0.17, "learning_rate": 4.831276612746721e-05, "loss": 0.0525, "step": 14780 }, { "epoch": 0.17, "learning_rate": 4.831048299638124e-05, "loss": 0.2242, "step": 14800 }, { "epoch": 0.17, "learning_rate": 4.8308199865295266e-05, "loss": 0.3232, "step": 14820 }, { "epoch": 0.17, "learning_rate": 4.8305916734209295e-05, "loss": 0.5396, "step": 14840 }, { "epoch": 0.17, "learning_rate": 4.8303633603123325e-05, "loss": 0.1251, "step": 14860 }, { "epoch": 0.17, "learning_rate": 4.8301350472037355e-05, "loss": 0.3565, "step": 14880 }, { "epoch": 0.17, "learning_rate": 4.8299067340951384e-05, "loss": 0.222, "step": 14900 }, { "epoch": 0.17, "learning_rate": 4.8296784209865414e-05, "loss": 0.1024, "step": 14920 }, { "epoch": 0.17, "learning_rate": 4.8294501078779444e-05, "loss": 0.5784, "step": 14940 }, { "epoch": 0.17, "learning_rate": 4.8292217947693467e-05, "loss": 0.3689, "step": 14960 }, { "epoch": 0.17, "learning_rate": 4.8289934816607496e-05, "loss": 0.2942, "step": 14980 }, { "epoch": 0.17, "learning_rate": 4.8287651685521526e-05, "loss": 0.15, "step": 15000 }, { "epoch": 0.17, "learning_rate": 4.8285368554435556e-05, "loss": 0.245, "step": 15020 }, { "epoch": 0.17, "learning_rate": 4.8283085423349585e-05, "loss": 0.3346, "step": 15040 }, { "epoch": 0.17, "learning_rate": 4.828080229226361e-05, "loss": 0.1453, "step": 15060 }, { "epoch": 0.17, "learning_rate": 4.8278519161177645e-05, "loss": 0.2363, "step": 15080 }, { "epoch": 0.17, "learning_rate": 4.827623603009167e-05, "loss": 0.0949, "step": 15100 }, { "epoch": 0.17, "learning_rate": 4.82739528990057e-05, "loss": 0.2316, "step": 15120 }, { "epoch": 0.17, "learning_rate": 4.827166976791973e-05, "loss": 0.1553, "step": 15140 }, { "epoch": 0.17, "learning_rate": 4.826938663683376e-05, "loss": 0.2684, "step": 15160 }, { "epoch": 0.17, "learning_rate": 4.8267103505747786e-05, "loss": 0.2217, "step": 15180 }, { "epoch": 0.17, "learning_rate": 4.826482037466181e-05, "loss": 0.0578, "step": 15200 }, { "epoch": 0.17, "learning_rate": 4.8262537243575846e-05, "loss": 0.1853, "step": 15220 }, { "epoch": 0.17, "learning_rate": 4.8260254112489875e-05, "loss": 0.3784, "step": 15240 }, { "epoch": 0.17, "learning_rate": 4.82579709814039e-05, "loss": 0.6189, "step": 15260 }, { "epoch": 0.17, "learning_rate": 4.825568785031793e-05, "loss": 0.2267, "step": 15280 }, { "epoch": 0.17, "learning_rate": 4.825340471923196e-05, "loss": 0.3898, "step": 15300 }, { "epoch": 0.17, "learning_rate": 4.825112158814599e-05, "loss": 0.7594, "step": 15320 }, { "epoch": 0.18, "learning_rate": 4.824883845706002e-05, "loss": 0.2766, "step": 15340 }, { "epoch": 0.18, "learning_rate": 4.824655532597404e-05, "loss": 0.265, "step": 15360 }, { "epoch": 0.18, "learning_rate": 4.8244272194888076e-05, "loss": 0.3349, "step": 15380 }, { "epoch": 0.18, "learning_rate": 4.82419890638021e-05, "loss": 0.2217, "step": 15400 }, { "epoch": 0.18, "learning_rate": 4.823970593271613e-05, "loss": 0.2775, "step": 15420 }, { "epoch": 0.18, "learning_rate": 4.823742280163016e-05, "loss": 0.1765, "step": 15440 }, { "epoch": 0.18, "learning_rate": 4.823513967054419e-05, "loss": 0.5794, "step": 15460 }, { "epoch": 0.18, "learning_rate": 4.823285653945822e-05, "loss": 0.2915, "step": 15480 }, { "epoch": 0.18, "learning_rate": 4.823057340837224e-05, "loss": 0.1939, "step": 15500 }, { "epoch": 0.18, "learning_rate": 4.822829027728628e-05, "loss": 0.1685, "step": 15520 }, { "epoch": 0.18, "learning_rate": 4.82260071462003e-05, "loss": 0.0381, "step": 15540 }, { "epoch": 0.18, "learning_rate": 4.822372401511433e-05, "loss": 0.2696, "step": 15560 }, { "epoch": 0.18, "learning_rate": 4.822144088402836e-05, "loss": 0.3588, "step": 15580 }, { "epoch": 0.18, "learning_rate": 4.821915775294239e-05, "loss": 0.3376, "step": 15600 }, { "epoch": 0.18, "learning_rate": 4.821687462185642e-05, "loss": 0.1283, "step": 15620 }, { "epoch": 0.18, "learning_rate": 4.821459149077044e-05, "loss": 0.3893, "step": 15640 }, { "epoch": 0.18, "learning_rate": 4.821230835968447e-05, "loss": 0.18, "step": 15660 }, { "epoch": 0.18, "learning_rate": 4.82100252285985e-05, "loss": 0.166, "step": 15680 }, { "epoch": 0.18, "learning_rate": 4.820774209751253e-05, "loss": 0.2923, "step": 15700 }, { "epoch": 0.18, "learning_rate": 4.820545896642656e-05, "loss": 0.2481, "step": 15720 }, { "epoch": 0.18, "learning_rate": 4.8203175835340584e-05, "loss": 0.6648, "step": 15740 }, { "epoch": 0.18, "learning_rate": 4.820089270425462e-05, "loss": 0.2373, "step": 15760 }, { "epoch": 0.18, "learning_rate": 4.819860957316864e-05, "loss": 0.4881, "step": 15780 }, { "epoch": 0.18, "learning_rate": 4.819632644208267e-05, "loss": 0.2074, "step": 15800 }, { "epoch": 0.18, "learning_rate": 4.81940433109967e-05, "loss": 0.1324, "step": 15820 }, { "epoch": 0.18, "learning_rate": 4.819176017991073e-05, "loss": 0.2818, "step": 15840 }, { "epoch": 0.18, "learning_rate": 4.818947704882476e-05, "loss": 0.3484, "step": 15860 }, { "epoch": 0.18, "learning_rate": 4.8187193917738785e-05, "loss": 0.261, "step": 15880 }, { "epoch": 0.18, "learning_rate": 4.818491078665282e-05, "loss": 0.2386, "step": 15900 }, { "epoch": 0.18, "learning_rate": 4.818262765556685e-05, "loss": 0.1494, "step": 15920 }, { "epoch": 0.18, "learning_rate": 4.8180344524480874e-05, "loss": 0.1614, "step": 15940 }, { "epoch": 0.18, "learning_rate": 4.81780613933949e-05, "loss": 0.1387, "step": 15960 }, { "epoch": 0.18, "learning_rate": 4.817577826230893e-05, "loss": 0.101, "step": 15980 }, { "epoch": 0.18, "learning_rate": 4.817349513122296e-05, "loss": 0.1119, "step": 16000 }, { "epoch": 0.18, "learning_rate": 4.817121200013699e-05, "loss": 0.1848, "step": 16020 }, { "epoch": 0.18, "learning_rate": 4.8168928869051015e-05, "loss": 0.2079, "step": 16040 }, { "epoch": 0.18, "learning_rate": 4.816664573796505e-05, "loss": 0.1759, "step": 16060 }, { "epoch": 0.18, "learning_rate": 4.8164362606879075e-05, "loss": 0.1511, "step": 16080 }, { "epoch": 0.18, "learning_rate": 4.8162079475793104e-05, "loss": 0.0759, "step": 16100 }, { "epoch": 0.18, "learning_rate": 4.8159796344707134e-05, "loss": 0.1243, "step": 16120 }, { "epoch": 0.18, "learning_rate": 4.8157513213621164e-05, "loss": 0.1595, "step": 16140 }, { "epoch": 0.18, "learning_rate": 4.815523008253519e-05, "loss": 0.2829, "step": 16160 }, { "epoch": 0.18, "learning_rate": 4.8152946951449216e-05, "loss": 0.1362, "step": 16180 }, { "epoch": 0.18, "learning_rate": 4.815066382036325e-05, "loss": 0.255, "step": 16200 }, { "epoch": 0.19, "learning_rate": 4.8148380689277276e-05, "loss": 0.3321, "step": 16220 }, { "epoch": 0.19, "learning_rate": 4.8146097558191305e-05, "loss": 0.4858, "step": 16240 }, { "epoch": 0.19, "learning_rate": 4.8143814427105335e-05, "loss": 0.0896, "step": 16260 }, { "epoch": 0.19, "learning_rate": 4.8141531296019365e-05, "loss": 0.2825, "step": 16280 }, { "epoch": 0.19, "learning_rate": 4.8139248164933394e-05, "loss": 0.3625, "step": 16300 }, { "epoch": 0.19, "learning_rate": 4.813696503384742e-05, "loss": 0.1085, "step": 16320 }, { "epoch": 0.19, "learning_rate": 4.813468190276145e-05, "loss": 0.2976, "step": 16340 }, { "epoch": 0.19, "learning_rate": 4.813239877167548e-05, "loss": 0.2624, "step": 16360 }, { "epoch": 0.19, "learning_rate": 4.8130115640589506e-05, "loss": 0.1309, "step": 16380 }, { "epoch": 0.19, "learning_rate": 4.8127832509503536e-05, "loss": 0.3113, "step": 16400 }, { "epoch": 0.19, "learning_rate": 4.812554937841756e-05, "loss": 0.3127, "step": 16420 }, { "epoch": 0.19, "learning_rate": 4.8123266247331595e-05, "loss": 0.3685, "step": 16440 }, { "epoch": 0.19, "learning_rate": 4.812098311624562e-05, "loss": 0.3844, "step": 16460 }, { "epoch": 0.19, "learning_rate": 4.811869998515965e-05, "loss": 0.3027, "step": 16480 }, { "epoch": 0.19, "learning_rate": 4.8116416854073684e-05, "loss": 0.1713, "step": 16500 }, { "epoch": 0.19, "learning_rate": 4.811413372298771e-05, "loss": 0.1902, "step": 16520 }, { "epoch": 0.19, "learning_rate": 4.811185059190174e-05, "loss": 0.2295, "step": 16540 }, { "epoch": 0.19, "learning_rate": 4.810956746081576e-05, "loss": 0.5175, "step": 16560 }, { "epoch": 0.19, "learning_rate": 4.8107284329729796e-05, "loss": 0.4497, "step": 16580 }, { "epoch": 0.19, "learning_rate": 4.8105001198643826e-05, "loss": 0.1413, "step": 16600 }, { "epoch": 0.19, "learning_rate": 4.810271806755785e-05, "loss": 0.2424, "step": 16620 }, { "epoch": 0.19, "learning_rate": 4.810043493647188e-05, "loss": 0.2221, "step": 16640 }, { "epoch": 0.19, "learning_rate": 4.809815180538591e-05, "loss": 0.2287, "step": 16660 }, { "epoch": 0.19, "learning_rate": 4.809586867429994e-05, "loss": 0.2451, "step": 16680 }, { "epoch": 0.19, "learning_rate": 4.809358554321397e-05, "loss": 0.0715, "step": 16700 }, { "epoch": 0.19, "learning_rate": 4.809130241212799e-05, "loss": 0.1564, "step": 16720 }, { "epoch": 0.19, "learning_rate": 4.808901928104203e-05, "loss": 0.4255, "step": 16740 }, { "epoch": 0.19, "learning_rate": 4.808673614995605e-05, "loss": 0.3267, "step": 16760 }, { "epoch": 0.19, "learning_rate": 4.808445301887008e-05, "loss": 0.2989, "step": 16780 }, { "epoch": 0.19, "learning_rate": 4.808216988778411e-05, "loss": 0.1612, "step": 16800 }, { "epoch": 0.19, "learning_rate": 4.807988675669814e-05, "loss": 0.2678, "step": 16820 }, { "epoch": 0.19, "learning_rate": 4.807760362561217e-05, "loss": 0.1379, "step": 16840 }, { "epoch": 0.19, "learning_rate": 4.807532049452619e-05, "loss": 0.302, "step": 16860 }, { "epoch": 0.19, "learning_rate": 4.807303736344023e-05, "loss": 0.435, "step": 16880 }, { "epoch": 0.19, "learning_rate": 4.807075423235425e-05, "loss": 0.3834, "step": 16900 }, { "epoch": 0.19, "learning_rate": 4.806847110126828e-05, "loss": 0.476, "step": 16920 }, { "epoch": 0.19, "learning_rate": 4.806618797018231e-05, "loss": 0.3449, "step": 16940 }, { "epoch": 0.19, "learning_rate": 4.806390483909634e-05, "loss": 0.4167, "step": 16960 }, { "epoch": 0.19, "learning_rate": 4.806162170801037e-05, "loss": 0.3769, "step": 16980 }, { "epoch": 0.19, "learning_rate": 4.805933857692439e-05, "loss": 0.1708, "step": 17000 }, { "epoch": 0.19, "learning_rate": 4.805705544583842e-05, "loss": 0.1346, "step": 17020 }, { "epoch": 0.19, "learning_rate": 4.805477231475245e-05, "loss": 0.295, "step": 17040 }, { "epoch": 0.19, "learning_rate": 4.805248918366648e-05, "loss": 0.2713, "step": 17060 }, { "epoch": 0.19, "learning_rate": 4.805020605258051e-05, "loss": 0.1755, "step": 17080 }, { "epoch": 0.2, "learning_rate": 4.804792292149454e-05, "loss": 0.3529, "step": 17100 }, { "epoch": 0.2, "learning_rate": 4.804563979040857e-05, "loss": 0.0556, "step": 17120 }, { "epoch": 0.2, "learning_rate": 4.80433566593226e-05, "loss": 0.2184, "step": 17140 }, { "epoch": 0.2, "learning_rate": 4.804107352823662e-05, "loss": 0.1073, "step": 17160 }, { "epoch": 0.2, "learning_rate": 4.803879039715066e-05, "loss": 0.2377, "step": 17180 }, { "epoch": 0.2, "learning_rate": 4.803650726606468e-05, "loss": 0.1914, "step": 17200 }, { "epoch": 0.2, "learning_rate": 4.803422413497871e-05, "loss": 0.1988, "step": 17220 }, { "epoch": 0.2, "learning_rate": 4.803194100389274e-05, "loss": 0.0975, "step": 17240 }, { "epoch": 0.2, "learning_rate": 4.802965787280677e-05, "loss": 0.1857, "step": 17260 }, { "epoch": 0.2, "learning_rate": 4.80273747417208e-05, "loss": 0.162, "step": 17280 }, { "epoch": 0.2, "learning_rate": 4.8025091610634824e-05, "loss": 0.4619, "step": 17300 }, { "epoch": 0.2, "learning_rate": 4.8022808479548854e-05, "loss": 0.286, "step": 17320 }, { "epoch": 0.2, "learning_rate": 4.8020525348462884e-05, "loss": 0.0321, "step": 17340 }, { "epoch": 0.2, "learning_rate": 4.801824221737691e-05, "loss": 0.2135, "step": 17360 }, { "epoch": 0.2, "learning_rate": 4.801595908629094e-05, "loss": 0.1168, "step": 17380 }, { "epoch": 0.2, "learning_rate": 4.801367595520497e-05, "loss": 0.0945, "step": 17400 }, { "epoch": 0.2, "learning_rate": 4.8011392824119e-05, "loss": 0.1052, "step": 17420 }, { "epoch": 0.2, "learning_rate": 4.8009109693033025e-05, "loss": 0.2523, "step": 17440 }, { "epoch": 0.2, "learning_rate": 4.8006826561947055e-05, "loss": 0.4226, "step": 17460 }, { "epoch": 0.2, "learning_rate": 4.8004543430861085e-05, "loss": 0.3033, "step": 17480 }, { "epoch": 0.2, "learning_rate": 4.8002260299775114e-05, "loss": 0.1958, "step": 17500 }, { "epoch": 0.2, "learning_rate": 4.7999977168689144e-05, "loss": 0.4425, "step": 17520 }, { "epoch": 0.2, "learning_rate": 4.799769403760317e-05, "loss": 0.3444, "step": 17540 }, { "epoch": 0.2, "learning_rate": 4.79954109065172e-05, "loss": 0.3855, "step": 17560 }, { "epoch": 0.2, "learning_rate": 4.7993127775431226e-05, "loss": 0.6514, "step": 17580 }, { "epoch": 0.2, "learning_rate": 4.7990844644345256e-05, "loss": 0.393, "step": 17600 }, { "epoch": 0.2, "learning_rate": 4.7988561513259286e-05, "loss": 0.1536, "step": 17620 }, { "epoch": 0.2, "learning_rate": 4.7986278382173315e-05, "loss": 0.3235, "step": 17640 }, { "epoch": 0.2, "learning_rate": 4.7983995251087345e-05, "loss": 0.1845, "step": 17660 }, { "epoch": 0.2, "learning_rate": 4.798171212000137e-05, "loss": 0.2428, "step": 17680 }, { "epoch": 0.2, "learning_rate": 4.7979428988915404e-05, "loss": 0.2164, "step": 17700 }, { "epoch": 0.2, "learning_rate": 4.7977145857829434e-05, "loss": 0.1194, "step": 17720 }, { "epoch": 0.2, "learning_rate": 4.797486272674346e-05, "loss": 0.1969, "step": 17740 }, { "epoch": 0.2, "learning_rate": 4.797257959565749e-05, "loss": 0.1336, "step": 17760 }, { "epoch": 0.2, "learning_rate": 4.7970296464571516e-05, "loss": 0.4071, "step": 17780 }, { "epoch": 0.2, "learning_rate": 4.7968013333485546e-05, "loss": 0.2076, "step": 17800 }, { "epoch": 0.2, "learning_rate": 4.7965730202399576e-05, "loss": 0.2542, "step": 17820 }, { "epoch": 0.2, "learning_rate": 4.79634470713136e-05, "loss": 0.2379, "step": 17840 }, { "epoch": 0.2, "learning_rate": 4.7961163940227635e-05, "loss": 0.7328, "step": 17860 }, { "epoch": 0.2, "learning_rate": 4.795888080914166e-05, "loss": 0.2378, "step": 17880 }, { "epoch": 0.2, "learning_rate": 4.795659767805569e-05, "loss": 0.3738, "step": 17900 }, { "epoch": 0.2, "learning_rate": 4.795431454696972e-05, "loss": 0.2118, "step": 17920 }, { "epoch": 0.2, "learning_rate": 4.795203141588375e-05, "loss": 0.3868, "step": 17940 }, { "epoch": 0.21, "learning_rate": 4.794974828479778e-05, "loss": 0.1895, "step": 17960 }, { "epoch": 0.21, "learning_rate": 4.79474651537118e-05, "loss": 0.222, "step": 17980 }, { "epoch": 0.21, "learning_rate": 4.794518202262583e-05, "loss": 0.1479, "step": 18000 }, { "epoch": 0.21, "learning_rate": 4.794289889153986e-05, "loss": 0.1662, "step": 18020 }, { "epoch": 0.21, "learning_rate": 4.794061576045389e-05, "loss": 0.3504, "step": 18040 }, { "epoch": 0.21, "learning_rate": 4.793833262936792e-05, "loss": 0.2542, "step": 18060 }, { "epoch": 0.21, "learning_rate": 4.793604949828195e-05, "loss": 0.2173, "step": 18080 }, { "epoch": 0.21, "learning_rate": 4.793376636719598e-05, "loss": 0.5445, "step": 18100 }, { "epoch": 0.21, "learning_rate": 4.793148323611e-05, "loss": 0.1754, "step": 18120 }, { "epoch": 0.21, "learning_rate": 4.792920010502403e-05, "loss": 0.2068, "step": 18140 }, { "epoch": 0.21, "learning_rate": 4.792691697393806e-05, "loss": 0.1525, "step": 18160 }, { "epoch": 0.21, "learning_rate": 4.792463384285209e-05, "loss": 0.2542, "step": 18180 }, { "epoch": 0.21, "learning_rate": 4.792235071176612e-05, "loss": 0.1667, "step": 18200 }, { "epoch": 0.21, "learning_rate": 4.792006758068014e-05, "loss": 0.315, "step": 18220 }, { "epoch": 0.21, "learning_rate": 4.791778444959418e-05, "loss": 0.2067, "step": 18240 }, { "epoch": 0.21, "learning_rate": 4.79155013185082e-05, "loss": 0.4292, "step": 18260 }, { "epoch": 0.21, "learning_rate": 4.791321818742223e-05, "loss": 0.2158, "step": 18280 }, { "epoch": 0.21, "learning_rate": 4.791093505633626e-05, "loss": 0.2389, "step": 18300 }, { "epoch": 0.21, "learning_rate": 4.790865192525029e-05, "loss": 0.2242, "step": 18320 }, { "epoch": 0.21, "learning_rate": 4.790636879416432e-05, "loss": 0.2678, "step": 18340 }, { "epoch": 0.21, "learning_rate": 4.790408566307834e-05, "loss": 0.1459, "step": 18360 }, { "epoch": 0.21, "learning_rate": 4.790180253199238e-05, "loss": 0.0723, "step": 18380 }, { "epoch": 0.21, "learning_rate": 4.789951940090641e-05, "loss": 0.2542, "step": 18400 }, { "epoch": 0.21, "learning_rate": 4.789723626982043e-05, "loss": 0.4325, "step": 18420 }, { "epoch": 0.21, "learning_rate": 4.789495313873446e-05, "loss": 0.5287, "step": 18440 }, { "epoch": 0.21, "learning_rate": 4.789267000764849e-05, "loss": 0.1589, "step": 18460 }, { "epoch": 0.21, "learning_rate": 4.789038687656252e-05, "loss": 0.5329, "step": 18480 }, { "epoch": 0.21, "learning_rate": 4.788810374547655e-05, "loss": 0.1792, "step": 18500 }, { "epoch": 0.21, "learning_rate": 4.7885820614390574e-05, "loss": 0.2759, "step": 18520 }, { "epoch": 0.21, "learning_rate": 4.788353748330461e-05, "loss": 0.3941, "step": 18540 }, { "epoch": 0.21, "learning_rate": 4.788125435221863e-05, "loss": 0.1729, "step": 18560 }, { "epoch": 0.21, "learning_rate": 4.787897122113266e-05, "loss": 0.2707, "step": 18580 }, { "epoch": 0.21, "learning_rate": 4.787668809004669e-05, "loss": 0.1966, "step": 18600 }, { "epoch": 0.21, "learning_rate": 4.787440495896072e-05, "loss": 0.1781, "step": 18620 }, { "epoch": 0.21, "learning_rate": 4.787212182787475e-05, "loss": 0.2353, "step": 18640 }, { "epoch": 0.21, "learning_rate": 4.7869838696788775e-05, "loss": 0.2168, "step": 18660 }, { "epoch": 0.21, "learning_rate": 4.786755556570281e-05, "loss": 0.1101, "step": 18680 }, { "epoch": 0.21, "learning_rate": 4.7865272434616834e-05, "loss": 0.4139, "step": 18700 }, { "epoch": 0.21, "learning_rate": 4.7862989303530864e-05, "loss": 0.3088, "step": 18720 }, { "epoch": 0.21, "learning_rate": 4.7860706172444894e-05, "loss": 0.2684, "step": 18740 }, { "epoch": 0.21, "learning_rate": 4.785842304135892e-05, "loss": 0.0917, "step": 18760 }, { "epoch": 0.21, "learning_rate": 4.785613991027295e-05, "loss": 0.198, "step": 18780 }, { "epoch": 0.21, "learning_rate": 4.7853856779186976e-05, "loss": 0.1308, "step": 18800 }, { "epoch": 0.21, "learning_rate": 4.7851573648101006e-05, "loss": 0.1231, "step": 18820 }, { "epoch": 0.22, "learning_rate": 4.7849290517015035e-05, "loss": 0.1714, "step": 18840 }, { "epoch": 0.22, "learning_rate": 4.7847007385929065e-05, "loss": 0.1235, "step": 18860 }, { "epoch": 0.22, "learning_rate": 4.7844724254843095e-05, "loss": 0.176, "step": 18880 }, { "epoch": 0.22, "learning_rate": 4.784244112375712e-05, "loss": 0.1023, "step": 18900 }, { "epoch": 0.22, "learning_rate": 4.7840157992671154e-05, "loss": 0.4199, "step": 18920 }, { "epoch": 0.22, "learning_rate": 4.783787486158518e-05, "loss": 0.5122, "step": 18940 }, { "epoch": 0.22, "learning_rate": 4.783559173049921e-05, "loss": 0.2963, "step": 18960 }, { "epoch": 0.22, "learning_rate": 4.783330859941324e-05, "loss": 0.1061, "step": 18980 }, { "epoch": 0.22, "learning_rate": 4.7831025468327266e-05, "loss": 0.1077, "step": 19000 }, { "epoch": 0.22, "learning_rate": 4.7828742337241296e-05, "loss": 0.1909, "step": 19020 }, { "epoch": 0.22, "learning_rate": 4.782645920615532e-05, "loss": 0.516, "step": 19040 }, { "epoch": 0.22, "learning_rate": 4.7824176075069355e-05, "loss": 0.2776, "step": 19060 }, { "epoch": 0.22, "learning_rate": 4.7821892943983385e-05, "loss": 0.3447, "step": 19080 }, { "epoch": 0.22, "learning_rate": 4.781960981289741e-05, "loss": 0.2549, "step": 19100 }, { "epoch": 0.22, "learning_rate": 4.781732668181144e-05, "loss": 0.1847, "step": 19120 }, { "epoch": 0.22, "learning_rate": 4.781504355072547e-05, "loss": 0.3502, "step": 19140 }, { "epoch": 0.22, "learning_rate": 4.78127604196395e-05, "loss": 0.3344, "step": 19160 }, { "epoch": 0.22, "learning_rate": 4.7810477288553526e-05, "loss": 0.291, "step": 19180 }, { "epoch": 0.22, "learning_rate": 4.780819415746755e-05, "loss": 0.6044, "step": 19200 }, { "epoch": 0.22, "learning_rate": 4.7805911026381586e-05, "loss": 0.2623, "step": 19220 }, { "epoch": 0.22, "learning_rate": 4.780362789529561e-05, "loss": 0.1301, "step": 19240 }, { "epoch": 0.22, "learning_rate": 4.780134476420964e-05, "loss": 0.155, "step": 19260 }, { "epoch": 0.22, "learning_rate": 4.779906163312367e-05, "loss": 0.4167, "step": 19280 }, { "epoch": 0.22, "learning_rate": 4.77967785020377e-05, "loss": 0.3578, "step": 19300 }, { "epoch": 0.22, "learning_rate": 4.779449537095173e-05, "loss": 0.1291, "step": 19320 }, { "epoch": 0.22, "learning_rate": 4.779221223986575e-05, "loss": 0.1692, "step": 19340 }, { "epoch": 0.22, "learning_rate": 4.778992910877979e-05, "loss": 0.1162, "step": 19360 }, { "epoch": 0.22, "learning_rate": 4.778764597769381e-05, "loss": 0.4298, "step": 19380 }, { "epoch": 0.22, "learning_rate": 4.778536284660784e-05, "loss": 0.4598, "step": 19400 }, { "epoch": 0.22, "learning_rate": 4.778307971552187e-05, "loss": 0.2211, "step": 19420 }, { "epoch": 0.22, "learning_rate": 4.77807965844359e-05, "loss": 0.1969, "step": 19440 }, { "epoch": 0.22, "learning_rate": 4.777851345334993e-05, "loss": 0.2411, "step": 19460 }, { "epoch": 0.22, "learning_rate": 4.777623032226395e-05, "loss": 0.0991, "step": 19480 }, { "epoch": 0.22, "learning_rate": 4.777394719117798e-05, "loss": 0.2948, "step": 19500 }, { "epoch": 0.22, "learning_rate": 4.777166406009201e-05, "loss": 0.3403, "step": 19520 }, { "epoch": 0.22, "learning_rate": 4.776938092900604e-05, "loss": 0.1225, "step": 19540 }, { "epoch": 0.22, "learning_rate": 4.776709779792007e-05, "loss": 0.2427, "step": 19560 }, { "epoch": 0.22, "learning_rate": 4.77648146668341e-05, "loss": 0.4756, "step": 19580 }, { "epoch": 0.22, "learning_rate": 4.776253153574813e-05, "loss": 0.3001, "step": 19600 }, { "epoch": 0.22, "learning_rate": 4.776024840466215e-05, "loss": 0.1179, "step": 19620 }, { "epoch": 0.22, "learning_rate": 4.775796527357618e-05, "loss": 0.4539, "step": 19640 }, { "epoch": 0.22, "learning_rate": 4.775568214249022e-05, "loss": 0.1902, "step": 19660 }, { "epoch": 0.22, "learning_rate": 4.775339901140424e-05, "loss": 0.1575, "step": 19680 }, { "epoch": 0.22, "learning_rate": 4.775111588031827e-05, "loss": 0.359, "step": 19700 }, { "epoch": 0.23, "learning_rate": 4.7748832749232294e-05, "loss": 0.1363, "step": 19720 }, { "epoch": 0.23, "learning_rate": 4.774654961814633e-05, "loss": 0.1812, "step": 19740 }, { "epoch": 0.23, "learning_rate": 4.774426648706036e-05, "loss": 0.1526, "step": 19760 }, { "epoch": 0.23, "learning_rate": 4.774198335597438e-05, "loss": 0.3408, "step": 19780 }, { "epoch": 0.23, "learning_rate": 4.773970022488841e-05, "loss": 0.2006, "step": 19800 }, { "epoch": 0.23, "learning_rate": 4.773741709380244e-05, "loss": 0.1459, "step": 19820 }, { "epoch": 0.23, "learning_rate": 4.773513396271647e-05, "loss": 0.2041, "step": 19840 }, { "epoch": 0.23, "learning_rate": 4.77328508316305e-05, "loss": 0.0896, "step": 19860 }, { "epoch": 0.23, "learning_rate": 4.773056770054453e-05, "loss": 0.1331, "step": 19880 }, { "epoch": 0.23, "learning_rate": 4.772828456945856e-05, "loss": 0.2913, "step": 19900 }, { "epoch": 0.23, "learning_rate": 4.7726001438372584e-05, "loss": 0.3025, "step": 19920 }, { "epoch": 0.23, "learning_rate": 4.7723718307286614e-05, "loss": 0.2577, "step": 19940 }, { "epoch": 0.23, "learning_rate": 4.772143517620064e-05, "loss": 0.0822, "step": 19960 }, { "epoch": 0.23, "learning_rate": 4.771915204511467e-05, "loss": 0.1794, "step": 19980 }, { "epoch": 0.23, "learning_rate": 4.77168689140287e-05, "loss": 0.28, "step": 20000 }, { "epoch": 0.23, "learning_rate": 4.7714585782942726e-05, "loss": 0.4442, "step": 20020 }, { "epoch": 0.23, "learning_rate": 4.771230265185676e-05, "loss": 0.3709, "step": 20040 }, { "epoch": 0.23, "learning_rate": 4.7710019520770785e-05, "loss": 0.3423, "step": 20060 }, { "epoch": 0.23, "learning_rate": 4.7707736389684815e-05, "loss": 0.158, "step": 20080 }, { "epoch": 0.23, "learning_rate": 4.7705453258598844e-05, "loss": 0.1422, "step": 20100 }, { "epoch": 0.23, "learning_rate": 4.7703170127512874e-05, "loss": 0.1912, "step": 20120 }, { "epoch": 0.23, "learning_rate": 4.7700886996426904e-05, "loss": 0.2554, "step": 20140 }, { "epoch": 0.23, "learning_rate": 4.769860386534093e-05, "loss": 0.3017, "step": 20160 }, { "epoch": 0.23, "learning_rate": 4.769632073425496e-05, "loss": 0.1041, "step": 20180 }, { "epoch": 0.23, "learning_rate": 4.769403760316899e-05, "loss": 0.0784, "step": 20200 }, { "epoch": 0.23, "learning_rate": 4.7691754472083016e-05, "loss": 0.3911, "step": 20220 }, { "epoch": 0.23, "learning_rate": 4.7689471340997045e-05, "loss": 0.2149, "step": 20240 }, { "epoch": 0.23, "learning_rate": 4.7687188209911075e-05, "loss": 0.3544, "step": 20260 }, { "epoch": 0.23, "learning_rate": 4.7684905078825105e-05, "loss": 0.1868, "step": 20280 }, { "epoch": 0.23, "learning_rate": 4.7682621947739134e-05, "loss": 0.4206, "step": 20300 }, { "epoch": 0.23, "learning_rate": 4.768033881665316e-05, "loss": 0.2351, "step": 20320 }, { "epoch": 0.23, "learning_rate": 4.7678055685567194e-05, "loss": 0.1607, "step": 20340 }, { "epoch": 0.23, "learning_rate": 4.767577255448122e-05, "loss": 0.2468, "step": 20360 }, { "epoch": 0.23, "learning_rate": 4.7673489423395246e-05, "loss": 0.2738, "step": 20380 }, { "epoch": 0.23, "learning_rate": 4.7671206292309276e-05, "loss": 0.268, "step": 20400 }, { "epoch": 0.23, "learning_rate": 4.7668923161223306e-05, "loss": 0.3065, "step": 20420 }, { "epoch": 0.23, "learning_rate": 4.7666640030137335e-05, "loss": 0.2607, "step": 20440 }, { "epoch": 0.23, "learning_rate": 4.766435689905136e-05, "loss": 0.2545, "step": 20460 }, { "epoch": 0.23, "learning_rate": 4.766207376796539e-05, "loss": 0.2217, "step": 20480 }, { "epoch": 0.23, "learning_rate": 4.765979063687942e-05, "loss": 0.4399, "step": 20500 }, { "epoch": 0.23, "learning_rate": 4.765750750579345e-05, "loss": 0.2811, "step": 20520 }, { "epoch": 0.23, "learning_rate": 4.765522437470748e-05, "loss": 0.2794, "step": 20540 }, { "epoch": 0.23, "learning_rate": 4.765294124362151e-05, "loss": 0.2171, "step": 20560 }, { "epoch": 0.23, "learning_rate": 4.7650658112535536e-05, "loss": 0.1364, "step": 20580 }, { "epoch": 0.24, "learning_rate": 4.764837498144956e-05, "loss": 0.2387, "step": 20600 }, { "epoch": 0.24, "learning_rate": 4.764609185036359e-05, "loss": 0.2295, "step": 20620 }, { "epoch": 0.24, "learning_rate": 4.764380871927762e-05, "loss": 0.3465, "step": 20640 }, { "epoch": 0.24, "learning_rate": 4.764152558819165e-05, "loss": 0.3706, "step": 20660 }, { "epoch": 0.24, "learning_rate": 4.763924245710568e-05, "loss": 0.4643, "step": 20680 }, { "epoch": 0.24, "learning_rate": 4.76369593260197e-05, "loss": 0.1933, "step": 20700 }, { "epoch": 0.24, "learning_rate": 4.763467619493374e-05, "loss": 0.2899, "step": 20720 }, { "epoch": 0.24, "learning_rate": 4.763239306384776e-05, "loss": 0.1757, "step": 20740 }, { "epoch": 0.24, "learning_rate": 4.763010993276179e-05, "loss": 0.2229, "step": 20760 }, { "epoch": 0.24, "learning_rate": 4.762782680167582e-05, "loss": 0.2413, "step": 20780 }, { "epoch": 0.24, "learning_rate": 4.762554367058985e-05, "loss": 0.1433, "step": 20800 }, { "epoch": 0.24, "learning_rate": 4.762326053950388e-05, "loss": 0.0693, "step": 20820 }, { "epoch": 0.24, "learning_rate": 4.76209774084179e-05, "loss": 0.2766, "step": 20840 }, { "epoch": 0.24, "learning_rate": 4.761869427733194e-05, "loss": 0.2302, "step": 20860 }, { "epoch": 0.24, "learning_rate": 4.761641114624597e-05, "loss": 0.2706, "step": 20880 }, { "epoch": 0.24, "learning_rate": 4.761412801515999e-05, "loss": 0.0659, "step": 20900 }, { "epoch": 0.24, "learning_rate": 4.761184488407402e-05, "loss": 0.2233, "step": 20920 }, { "epoch": 0.24, "learning_rate": 4.760956175298805e-05, "loss": 0.6441, "step": 20940 }, { "epoch": 0.24, "learning_rate": 4.760727862190208e-05, "loss": 0.2606, "step": 20960 }, { "epoch": 0.24, "learning_rate": 4.760499549081611e-05, "loss": 0.0911, "step": 20980 }, { "epoch": 0.24, "learning_rate": 4.760271235973013e-05, "loss": 0.1008, "step": 21000 }, { "epoch": 0.24, "learning_rate": 4.760042922864417e-05, "loss": 0.3406, "step": 21020 }, { "epoch": 0.24, "learning_rate": 4.759814609755819e-05, "loss": 0.0952, "step": 21040 }, { "epoch": 0.24, "learning_rate": 4.759586296647222e-05, "loss": 0.1543, "step": 21060 }, { "epoch": 0.24, "learning_rate": 4.759357983538625e-05, "loss": 0.336, "step": 21080 }, { "epoch": 0.24, "learning_rate": 4.759129670430028e-05, "loss": 0.0752, "step": 21100 }, { "epoch": 0.24, "learning_rate": 4.758901357321431e-05, "loss": 0.1929, "step": 21120 }, { "epoch": 0.24, "learning_rate": 4.7586730442128334e-05, "loss": 0.3049, "step": 21140 }, { "epoch": 0.24, "learning_rate": 4.758444731104237e-05, "loss": 0.1724, "step": 21160 }, { "epoch": 0.24, "learning_rate": 4.758216417995639e-05, "loss": 0.246, "step": 21180 }, { "epoch": 0.24, "learning_rate": 4.757988104887042e-05, "loss": 0.2048, "step": 21200 }, { "epoch": 0.24, "learning_rate": 4.757759791778445e-05, "loss": 0.2887, "step": 21220 }, { "epoch": 0.24, "learning_rate": 4.757531478669848e-05, "loss": 0.4643, "step": 21240 }, { "epoch": 0.24, "learning_rate": 4.757303165561251e-05, "loss": 0.1566, "step": 21260 }, { "epoch": 0.24, "learning_rate": 4.7570748524526535e-05, "loss": 0.1433, "step": 21280 }, { "epoch": 0.24, "learning_rate": 4.7568465393440564e-05, "loss": 0.2427, "step": 21300 }, { "epoch": 0.24, "learning_rate": 4.7566182262354594e-05, "loss": 0.1292, "step": 21320 }, { "epoch": 0.24, "learning_rate": 4.7563899131268624e-05, "loss": 0.4564, "step": 21340 }, { "epoch": 0.24, "learning_rate": 4.7561616000182653e-05, "loss": 0.4773, "step": 21360 }, { "epoch": 0.24, "learning_rate": 4.7559332869096676e-05, "loss": 0.1894, "step": 21380 }, { "epoch": 0.24, "learning_rate": 4.755704973801071e-05, "loss": 0.1295, "step": 21400 }, { "epoch": 0.24, "learning_rate": 4.7554766606924736e-05, "loss": 0.1551, "step": 21420 }, { "epoch": 0.24, "learning_rate": 4.7552483475838765e-05, "loss": 0.3004, "step": 21440 }, { "epoch": 0.24, "learning_rate": 4.75502003447528e-05, "loss": 0.2475, "step": 21460 }, { "epoch": 0.25, "learning_rate": 4.7547917213666825e-05, "loss": 0.1785, "step": 21480 }, { "epoch": 0.25, "learning_rate": 4.7545634082580854e-05, "loss": 0.0603, "step": 21500 }, { "epoch": 0.25, "learning_rate": 4.754335095149488e-05, "loss": 0.1819, "step": 21520 }, { "epoch": 0.25, "learning_rate": 4.7541067820408914e-05, "loss": 0.1347, "step": 21540 }, { "epoch": 0.25, "learning_rate": 4.7538784689322943e-05, "loss": 0.2694, "step": 21560 }, { "epoch": 0.25, "learning_rate": 4.7536501558236966e-05, "loss": 0.1673, "step": 21580 }, { "epoch": 0.25, "learning_rate": 4.7534218427150996e-05, "loss": 0.2762, "step": 21600 }, { "epoch": 0.25, "learning_rate": 4.7531935296065026e-05, "loss": 0.1272, "step": 21620 }, { "epoch": 0.25, "learning_rate": 4.7529652164979055e-05, "loss": 0.1409, "step": 21640 }, { "epoch": 0.25, "learning_rate": 4.7527369033893085e-05, "loss": 0.2112, "step": 21660 }, { "epoch": 0.25, "learning_rate": 4.752508590280711e-05, "loss": 0.2789, "step": 21680 }, { "epoch": 0.25, "learning_rate": 4.7522802771721144e-05, "loss": 0.136, "step": 21700 }, { "epoch": 0.25, "learning_rate": 4.752051964063517e-05, "loss": 0.1572, "step": 21720 }, { "epoch": 0.25, "learning_rate": 4.75182365095492e-05, "loss": 0.134, "step": 21740 }, { "epoch": 0.25, "learning_rate": 4.751595337846323e-05, "loss": 0.1763, "step": 21760 }, { "epoch": 0.25, "learning_rate": 4.7513670247377256e-05, "loss": 0.2714, "step": 21780 }, { "epoch": 0.25, "learning_rate": 4.7511387116291286e-05, "loss": 0.0883, "step": 21800 }, { "epoch": 0.25, "learning_rate": 4.750910398520531e-05, "loss": 0.2159, "step": 21820 }, { "epoch": 0.25, "learning_rate": 4.7506820854119345e-05, "loss": 0.3997, "step": 21840 }, { "epoch": 0.25, "learning_rate": 4.750453772303337e-05, "loss": 0.3553, "step": 21860 }, { "epoch": 0.25, "learning_rate": 4.75022545919474e-05, "loss": 0.3318, "step": 21880 }, { "epoch": 0.25, "learning_rate": 4.749997146086143e-05, "loss": 0.1913, "step": 21900 }, { "epoch": 0.25, "learning_rate": 4.749768832977546e-05, "loss": 0.3367, "step": 21920 }, { "epoch": 0.25, "learning_rate": 4.749540519868949e-05, "loss": 0.1242, "step": 21940 }, { "epoch": 0.25, "learning_rate": 4.749312206760351e-05, "loss": 0.3241, "step": 21960 }, { "epoch": 0.25, "learning_rate": 4.749083893651754e-05, "loss": 0.5237, "step": 21980 }, { "epoch": 0.25, "learning_rate": 4.748855580543157e-05, "loss": 0.3242, "step": 22000 }, { "epoch": 0.25, "learning_rate": 4.74862726743456e-05, "loss": 0.7477, "step": 22020 }, { "epoch": 0.25, "learning_rate": 4.748398954325963e-05, "loss": 0.3193, "step": 22040 }, { "epoch": 0.25, "learning_rate": 4.748170641217366e-05, "loss": 0.1411, "step": 22060 }, { "epoch": 0.25, "learning_rate": 4.747942328108769e-05, "loss": 0.2295, "step": 22080 }, { "epoch": 0.25, "learning_rate": 4.747714015000171e-05, "loss": 0.3306, "step": 22100 }, { "epoch": 0.25, "learning_rate": 4.747485701891574e-05, "loss": 0.2056, "step": 22120 }, { "epoch": 0.25, "learning_rate": 4.747257388782978e-05, "loss": 0.1687, "step": 22140 }, { "epoch": 0.25, "learning_rate": 4.74702907567438e-05, "loss": 0.1253, "step": 22160 }, { "epoch": 0.25, "learning_rate": 4.746800762565783e-05, "loss": 0.231, "step": 22180 }, { "epoch": 0.25, "learning_rate": 4.746572449457185e-05, "loss": 0.158, "step": 22200 }, { "epoch": 0.25, "learning_rate": 4.746344136348589e-05, "loss": 0.0976, "step": 22220 }, { "epoch": 0.25, "learning_rate": 4.746115823239992e-05, "loss": 0.1015, "step": 22240 }, { "epoch": 0.25, "learning_rate": 4.745887510131394e-05, "loss": 0.2067, "step": 22260 }, { "epoch": 0.25, "learning_rate": 4.745659197022797e-05, "loss": 0.437, "step": 22280 }, { "epoch": 0.25, "learning_rate": 4.7454308839142e-05, "loss": 0.2613, "step": 22300 }, { "epoch": 0.25, "learning_rate": 4.745202570805603e-05, "loss": 0.1103, "step": 22320 }, { "epoch": 0.26, "learning_rate": 4.744974257697006e-05, "loss": 0.1907, "step": 22340 }, { "epoch": 0.26, "learning_rate": 4.744745944588409e-05, "loss": 0.2715, "step": 22360 }, { "epoch": 0.26, "learning_rate": 4.744517631479812e-05, "loss": 0.1585, "step": 22380 }, { "epoch": 0.26, "learning_rate": 4.744289318371214e-05, "loss": 0.1597, "step": 22400 }, { "epoch": 0.26, "learning_rate": 4.744061005262617e-05, "loss": 0.1154, "step": 22420 }, { "epoch": 0.26, "learning_rate": 4.74383269215402e-05, "loss": 0.1766, "step": 22440 }, { "epoch": 0.26, "learning_rate": 4.743604379045423e-05, "loss": 0.1722, "step": 22460 }, { "epoch": 0.26, "learning_rate": 4.743376065936826e-05, "loss": 0.3068, "step": 22480 }, { "epoch": 0.26, "learning_rate": 4.7431477528282284e-05, "loss": 0.3184, "step": 22500 }, { "epoch": 0.26, "learning_rate": 4.742919439719632e-05, "loss": 0.247, "step": 22520 }, { "epoch": 0.26, "learning_rate": 4.7426911266110344e-05, "loss": 0.0679, "step": 22540 }, { "epoch": 0.26, "learning_rate": 4.742462813502437e-05, "loss": 0.1863, "step": 22560 }, { "epoch": 0.26, "learning_rate": 4.74223450039384e-05, "loss": 0.2409, "step": 22580 }, { "epoch": 0.26, "learning_rate": 4.742006187285243e-05, "loss": 0.4126, "step": 22600 }, { "epoch": 0.26, "learning_rate": 4.741777874176646e-05, "loss": 0.1744, "step": 22620 }, { "epoch": 0.26, "learning_rate": 4.7415495610680485e-05, "loss": 0.1128, "step": 22640 }, { "epoch": 0.26, "learning_rate": 4.741321247959452e-05, "loss": 0.2759, "step": 22660 }, { "epoch": 0.26, "learning_rate": 4.741092934850855e-05, "loss": 0.1229, "step": 22680 }, { "epoch": 0.26, "learning_rate": 4.7408646217422574e-05, "loss": 0.1229, "step": 22700 }, { "epoch": 0.26, "learning_rate": 4.7406363086336604e-05, "loss": 0.1466, "step": 22720 }, { "epoch": 0.26, "learning_rate": 4.7404079955250634e-05, "loss": 0.2767, "step": 22740 }, { "epoch": 0.26, "learning_rate": 4.7401796824164663e-05, "loss": 0.0576, "step": 22760 }, { "epoch": 0.26, "learning_rate": 4.739951369307869e-05, "loss": 0.1953, "step": 22780 }, { "epoch": 0.26, "learning_rate": 4.7397230561992716e-05, "loss": 0.0631, "step": 22800 }, { "epoch": 0.26, "learning_rate": 4.739494743090675e-05, "loss": 0.274, "step": 22820 }, { "epoch": 0.26, "learning_rate": 4.7392664299820775e-05, "loss": 0.2211, "step": 22840 }, { "epoch": 0.26, "learning_rate": 4.7390381168734805e-05, "loss": 0.2798, "step": 22860 }, { "epoch": 0.26, "learning_rate": 4.7388098037648835e-05, "loss": 0.2211, "step": 22880 }, { "epoch": 0.26, "learning_rate": 4.7385814906562864e-05, "loss": 0.3011, "step": 22900 }, { "epoch": 0.26, "learning_rate": 4.7383531775476894e-05, "loss": 0.3382, "step": 22920 }, { "epoch": 0.26, "learning_rate": 4.738124864439092e-05, "loss": 0.1094, "step": 22940 }, { "epoch": 0.26, "learning_rate": 4.737896551330495e-05, "loss": 0.4431, "step": 22960 }, { "epoch": 0.26, "learning_rate": 4.7376682382218976e-05, "loss": 0.3831, "step": 22980 }, { "epoch": 0.26, "learning_rate": 4.7374399251133006e-05, "loss": 0.2534, "step": 23000 }, { "epoch": 0.26, "learning_rate": 4.7372116120047036e-05, "loss": 0.3019, "step": 23020 }, { "epoch": 0.26, "learning_rate": 4.7369832988961065e-05, "loss": 0.2431, "step": 23040 }, { "epoch": 0.26, "learning_rate": 4.7367549857875095e-05, "loss": 0.1881, "step": 23060 }, { "epoch": 0.26, "learning_rate": 4.736526672678912e-05, "loss": 0.3418, "step": 23080 }, { "epoch": 0.26, "learning_rate": 4.736298359570315e-05, "loss": 0.2647, "step": 23100 }, { "epoch": 0.26, "learning_rate": 4.736070046461718e-05, "loss": 0.1322, "step": 23120 }, { "epoch": 0.26, "learning_rate": 4.735841733353121e-05, "loss": 0.1007, "step": 23140 }, { "epoch": 0.26, "learning_rate": 4.735613420244524e-05, "loss": 0.0906, "step": 23160 }, { "epoch": 0.26, "learning_rate": 4.735385107135926e-05, "loss": 0.2491, "step": 23180 }, { "epoch": 0.26, "learning_rate": 4.7351567940273296e-05, "loss": 0.2215, "step": 23200 }, { "epoch": 0.27, "learning_rate": 4.734928480918732e-05, "loss": 0.1793, "step": 23220 }, { "epoch": 0.27, "learning_rate": 4.734700167810135e-05, "loss": 0.2992, "step": 23240 }, { "epoch": 0.27, "learning_rate": 4.734471854701538e-05, "loss": 0.2231, "step": 23260 }, { "epoch": 0.27, "learning_rate": 4.734243541592941e-05, "loss": 0.554, "step": 23280 }, { "epoch": 0.27, "learning_rate": 4.734015228484344e-05, "loss": 0.1617, "step": 23300 }, { "epoch": 0.27, "learning_rate": 4.733786915375746e-05, "loss": 0.3112, "step": 23320 }, { "epoch": 0.27, "learning_rate": 4.73355860226715e-05, "loss": 0.2599, "step": 23340 }, { "epoch": 0.27, "learning_rate": 4.733330289158553e-05, "loss": 0.2496, "step": 23360 }, { "epoch": 0.27, "learning_rate": 4.733101976049955e-05, "loss": 0.4191, "step": 23380 }, { "epoch": 0.27, "learning_rate": 4.732873662941358e-05, "loss": 0.2301, "step": 23400 }, { "epoch": 0.27, "learning_rate": 4.732645349832761e-05, "loss": 0.1658, "step": 23420 }, { "epoch": 0.27, "learning_rate": 4.732417036724164e-05, "loss": 0.1451, "step": 23440 }, { "epoch": 0.27, "learning_rate": 4.732188723615567e-05, "loss": 0.1813, "step": 23460 }, { "epoch": 0.27, "learning_rate": 4.731960410506969e-05, "loss": 0.3417, "step": 23480 }, { "epoch": 0.27, "learning_rate": 4.731732097398373e-05, "loss": 0.2831, "step": 23500 }, { "epoch": 0.27, "learning_rate": 4.731503784289775e-05, "loss": 0.1925, "step": 23520 }, { "epoch": 0.27, "learning_rate": 4.731275471181178e-05, "loss": 0.2041, "step": 23540 }, { "epoch": 0.27, "learning_rate": 4.731047158072581e-05, "loss": 0.14, "step": 23560 }, { "epoch": 0.27, "learning_rate": 4.730818844963984e-05, "loss": 0.3446, "step": 23580 }, { "epoch": 0.27, "learning_rate": 4.730590531855387e-05, "loss": 0.2162, "step": 23600 }, { "epoch": 0.27, "learning_rate": 4.730362218746789e-05, "loss": 0.1209, "step": 23620 }, { "epoch": 0.27, "learning_rate": 4.730133905638193e-05, "loss": 0.5638, "step": 23640 }, { "epoch": 0.27, "learning_rate": 4.729905592529595e-05, "loss": 0.2857, "step": 23660 }, { "epoch": 0.27, "learning_rate": 4.729677279420998e-05, "loss": 0.3908, "step": 23680 }, { "epoch": 0.27, "learning_rate": 4.729448966312401e-05, "loss": 0.2657, "step": 23700 }, { "epoch": 0.27, "learning_rate": 4.729220653203804e-05, "loss": 0.158, "step": 23720 }, { "epoch": 0.27, "learning_rate": 4.728992340095207e-05, "loss": 0.1392, "step": 23740 }, { "epoch": 0.27, "learning_rate": 4.728764026986609e-05, "loss": 0.2958, "step": 23760 }, { "epoch": 0.27, "learning_rate": 4.728535713878012e-05, "loss": 0.1044, "step": 23780 }, { "epoch": 0.27, "learning_rate": 4.728307400769415e-05, "loss": 0.218, "step": 23800 }, { "epoch": 0.27, "learning_rate": 4.728079087660818e-05, "loss": 0.2634, "step": 23820 }, { "epoch": 0.27, "learning_rate": 4.727850774552221e-05, "loss": 0.2271, "step": 23840 }, { "epoch": 0.27, "learning_rate": 4.7276224614436235e-05, "loss": 0.0972, "step": 23860 }, { "epoch": 0.27, "learning_rate": 4.727394148335027e-05, "loss": 0.2498, "step": 23880 }, { "epoch": 0.27, "learning_rate": 4.7271658352264294e-05, "loss": 0.3355, "step": 23900 }, { "epoch": 0.27, "learning_rate": 4.7269375221178324e-05, "loss": 0.1937, "step": 23920 }, { "epoch": 0.27, "learning_rate": 4.726709209009236e-05, "loss": 0.0728, "step": 23940 }, { "epoch": 0.27, "learning_rate": 4.7264808959006383e-05, "loss": 0.1288, "step": 23960 }, { "epoch": 0.27, "learning_rate": 4.726252582792041e-05, "loss": 0.3853, "step": 23980 }, { "epoch": 0.27, "learning_rate": 4.7260242696834436e-05, "loss": 0.388, "step": 24000 }, { "epoch": 0.27, "learning_rate": 4.725795956574847e-05, "loss": 0.2647, "step": 24020 }, { "epoch": 0.27, "learning_rate": 4.72556764346625e-05, "loss": 0.4496, "step": 24040 }, { "epoch": 0.27, "learning_rate": 4.7253393303576525e-05, "loss": 0.0841, "step": 24060 }, { "epoch": 0.27, "learning_rate": 4.7251110172490555e-05, "loss": 0.229, "step": 24080 }, { "epoch": 0.28, "learning_rate": 4.7248827041404584e-05, "loss": 0.1589, "step": 24100 }, { "epoch": 0.28, "learning_rate": 4.7246543910318614e-05, "loss": 0.2051, "step": 24120 }, { "epoch": 0.28, "learning_rate": 4.7244260779232644e-05, "loss": 0.1634, "step": 24140 }, { "epoch": 0.28, "learning_rate": 4.724197764814667e-05, "loss": 0.0633, "step": 24160 }, { "epoch": 0.28, "learning_rate": 4.72396945170607e-05, "loss": 0.1791, "step": 24180 }, { "epoch": 0.28, "learning_rate": 4.7237411385974726e-05, "loss": 0.0923, "step": 24200 }, { "epoch": 0.28, "learning_rate": 4.7235128254888756e-05, "loss": 0.2985, "step": 24220 }, { "epoch": 0.28, "learning_rate": 4.7232845123802785e-05, "loss": 0.2427, "step": 24240 }, { "epoch": 0.28, "learning_rate": 4.7230561992716815e-05, "loss": 0.2207, "step": 24260 }, { "epoch": 0.28, "learning_rate": 4.7228278861630845e-05, "loss": 0.2788, "step": 24280 }, { "epoch": 0.28, "learning_rate": 4.722599573054487e-05, "loss": 0.1753, "step": 24300 }, { "epoch": 0.28, "learning_rate": 4.7223712599458904e-05, "loss": 0.1645, "step": 24320 }, { "epoch": 0.28, "learning_rate": 4.722142946837293e-05, "loss": 0.338, "step": 24340 }, { "epoch": 0.28, "learning_rate": 4.721914633728696e-05, "loss": 0.0655, "step": 24360 }, { "epoch": 0.28, "learning_rate": 4.7216863206200986e-05, "loss": 0.1333, "step": 24380 }, { "epoch": 0.28, "learning_rate": 4.7214580075115016e-05, "loss": 0.1827, "step": 24400 }, { "epoch": 0.28, "learning_rate": 4.7212296944029046e-05, "loss": 0.1805, "step": 24420 }, { "epoch": 0.28, "learning_rate": 4.721001381294307e-05, "loss": 0.2557, "step": 24440 }, { "epoch": 0.28, "learning_rate": 4.72077306818571e-05, "loss": 0.189, "step": 24460 }, { "epoch": 0.28, "learning_rate": 4.720544755077113e-05, "loss": 0.2641, "step": 24480 }, { "epoch": 0.28, "learning_rate": 4.720316441968516e-05, "loss": 0.0422, "step": 24500 }, { "epoch": 0.28, "learning_rate": 4.720088128859919e-05, "loss": 0.1276, "step": 24520 }, { "epoch": 0.28, "learning_rate": 4.719859815751322e-05, "loss": 0.2978, "step": 24540 }, { "epoch": 0.28, "learning_rate": 4.719631502642725e-05, "loss": 0.292, "step": 24560 }, { "epoch": 0.28, "learning_rate": 4.719403189534127e-05, "loss": 0.244, "step": 24580 }, { "epoch": 0.28, "learning_rate": 4.71917487642553e-05, "loss": 0.1329, "step": 24600 }, { "epoch": 0.28, "learning_rate": 4.7189465633169336e-05, "loss": 0.1237, "step": 24620 }, { "epoch": 0.28, "learning_rate": 4.718718250208336e-05, "loss": 0.1595, "step": 24640 }, { "epoch": 0.28, "learning_rate": 4.718489937099739e-05, "loss": 0.1471, "step": 24660 }, { "epoch": 0.28, "learning_rate": 4.718261623991141e-05, "loss": 0.3982, "step": 24680 }, { "epoch": 0.28, "learning_rate": 4.718033310882545e-05, "loss": 0.1551, "step": 24700 }, { "epoch": 0.28, "learning_rate": 4.717804997773948e-05, "loss": 0.176, "step": 24720 }, { "epoch": 0.28, "learning_rate": 4.71757668466535e-05, "loss": 0.186, "step": 24740 }, { "epoch": 0.28, "learning_rate": 4.717348371556753e-05, "loss": 0.1392, "step": 24760 }, { "epoch": 0.28, "learning_rate": 4.717120058448156e-05, "loss": 0.3822, "step": 24780 }, { "epoch": 0.28, "learning_rate": 4.716891745339559e-05, "loss": 0.1123, "step": 24800 }, { "epoch": 0.28, "learning_rate": 4.716663432230962e-05, "loss": 0.1849, "step": 24820 }, { "epoch": 0.28, "learning_rate": 4.716435119122365e-05, "loss": 0.1637, "step": 24840 }, { "epoch": 0.28, "learning_rate": 4.716206806013768e-05, "loss": 0.3703, "step": 24860 }, { "epoch": 0.28, "learning_rate": 4.71597849290517e-05, "loss": 0.2363, "step": 24880 }, { "epoch": 0.28, "learning_rate": 4.715750179796573e-05, "loss": 0.3077, "step": 24900 }, { "epoch": 0.28, "learning_rate": 4.715521866687976e-05, "loss": 0.5072, "step": 24920 }, { "epoch": 0.28, "learning_rate": 4.715293553579379e-05, "loss": 0.3432, "step": 24940 }, { "epoch": 0.28, "learning_rate": 4.715065240470782e-05, "loss": 0.2283, "step": 24960 }, { "epoch": 0.29, "learning_rate": 4.714836927362184e-05, "loss": 0.3124, "step": 24980 }, { "epoch": 0.29, "learning_rate": 4.714608614253588e-05, "loss": 0.0966, "step": 25000 }, { "epoch": 0.29, "learning_rate": 4.71438030114499e-05, "loss": 0.1474, "step": 25020 }, { "epoch": 0.29, "learning_rate": 4.714151988036393e-05, "loss": 0.1299, "step": 25040 }, { "epoch": 0.29, "learning_rate": 4.713923674927796e-05, "loss": 0.0881, "step": 25060 }, { "epoch": 0.29, "learning_rate": 4.713695361819199e-05, "loss": 0.2015, "step": 25080 }, { "epoch": 0.29, "learning_rate": 4.713467048710602e-05, "loss": 0.1247, "step": 25100 }, { "epoch": 0.29, "learning_rate": 4.7132387356020044e-05, "loss": 0.1407, "step": 25120 }, { "epoch": 0.29, "learning_rate": 4.713010422493408e-05, "loss": 0.2403, "step": 25140 }, { "epoch": 0.29, "learning_rate": 4.712782109384811e-05, "loss": 0.1019, "step": 25160 }, { "epoch": 0.29, "learning_rate": 4.712553796276213e-05, "loss": 0.0492, "step": 25180 }, { "epoch": 0.29, "learning_rate": 4.712325483167616e-05, "loss": 0.2063, "step": 25200 }, { "epoch": 0.29, "learning_rate": 4.712097170059019e-05, "loss": 0.0872, "step": 25220 }, { "epoch": 0.29, "learning_rate": 4.711868856950422e-05, "loss": 0.0874, "step": 25240 }, { "epoch": 0.29, "learning_rate": 4.711640543841825e-05, "loss": 0.1145, "step": 25260 }, { "epoch": 0.29, "learning_rate": 4.7114122307332275e-05, "loss": 0.1191, "step": 25280 }, { "epoch": 0.29, "learning_rate": 4.711183917624631e-05, "loss": 0.216, "step": 25300 }, { "epoch": 0.29, "learning_rate": 4.7109556045160334e-05, "loss": 0.1471, "step": 25320 }, { "epoch": 0.29, "learning_rate": 4.7107272914074364e-05, "loss": 0.2148, "step": 25340 }, { "epoch": 0.29, "learning_rate": 4.7104989782988393e-05, "loss": 0.2792, "step": 25360 }, { "epoch": 0.29, "learning_rate": 4.710270665190242e-05, "loss": 0.3422, "step": 25380 }, { "epoch": 0.29, "learning_rate": 4.710042352081645e-05, "loss": 0.26, "step": 25400 }, { "epoch": 0.29, "learning_rate": 4.7098140389730476e-05, "loss": 0.1177, "step": 25420 }, { "epoch": 0.29, "learning_rate": 4.7095857258644505e-05, "loss": 0.2927, "step": 25440 }, { "epoch": 0.29, "learning_rate": 4.7093574127558535e-05, "loss": 0.1495, "step": 25460 }, { "epoch": 0.29, "learning_rate": 4.7091290996472565e-05, "loss": 0.21, "step": 25480 }, { "epoch": 0.29, "learning_rate": 4.7089007865386594e-05, "loss": 0.1534, "step": 25500 }, { "epoch": 0.29, "learning_rate": 4.7086724734300624e-05, "loss": 0.1793, "step": 25520 }, { "epoch": 0.29, "learning_rate": 4.7084441603214654e-05, "loss": 0.4432, "step": 25540 }, { "epoch": 0.29, "learning_rate": 4.708215847212868e-05, "loss": 0.2653, "step": 25560 }, { "epoch": 0.29, "learning_rate": 4.7079875341042706e-05, "loss": 0.4762, "step": 25580 }, { "epoch": 0.29, "learning_rate": 4.7077592209956736e-05, "loss": 0.2827, "step": 25600 }, { "epoch": 0.29, "learning_rate": 4.7075309078870766e-05, "loss": 0.2646, "step": 25620 }, { "epoch": 0.29, "learning_rate": 4.7073025947784795e-05, "loss": 0.4647, "step": 25640 }, { "epoch": 0.29, "learning_rate": 4.707074281669882e-05, "loss": 0.1342, "step": 25660 }, { "epoch": 0.29, "learning_rate": 4.7068459685612855e-05, "loss": 0.5382, "step": 25680 }, { "epoch": 0.29, "learning_rate": 4.706617655452688e-05, "loss": 0.4671, "step": 25700 }, { "epoch": 0.29, "learning_rate": 4.706389342344091e-05, "loss": 0.1155, "step": 25720 }, { "epoch": 0.29, "learning_rate": 4.706161029235494e-05, "loss": 0.239, "step": 25740 }, { "epoch": 0.29, "learning_rate": 4.705932716126897e-05, "loss": 0.1193, "step": 25760 }, { "epoch": 0.29, "learning_rate": 4.7057044030182996e-05, "loss": 0.2693, "step": 25780 }, { "epoch": 0.29, "learning_rate": 4.705476089909702e-05, "loss": 0.1928, "step": 25800 }, { "epoch": 0.29, "learning_rate": 4.7052477768011056e-05, "loss": 0.3012, "step": 25820 }, { "epoch": 0.29, "learning_rate": 4.7050194636925086e-05, "loss": 0.3103, "step": 25840 }, { "epoch": 0.3, "learning_rate": 4.704791150583911e-05, "loss": 0.2297, "step": 25860 }, { "epoch": 0.3, "learning_rate": 4.704562837475314e-05, "loss": 0.0988, "step": 25880 }, { "epoch": 0.3, "learning_rate": 4.704334524366717e-05, "loss": 0.1365, "step": 25900 }, { "epoch": 0.3, "learning_rate": 4.70410621125812e-05, "loss": 0.2913, "step": 25920 }, { "epoch": 0.3, "learning_rate": 4.703877898149523e-05, "loss": 0.1612, "step": 25940 }, { "epoch": 0.3, "learning_rate": 4.703649585040925e-05, "loss": 0.2934, "step": 25960 }, { "epoch": 0.3, "learning_rate": 4.7034212719323287e-05, "loss": 0.4397, "step": 25980 }, { "epoch": 0.3, "learning_rate": 4.703192958823731e-05, "loss": 0.0961, "step": 26000 }, { "epoch": 0.3, "learning_rate": 4.702964645715134e-05, "loss": 0.1997, "step": 26020 }, { "epoch": 0.3, "learning_rate": 4.702736332606537e-05, "loss": 0.1287, "step": 26040 }, { "epoch": 0.3, "learning_rate": 4.70250801949794e-05, "loss": 0.2804, "step": 26060 }, { "epoch": 0.3, "learning_rate": 4.702279706389343e-05, "loss": 0.2269, "step": 26080 }, { "epoch": 0.3, "learning_rate": 4.702051393280745e-05, "loss": 0.3153, "step": 26100 }, { "epoch": 0.3, "learning_rate": 4.701823080172149e-05, "loss": 0.1995, "step": 26120 }, { "epoch": 0.3, "learning_rate": 4.701594767063551e-05, "loss": 0.5865, "step": 26140 }, { "epoch": 0.3, "learning_rate": 4.701366453954954e-05, "loss": 0.1869, "step": 26160 }, { "epoch": 0.3, "learning_rate": 4.701138140846357e-05, "loss": 0.0841, "step": 26180 }, { "epoch": 0.3, "learning_rate": 4.70090982773776e-05, "loss": 0.0858, "step": 26200 }, { "epoch": 0.3, "learning_rate": 4.700681514629163e-05, "loss": 0.1956, "step": 26220 }, { "epoch": 0.3, "learning_rate": 4.700453201520565e-05, "loss": 0.1216, "step": 26240 }, { "epoch": 0.3, "learning_rate": 4.700224888411968e-05, "loss": 0.1431, "step": 26260 }, { "epoch": 0.3, "learning_rate": 4.699996575303371e-05, "loss": 0.3438, "step": 26280 }, { "epoch": 0.3, "learning_rate": 4.699768262194774e-05, "loss": 0.3116, "step": 26300 }, { "epoch": 0.3, "learning_rate": 4.699539949086177e-05, "loss": 0.1153, "step": 26320 }, { "epoch": 0.3, "learning_rate": 4.6993116359775794e-05, "loss": 0.3653, "step": 26340 }, { "epoch": 0.3, "learning_rate": 4.699083322868983e-05, "loss": 0.1633, "step": 26360 }, { "epoch": 0.3, "learning_rate": 4.698855009760385e-05, "loss": 0.3322, "step": 26380 }, { "epoch": 0.3, "learning_rate": 4.698626696651788e-05, "loss": 0.3349, "step": 26400 }, { "epoch": 0.3, "learning_rate": 4.698398383543192e-05, "loss": 0.1611, "step": 26420 }, { "epoch": 0.3, "learning_rate": 4.698170070434594e-05, "loss": 0.0762, "step": 26440 }, { "epoch": 0.3, "learning_rate": 4.697941757325997e-05, "loss": 0.2275, "step": 26460 }, { "epoch": 0.3, "learning_rate": 4.6977134442173995e-05, "loss": 0.2509, "step": 26480 }, { "epoch": 0.3, "learning_rate": 4.697485131108803e-05, "loss": 0.3024, "step": 26500 }, { "epoch": 0.3, "learning_rate": 4.697256818000206e-05, "loss": 0.0468, "step": 26520 }, { "epoch": 0.3, "learning_rate": 4.6970285048916084e-05, "loss": 0.0977, "step": 26540 }, { "epoch": 0.3, "learning_rate": 4.6968001917830113e-05, "loss": 0.3228, "step": 26560 }, { "epoch": 0.3, "learning_rate": 4.696571878674414e-05, "loss": 0.4069, "step": 26580 }, { "epoch": 0.3, "learning_rate": 4.696343565565817e-05, "loss": 0.1492, "step": 26600 }, { "epoch": 0.3, "learning_rate": 4.69611525245722e-05, "loss": 0.156, "step": 26620 }, { "epoch": 0.3, "learning_rate": 4.6958869393486225e-05, "loss": 0.0924, "step": 26640 }, { "epoch": 0.3, "learning_rate": 4.695658626240026e-05, "loss": 0.457, "step": 26660 }, { "epoch": 0.3, "learning_rate": 4.6954303131314285e-05, "loss": 0.1938, "step": 26680 }, { "epoch": 0.3, "learning_rate": 4.6952020000228314e-05, "loss": 0.1427, "step": 26700 }, { "epoch": 0.31, "learning_rate": 4.6949736869142344e-05, "loss": 0.1757, "step": 26720 }, { "epoch": 0.31, "learning_rate": 4.6947453738056374e-05, "loss": 0.1911, "step": 26740 }, { "epoch": 0.31, "learning_rate": 4.6945170606970404e-05, "loss": 0.2313, "step": 26760 }, { "epoch": 0.31, "learning_rate": 4.6942887475884426e-05, "loss": 0.2092, "step": 26780 }, { "epoch": 0.31, "learning_rate": 4.694060434479846e-05, "loss": 0.0524, "step": 26800 }, { "epoch": 0.31, "learning_rate": 4.6938321213712486e-05, "loss": 0.3001, "step": 26820 }, { "epoch": 0.31, "learning_rate": 4.6936038082626515e-05, "loss": 0.1359, "step": 26840 }, { "epoch": 0.31, "learning_rate": 4.6933754951540545e-05, "loss": 0.1888, "step": 26860 }, { "epoch": 0.31, "learning_rate": 4.6931471820454575e-05, "loss": 0.333, "step": 26880 }, { "epoch": 0.31, "learning_rate": 4.6929188689368605e-05, "loss": 0.4987, "step": 26900 }, { "epoch": 0.31, "learning_rate": 4.692690555828263e-05, "loss": 0.2564, "step": 26920 }, { "epoch": 0.31, "learning_rate": 4.692462242719666e-05, "loss": 0.1071, "step": 26940 }, { "epoch": 0.31, "learning_rate": 4.692233929611069e-05, "loss": 0.315, "step": 26960 }, { "epoch": 0.31, "learning_rate": 4.6920056165024716e-05, "loss": 0.2658, "step": 26980 }, { "epoch": 0.31, "learning_rate": 4.6917773033938746e-05, "loss": 0.1932, "step": 27000 }, { "epoch": 0.31, "learning_rate": 4.6915489902852776e-05, "loss": 0.1575, "step": 27020 }, { "epoch": 0.31, "learning_rate": 4.6913206771766806e-05, "loss": 0.1047, "step": 27040 }, { "epoch": 0.31, "learning_rate": 4.691092364068083e-05, "loss": 0.2544, "step": 27060 }, { "epoch": 0.31, "learning_rate": 4.690864050959486e-05, "loss": 0.3456, "step": 27080 }, { "epoch": 0.31, "learning_rate": 4.6906357378508895e-05, "loss": 0.1469, "step": 27100 }, { "epoch": 0.31, "learning_rate": 4.690407424742292e-05, "loss": 0.5685, "step": 27120 }, { "epoch": 0.31, "learning_rate": 4.690179111633695e-05, "loss": 0.2657, "step": 27140 }, { "epoch": 0.31, "learning_rate": 4.689950798525097e-05, "loss": 0.126, "step": 27160 }, { "epoch": 0.31, "learning_rate": 4.6897224854165007e-05, "loss": 0.1248, "step": 27180 }, { "epoch": 0.31, "learning_rate": 4.6894941723079036e-05, "loss": 0.2434, "step": 27200 }, { "epoch": 0.31, "learning_rate": 4.689265859199306e-05, "loss": 0.1871, "step": 27220 }, { "epoch": 0.31, "learning_rate": 4.689037546090709e-05, "loss": 0.522, "step": 27240 }, { "epoch": 0.31, "learning_rate": 4.688809232982112e-05, "loss": 0.1547, "step": 27260 }, { "epoch": 0.31, "learning_rate": 4.688580919873515e-05, "loss": 0.2498, "step": 27280 }, { "epoch": 0.31, "learning_rate": 4.688352606764918e-05, "loss": 0.4705, "step": 27300 }, { "epoch": 0.31, "learning_rate": 4.688124293656321e-05, "loss": 0.3332, "step": 27320 }, { "epoch": 0.31, "learning_rate": 4.687895980547724e-05, "loss": 0.2212, "step": 27340 }, { "epoch": 0.31, "learning_rate": 4.687667667439126e-05, "loss": 0.1626, "step": 27360 }, { "epoch": 0.31, "learning_rate": 4.687439354330529e-05, "loss": 0.1958, "step": 27380 }, { "epoch": 0.31, "learning_rate": 4.687211041221932e-05, "loss": 0.4111, "step": 27400 }, { "epoch": 0.31, "learning_rate": 4.686982728113335e-05, "loss": 0.3845, "step": 27420 }, { "epoch": 0.31, "learning_rate": 4.686754415004738e-05, "loss": 0.0772, "step": 27440 }, { "epoch": 0.31, "learning_rate": 4.68652610189614e-05, "loss": 0.3387, "step": 27460 }, { "epoch": 0.31, "learning_rate": 4.686297788787544e-05, "loss": 0.1783, "step": 27480 }, { "epoch": 0.31, "learning_rate": 4.686069475678946e-05, "loss": 0.2887, "step": 27500 }, { "epoch": 0.31, "learning_rate": 4.685841162570349e-05, "loss": 0.1088, "step": 27520 }, { "epoch": 0.31, "learning_rate": 4.685612849461752e-05, "loss": 0.1275, "step": 27540 }, { "epoch": 0.31, "learning_rate": 4.685384536353155e-05, "loss": 0.2174, "step": 27560 }, { "epoch": 0.31, "learning_rate": 4.685156223244558e-05, "loss": 0.2202, "step": 27580 }, { "epoch": 0.32, "learning_rate": 4.68492791013596e-05, "loss": 0.31, "step": 27600 }, { "epoch": 0.32, "learning_rate": 4.684699597027363e-05, "loss": 0.1454, "step": 27620 }, { "epoch": 0.32, "learning_rate": 4.684471283918767e-05, "loss": 0.246, "step": 27640 }, { "epoch": 0.32, "learning_rate": 4.684242970810169e-05, "loss": 0.3776, "step": 27660 }, { "epoch": 0.32, "learning_rate": 4.684014657701572e-05, "loss": 0.4635, "step": 27680 }, { "epoch": 0.32, "learning_rate": 4.683786344592975e-05, "loss": 0.2759, "step": 27700 }, { "epoch": 0.32, "learning_rate": 4.683558031484378e-05, "loss": 0.2418, "step": 27720 }, { "epoch": 0.32, "learning_rate": 4.683329718375781e-05, "loss": 0.413, "step": 27740 }, { "epoch": 0.32, "learning_rate": 4.6831014052671833e-05, "loss": 0.2531, "step": 27760 }, { "epoch": 0.32, "learning_rate": 4.682873092158587e-05, "loss": 0.3954, "step": 27780 }, { "epoch": 0.32, "learning_rate": 4.682644779049989e-05, "loss": 0.2616, "step": 27800 }, { "epoch": 0.32, "learning_rate": 4.682416465941392e-05, "loss": 0.1184, "step": 27820 }, { "epoch": 0.32, "learning_rate": 4.682188152832795e-05, "loss": 0.3221, "step": 27840 }, { "epoch": 0.32, "learning_rate": 4.681959839724198e-05, "loss": 0.3173, "step": 27860 }, { "epoch": 0.32, "learning_rate": 4.681731526615601e-05, "loss": 0.2164, "step": 27880 }, { "epoch": 0.32, "learning_rate": 4.6815032135070034e-05, "loss": 0.1263, "step": 27900 }, { "epoch": 0.32, "learning_rate": 4.6812749003984064e-05, "loss": 0.1226, "step": 27920 }, { "epoch": 0.32, "learning_rate": 4.6810465872898094e-05, "loss": 0.2538, "step": 27940 }, { "epoch": 0.32, "learning_rate": 4.6808182741812123e-05, "loss": 0.4058, "step": 27960 }, { "epoch": 0.32, "learning_rate": 4.680589961072615e-05, "loss": 0.2217, "step": 27980 }, { "epoch": 0.32, "learning_rate": 4.680361647964018e-05, "loss": 0.2131, "step": 28000 }, { "epoch": 0.32, "learning_rate": 4.680133334855421e-05, "loss": 0.1246, "step": 28020 }, { "epoch": 0.32, "learning_rate": 4.6799050217468235e-05, "loss": 0.1528, "step": 28040 }, { "epoch": 0.32, "learning_rate": 4.6796767086382265e-05, "loss": 0.0903, "step": 28060 }, { "epoch": 0.32, "learning_rate": 4.6794483955296295e-05, "loss": 0.2985, "step": 28080 }, { "epoch": 0.32, "learning_rate": 4.6792200824210324e-05, "loss": 0.5863, "step": 28100 }, { "epoch": 0.32, "learning_rate": 4.6789917693124354e-05, "loss": 0.1127, "step": 28120 }, { "epoch": 0.32, "learning_rate": 4.678763456203838e-05, "loss": 0.4726, "step": 28140 }, { "epoch": 0.32, "learning_rate": 4.6785351430952414e-05, "loss": 0.1674, "step": 28160 }, { "epoch": 0.32, "learning_rate": 4.6783068299866436e-05, "loss": 0.4203, "step": 28180 }, { "epoch": 0.32, "learning_rate": 4.6780785168780466e-05, "loss": 0.1179, "step": 28200 }, { "epoch": 0.32, "learning_rate": 4.6778502037694496e-05, "loss": 0.1521, "step": 28220 }, { "epoch": 0.32, "learning_rate": 4.6776218906608525e-05, "loss": 0.226, "step": 28240 }, { "epoch": 0.32, "learning_rate": 4.6773935775522555e-05, "loss": 0.2548, "step": 28260 }, { "epoch": 0.32, "learning_rate": 4.677165264443658e-05, "loss": 0.2104, "step": 28280 }, { "epoch": 0.32, "learning_rate": 4.6769369513350615e-05, "loss": 0.1276, "step": 28300 }, { "epoch": 0.32, "learning_rate": 4.6767086382264644e-05, "loss": 0.1549, "step": 28320 }, { "epoch": 0.32, "learning_rate": 4.676480325117867e-05, "loss": 0.1242, "step": 28340 }, { "epoch": 0.32, "learning_rate": 4.67625201200927e-05, "loss": 0.2874, "step": 28360 }, { "epoch": 0.32, "learning_rate": 4.6760236989006727e-05, "loss": 0.1033, "step": 28380 }, { "epoch": 0.32, "learning_rate": 4.6757953857920756e-05, "loss": 0.1214, "step": 28400 }, { "epoch": 0.32, "learning_rate": 4.6755670726834786e-05, "loss": 0.0968, "step": 28420 }, { "epoch": 0.32, "learning_rate": 4.675338759574881e-05, "loss": 0.1897, "step": 28440 }, { "epoch": 0.32, "learning_rate": 4.6751104464662845e-05, "loss": 0.3537, "step": 28460 }, { "epoch": 0.33, "learning_rate": 4.674882133357687e-05, "loss": 0.1463, "step": 28480 }, { "epoch": 0.33, "learning_rate": 4.67465382024909e-05, "loss": 0.6689, "step": 28500 }, { "epoch": 0.33, "learning_rate": 4.674425507140493e-05, "loss": 0.3041, "step": 28520 }, { "epoch": 0.33, "learning_rate": 4.674197194031896e-05, "loss": 0.6822, "step": 28540 }, { "epoch": 0.33, "learning_rate": 4.673968880923299e-05, "loss": 0.265, "step": 28560 }, { "epoch": 0.33, "learning_rate": 4.673740567814701e-05, "loss": 0.2125, "step": 28580 }, { "epoch": 0.33, "learning_rate": 4.6735122547061046e-05, "loss": 0.1652, "step": 28600 }, { "epoch": 0.33, "learning_rate": 4.673283941597507e-05, "loss": 0.3903, "step": 28620 }, { "epoch": 0.33, "learning_rate": 4.67305562848891e-05, "loss": 0.4579, "step": 28640 }, { "epoch": 0.33, "learning_rate": 4.672827315380313e-05, "loss": 0.1294, "step": 28660 }, { "epoch": 0.33, "learning_rate": 4.672599002271716e-05, "loss": 0.1291, "step": 28680 }, { "epoch": 0.33, "learning_rate": 4.672370689163119e-05, "loss": 0.1115, "step": 28700 }, { "epoch": 0.33, "learning_rate": 4.672142376054521e-05, "loss": 0.3263, "step": 28720 }, { "epoch": 0.33, "learning_rate": 4.671914062945924e-05, "loss": 0.1674, "step": 28740 }, { "epoch": 0.33, "learning_rate": 4.671685749837327e-05, "loss": 0.4125, "step": 28760 }, { "epoch": 0.33, "learning_rate": 4.67145743672873e-05, "loss": 0.1807, "step": 28780 }, { "epoch": 0.33, "learning_rate": 4.671229123620133e-05, "loss": 0.2282, "step": 28800 }, { "epoch": 0.33, "learning_rate": 4.671000810511535e-05, "loss": 0.2641, "step": 28820 }, { "epoch": 0.33, "learning_rate": 4.670772497402939e-05, "loss": 0.1762, "step": 28840 }, { "epoch": 0.33, "learning_rate": 4.670544184294341e-05, "loss": 0.1318, "step": 28860 }, { "epoch": 0.33, "learning_rate": 4.670315871185744e-05, "loss": 0.4563, "step": 28880 }, { "epoch": 0.33, "learning_rate": 4.670087558077148e-05, "loss": 0.1443, "step": 28900 }, { "epoch": 0.33, "learning_rate": 4.66985924496855e-05, "loss": 0.4151, "step": 28920 }, { "epoch": 0.33, "learning_rate": 4.669630931859953e-05, "loss": 0.0987, "step": 28940 }, { "epoch": 0.33, "learning_rate": 4.6694026187513553e-05, "loss": 0.2813, "step": 28960 }, { "epoch": 0.33, "learning_rate": 4.669174305642759e-05, "loss": 0.2942, "step": 28980 }, { "epoch": 0.33, "learning_rate": 4.668945992534162e-05, "loss": 0.2736, "step": 29000 }, { "epoch": 0.33, "learning_rate": 4.668717679425564e-05, "loss": 0.3536, "step": 29020 }, { "epoch": 0.33, "learning_rate": 4.668489366316967e-05, "loss": 0.1708, "step": 29040 }, { "epoch": 0.33, "learning_rate": 4.66826105320837e-05, "loss": 0.0983, "step": 29060 }, { "epoch": 0.33, "learning_rate": 4.668032740099773e-05, "loss": 0.1111, "step": 29080 }, { "epoch": 0.33, "learning_rate": 4.667804426991176e-05, "loss": 0.1593, "step": 29100 }, { "epoch": 0.33, "learning_rate": 4.6675761138825784e-05, "loss": 0.3594, "step": 29120 }, { "epoch": 0.33, "learning_rate": 4.667347800773982e-05, "loss": 0.0588, "step": 29140 }, { "epoch": 0.33, "learning_rate": 4.6671194876653843e-05, "loss": 0.2691, "step": 29160 }, { "epoch": 0.33, "learning_rate": 4.666891174556787e-05, "loss": 0.2426, "step": 29180 }, { "epoch": 0.33, "learning_rate": 4.66666286144819e-05, "loss": 0.2839, "step": 29200 }, { "epoch": 0.33, "learning_rate": 4.666434548339593e-05, "loss": 0.2011, "step": 29220 }, { "epoch": 0.33, "learning_rate": 4.666206235230996e-05, "loss": 0.3927, "step": 29240 }, { "epoch": 0.33, "learning_rate": 4.6659779221223985e-05, "loss": 0.1211, "step": 29260 }, { "epoch": 0.33, "learning_rate": 4.665749609013802e-05, "loss": 0.2132, "step": 29280 }, { "epoch": 0.33, "learning_rate": 4.6655212959052044e-05, "loss": 0.4192, "step": 29300 }, { "epoch": 0.33, "learning_rate": 4.6652929827966074e-05, "loss": 0.1242, "step": 29320 }, { "epoch": 0.33, "learning_rate": 4.6650646696880104e-05, "loss": 0.5932, "step": 29340 }, { "epoch": 0.34, "learning_rate": 4.6648363565794134e-05, "loss": 0.2562, "step": 29360 }, { "epoch": 0.34, "learning_rate": 4.664608043470816e-05, "loss": 0.2929, "step": 29380 }, { "epoch": 0.34, "learning_rate": 4.6643797303622186e-05, "loss": 0.1897, "step": 29400 }, { "epoch": 0.34, "learning_rate": 4.6641514172536216e-05, "loss": 0.1677, "step": 29420 }, { "epoch": 0.34, "learning_rate": 4.6639231041450245e-05, "loss": 0.3893, "step": 29440 }, { "epoch": 0.34, "learning_rate": 4.6636947910364275e-05, "loss": 0.3463, "step": 29460 }, { "epoch": 0.34, "learning_rate": 4.6634664779278305e-05, "loss": 0.1234, "step": 29480 }, { "epoch": 0.34, "learning_rate": 4.6632381648192335e-05, "loss": 0.3159, "step": 29500 }, { "epoch": 0.34, "learning_rate": 4.6630098517106364e-05, "loss": 0.3789, "step": 29520 }, { "epoch": 0.34, "learning_rate": 4.662781538602039e-05, "loss": 0.2058, "step": 29540 }, { "epoch": 0.34, "learning_rate": 4.662553225493442e-05, "loss": 0.4196, "step": 29560 }, { "epoch": 0.34, "learning_rate": 4.662324912384845e-05, "loss": 0.1748, "step": 29580 }, { "epoch": 0.34, "learning_rate": 4.6620965992762476e-05, "loss": 0.2978, "step": 29600 }, { "epoch": 0.34, "learning_rate": 4.6618682861676506e-05, "loss": 0.1976, "step": 29620 }, { "epoch": 0.34, "learning_rate": 4.661639973059053e-05, "loss": 0.3286, "step": 29640 }, { "epoch": 0.34, "learning_rate": 4.6614116599504565e-05, "loss": 0.2594, "step": 29660 }, { "epoch": 0.34, "learning_rate": 4.6611833468418595e-05, "loss": 0.1386, "step": 29680 }, { "epoch": 0.34, "learning_rate": 4.660955033733262e-05, "loss": 0.1713, "step": 29700 }, { "epoch": 0.34, "learning_rate": 4.660726720624665e-05, "loss": 0.1372, "step": 29720 }, { "epoch": 0.34, "learning_rate": 4.660498407516068e-05, "loss": 0.1507, "step": 29740 }, { "epoch": 0.34, "learning_rate": 4.660270094407471e-05, "loss": 0.2664, "step": 29760 }, { "epoch": 0.34, "learning_rate": 4.6600417812988737e-05, "loss": 0.2163, "step": 29780 }, { "epoch": 0.34, "learning_rate": 4.6598134681902766e-05, "loss": 0.1489, "step": 29800 }, { "epoch": 0.34, "learning_rate": 4.6595851550816796e-05, "loss": 0.4327, "step": 29820 }, { "epoch": 0.34, "learning_rate": 4.659356841973082e-05, "loss": 0.1179, "step": 29840 }, { "epoch": 0.34, "learning_rate": 4.659128528864485e-05, "loss": 0.1051, "step": 29860 }, { "epoch": 0.34, "learning_rate": 4.658900215755888e-05, "loss": 0.2803, "step": 29880 }, { "epoch": 0.34, "learning_rate": 4.658671902647291e-05, "loss": 0.2474, "step": 29900 }, { "epoch": 0.34, "learning_rate": 4.658443589538694e-05, "loss": 0.1657, "step": 29920 }, { "epoch": 0.34, "learning_rate": 4.658215276430096e-05, "loss": 0.5821, "step": 29940 }, { "epoch": 0.34, "learning_rate": 4.6579869633215e-05, "loss": 0.0996, "step": 29960 }, { "epoch": 0.34, "learning_rate": 4.657758650212902e-05, "loss": 0.3625, "step": 29980 }, { "epoch": 0.34, "learning_rate": 4.657530337104305e-05, "loss": 0.1946, "step": 30000 }, { "epoch": 0.34, "learning_rate": 4.657302023995708e-05, "loss": 0.3067, "step": 30020 }, { "epoch": 0.34, "learning_rate": 4.657073710887111e-05, "loss": 0.3612, "step": 30040 }, { "epoch": 0.34, "learning_rate": 4.656845397778514e-05, "loss": 0.1228, "step": 30060 }, { "epoch": 0.34, "learning_rate": 4.656617084669916e-05, "loss": 0.2039, "step": 30080 }, { "epoch": 0.34, "learning_rate": 4.656388771561319e-05, "loss": 0.3799, "step": 30100 }, { "epoch": 0.34, "learning_rate": 4.656160458452722e-05, "loss": 0.5414, "step": 30120 }, { "epoch": 0.34, "learning_rate": 4.655932145344125e-05, "loss": 0.3193, "step": 30140 }, { "epoch": 0.34, "learning_rate": 4.655703832235528e-05, "loss": 0.1599, "step": 30160 }, { "epoch": 0.34, "learning_rate": 4.655475519126931e-05, "loss": 0.1052, "step": 30180 }, { "epoch": 0.34, "learning_rate": 4.655247206018334e-05, "loss": 0.3321, "step": 30200 }, { "epoch": 0.34, "learning_rate": 4.655018892909736e-05, "loss": 0.2118, "step": 30220 }, { "epoch": 0.35, "learning_rate": 4.654790579801139e-05, "loss": 0.2282, "step": 30240 }, { "epoch": 0.35, "learning_rate": 4.654562266692543e-05, "loss": 0.0858, "step": 30260 }, { "epoch": 0.35, "learning_rate": 4.654333953583945e-05, "loss": 0.096, "step": 30280 }, { "epoch": 0.35, "learning_rate": 4.654105640475348e-05, "loss": 0.2978, "step": 30300 }, { "epoch": 0.35, "learning_rate": 4.653877327366751e-05, "loss": 0.3638, "step": 30320 }, { "epoch": 0.35, "learning_rate": 4.653649014258154e-05, "loss": 0.3792, "step": 30340 }, { "epoch": 0.35, "learning_rate": 4.653420701149557e-05, "loss": 0.0751, "step": 30360 }, { "epoch": 0.35, "learning_rate": 4.653192388040959e-05, "loss": 0.1537, "step": 30380 }, { "epoch": 0.35, "learning_rate": 4.652964074932362e-05, "loss": 0.1935, "step": 30400 }, { "epoch": 0.35, "learning_rate": 4.652735761823765e-05, "loss": 0.4257, "step": 30420 }, { "epoch": 0.35, "learning_rate": 4.652507448715168e-05, "loss": 0.1746, "step": 30440 }, { "epoch": 0.35, "learning_rate": 4.652279135606571e-05, "loss": 0.1823, "step": 30460 }, { "epoch": 0.35, "learning_rate": 4.652050822497974e-05, "loss": 0.1194, "step": 30480 }, { "epoch": 0.35, "learning_rate": 4.651822509389377e-05, "loss": 0.2367, "step": 30500 }, { "epoch": 0.35, "learning_rate": 4.6515941962807794e-05, "loss": 0.3, "step": 30520 }, { "epoch": 0.35, "learning_rate": 4.6513658831721824e-05, "loss": 0.1938, "step": 30540 }, { "epoch": 0.35, "learning_rate": 4.6511375700635854e-05, "loss": 0.0735, "step": 30560 }, { "epoch": 0.35, "learning_rate": 4.650909256954988e-05, "loss": 0.2118, "step": 30580 }, { "epoch": 0.35, "learning_rate": 4.650680943846391e-05, "loss": 0.1454, "step": 30600 }, { "epoch": 0.35, "learning_rate": 4.6504526307377936e-05, "loss": 0.136, "step": 30620 }, { "epoch": 0.35, "learning_rate": 4.650224317629197e-05, "loss": 0.1202, "step": 30640 }, { "epoch": 0.35, "learning_rate": 4.6499960045205995e-05, "loss": 0.3388, "step": 30660 }, { "epoch": 0.35, "learning_rate": 4.6497676914120025e-05, "loss": 0.327, "step": 30680 }, { "epoch": 0.35, "learning_rate": 4.6495393783034055e-05, "loss": 0.2117, "step": 30700 }, { "epoch": 0.35, "learning_rate": 4.6493110651948084e-05, "loss": 0.265, "step": 30720 }, { "epoch": 0.35, "learning_rate": 4.6490827520862114e-05, "loss": 0.2941, "step": 30740 }, { "epoch": 0.35, "learning_rate": 4.648854438977614e-05, "loss": 0.1586, "step": 30760 }, { "epoch": 0.35, "learning_rate": 4.648626125869017e-05, "loss": 0.2393, "step": 30780 }, { "epoch": 0.35, "learning_rate": 4.64839781276042e-05, "loss": 0.5313, "step": 30800 }, { "epoch": 0.35, "learning_rate": 4.6481694996518226e-05, "loss": 0.3646, "step": 30820 }, { "epoch": 0.35, "learning_rate": 4.6479411865432256e-05, "loss": 0.4968, "step": 30840 }, { "epoch": 0.35, "learning_rate": 4.6477128734346285e-05, "loss": 0.2266, "step": 30860 }, { "epoch": 0.35, "learning_rate": 4.6474845603260315e-05, "loss": 0.1091, "step": 30880 }, { "epoch": 0.35, "learning_rate": 4.6472562472174345e-05, "loss": 0.083, "step": 30900 }, { "epoch": 0.35, "learning_rate": 4.647027934108837e-05, "loss": 0.2194, "step": 30920 }, { "epoch": 0.35, "learning_rate": 4.6467996210002404e-05, "loss": 0.242, "step": 30940 }, { "epoch": 0.35, "learning_rate": 4.646571307891643e-05, "loss": 0.1988, "step": 30960 }, { "epoch": 0.35, "learning_rate": 4.6463429947830457e-05, "loss": 0.3715, "step": 30980 }, { "epoch": 0.35, "learning_rate": 4.6461146816744486e-05, "loss": 0.2221, "step": 31000 }, { "epoch": 0.35, "learning_rate": 4.6458863685658516e-05, "loss": 0.5447, "step": 31020 }, { "epoch": 0.35, "learning_rate": 4.6456580554572546e-05, "loss": 0.0997, "step": 31040 }, { "epoch": 0.35, "learning_rate": 4.645429742348657e-05, "loss": 0.1563, "step": 31060 }, { "epoch": 0.35, "learning_rate": 4.6452014292400605e-05, "loss": 0.2131, "step": 31080 }, { "epoch": 0.36, "learning_rate": 4.644973116131463e-05, "loss": 0.4158, "step": 31100 }, { "epoch": 0.36, "learning_rate": 4.644744803022866e-05, "loss": 0.1955, "step": 31120 }, { "epoch": 0.36, "learning_rate": 4.644516489914269e-05, "loss": 0.1324, "step": 31140 }, { "epoch": 0.36, "learning_rate": 4.644288176805672e-05, "loss": 0.1309, "step": 31160 }, { "epoch": 0.36, "learning_rate": 4.6440598636970747e-05, "loss": 0.0786, "step": 31180 }, { "epoch": 0.36, "learning_rate": 4.643831550588477e-05, "loss": 0.454, "step": 31200 }, { "epoch": 0.36, "learning_rate": 4.64360323747988e-05, "loss": 0.2296, "step": 31220 }, { "epoch": 0.36, "learning_rate": 4.643374924371283e-05, "loss": 0.1237, "step": 31240 }, { "epoch": 0.36, "learning_rate": 4.643146611262686e-05, "loss": 0.2864, "step": 31260 }, { "epoch": 0.36, "learning_rate": 4.642918298154089e-05, "loss": 0.1541, "step": 31280 }, { "epoch": 0.36, "learning_rate": 4.642689985045491e-05, "loss": 0.5016, "step": 31300 }, { "epoch": 0.36, "learning_rate": 4.642461671936895e-05, "loss": 0.1055, "step": 31320 }, { "epoch": 0.36, "learning_rate": 4.642233358828297e-05, "loss": 0.4413, "step": 31340 }, { "epoch": 0.36, "learning_rate": 4.6420050457197e-05, "loss": 0.2971, "step": 31360 }, { "epoch": 0.36, "learning_rate": 4.641776732611104e-05, "loss": 0.2702, "step": 31380 }, { "epoch": 0.36, "learning_rate": 4.641548419502506e-05, "loss": 0.1892, "step": 31400 }, { "epoch": 0.36, "learning_rate": 4.641320106393909e-05, "loss": 0.0558, "step": 31420 }, { "epoch": 0.36, "learning_rate": 4.641091793285311e-05, "loss": 0.2745, "step": 31440 }, { "epoch": 0.36, "learning_rate": 4.640863480176715e-05, "loss": 0.1577, "step": 31460 }, { "epoch": 0.36, "learning_rate": 4.640635167068118e-05, "loss": 0.4418, "step": 31480 }, { "epoch": 0.36, "learning_rate": 4.64040685395952e-05, "loss": 0.2495, "step": 31500 }, { "epoch": 0.36, "learning_rate": 4.640178540850923e-05, "loss": 0.2185, "step": 31520 }, { "epoch": 0.36, "learning_rate": 4.639950227742326e-05, "loss": 0.1712, "step": 31540 }, { "epoch": 0.36, "learning_rate": 4.639721914633729e-05, "loss": 0.0718, "step": 31560 }, { "epoch": 0.36, "learning_rate": 4.639493601525132e-05, "loss": 0.7654, "step": 31580 }, { "epoch": 0.36, "learning_rate": 4.639265288416534e-05, "loss": 0.2044, "step": 31600 }, { "epoch": 0.36, "learning_rate": 4.639036975307938e-05, "loss": 0.1581, "step": 31620 }, { "epoch": 0.36, "learning_rate": 4.63880866219934e-05, "loss": 0.1769, "step": 31640 }, { "epoch": 0.36, "learning_rate": 4.638580349090743e-05, "loss": 0.426, "step": 31660 }, { "epoch": 0.36, "learning_rate": 4.638352035982146e-05, "loss": 0.2866, "step": 31680 }, { "epoch": 0.36, "learning_rate": 4.638123722873549e-05, "loss": 0.0812, "step": 31700 }, { "epoch": 0.36, "learning_rate": 4.637895409764952e-05, "loss": 0.2673, "step": 31720 }, { "epoch": 0.36, "learning_rate": 4.6376670966563544e-05, "loss": 0.3665, "step": 31740 }, { "epoch": 0.36, "learning_rate": 4.637438783547758e-05, "loss": 0.1277, "step": 31760 }, { "epoch": 0.36, "learning_rate": 4.63721047043916e-05, "loss": 0.2, "step": 31780 }, { "epoch": 0.36, "learning_rate": 4.636982157330563e-05, "loss": 0.1955, "step": 31800 }, { "epoch": 0.36, "learning_rate": 4.636753844221966e-05, "loss": 0.2739, "step": 31820 }, { "epoch": 0.36, "learning_rate": 4.636525531113369e-05, "loss": 0.166, "step": 31840 }, { "epoch": 0.36, "learning_rate": 4.636297218004772e-05, "loss": 0.2995, "step": 31860 }, { "epoch": 0.36, "learning_rate": 4.6360689048961745e-05, "loss": 0.1336, "step": 31880 }, { "epoch": 0.36, "learning_rate": 4.6358405917875775e-05, "loss": 0.3258, "step": 31900 }, { "epoch": 0.36, "learning_rate": 4.6356122786789804e-05, "loss": 0.1746, "step": 31920 }, { "epoch": 0.36, "learning_rate": 4.6353839655703834e-05, "loss": 0.2577, "step": 31940 }, { "epoch": 0.36, "learning_rate": 4.6351556524617864e-05, "loss": 0.3848, "step": 31960 }, { "epoch": 0.37, "learning_rate": 4.634927339353189e-05, "loss": 0.3368, "step": 31980 }, { "epoch": 0.37, "learning_rate": 4.634699026244592e-05, "loss": 0.2798, "step": 32000 }, { "epoch": 0.37, "learning_rate": 4.6344707131359946e-05, "loss": 0.4976, "step": 32020 }, { "epoch": 0.37, "learning_rate": 4.6342424000273976e-05, "loss": 0.074, "step": 32040 }, { "epoch": 0.37, "learning_rate": 4.634014086918801e-05, "loss": 0.2116, "step": 32060 }, { "epoch": 0.37, "learning_rate": 4.6337857738102035e-05, "loss": 0.1955, "step": 32080 }, { "epoch": 0.37, "learning_rate": 4.6335574607016065e-05, "loss": 0.1408, "step": 32100 }, { "epoch": 0.37, "learning_rate": 4.633329147593009e-05, "loss": 0.1389, "step": 32120 }, { "epoch": 0.37, "learning_rate": 4.6331008344844124e-05, "loss": 0.2763, "step": 32140 }, { "epoch": 0.37, "learning_rate": 4.6328725213758154e-05, "loss": 0.2871, "step": 32160 }, { "epoch": 0.37, "learning_rate": 4.6326442082672177e-05, "loss": 0.2793, "step": 32180 }, { "epoch": 0.37, "learning_rate": 4.6324158951586206e-05, "loss": 0.2231, "step": 32200 }, { "epoch": 0.37, "learning_rate": 4.6321875820500236e-05, "loss": 0.1346, "step": 32220 }, { "epoch": 0.37, "learning_rate": 4.6319592689414266e-05, "loss": 0.2997, "step": 32240 }, { "epoch": 0.37, "learning_rate": 4.6317309558328295e-05, "loss": 0.3081, "step": 32260 }, { "epoch": 0.37, "learning_rate": 4.6315026427242325e-05, "loss": 0.4599, "step": 32280 }, { "epoch": 0.37, "learning_rate": 4.6312743296156355e-05, "loss": 0.1583, "step": 32300 }, { "epoch": 0.37, "learning_rate": 4.631046016507038e-05, "loss": 0.2517, "step": 32320 }, { "epoch": 0.37, "learning_rate": 4.630817703398441e-05, "loss": 0.4374, "step": 32340 }, { "epoch": 0.37, "learning_rate": 4.630589390289844e-05, "loss": 0.1689, "step": 32360 }, { "epoch": 0.37, "learning_rate": 4.6303610771812467e-05, "loss": 0.5209, "step": 32380 }, { "epoch": 0.37, "learning_rate": 4.6301327640726496e-05, "loss": 0.2711, "step": 32400 }, { "epoch": 0.37, "learning_rate": 4.629904450964052e-05, "loss": 0.2297, "step": 32420 }, { "epoch": 0.37, "learning_rate": 4.6296761378554556e-05, "loss": 0.123, "step": 32440 }, { "epoch": 0.37, "learning_rate": 4.629447824746858e-05, "loss": 0.1853, "step": 32460 }, { "epoch": 0.37, "learning_rate": 4.629219511638261e-05, "loss": 0.1458, "step": 32480 }, { "epoch": 0.37, "learning_rate": 4.628991198529664e-05, "loss": 0.1781, "step": 32500 }, { "epoch": 0.37, "learning_rate": 4.628762885421067e-05, "loss": 0.247, "step": 32520 }, { "epoch": 0.37, "learning_rate": 4.62853457231247e-05, "loss": 0.2012, "step": 32540 }, { "epoch": 0.37, "learning_rate": 4.628306259203872e-05, "loss": 0.3325, "step": 32560 }, { "epoch": 0.37, "learning_rate": 4.628077946095275e-05, "loss": 0.2574, "step": 32580 }, { "epoch": 0.37, "learning_rate": 4.627849632986678e-05, "loss": 0.1398, "step": 32600 }, { "epoch": 0.37, "learning_rate": 4.627621319878081e-05, "loss": 0.193, "step": 32620 }, { "epoch": 0.37, "learning_rate": 4.627393006769484e-05, "loss": 0.1477, "step": 32640 }, { "epoch": 0.37, "learning_rate": 4.627164693660887e-05, "loss": 0.3111, "step": 32660 }, { "epoch": 0.37, "learning_rate": 4.62693638055229e-05, "loss": 0.2367, "step": 32680 }, { "epoch": 0.37, "learning_rate": 4.626708067443692e-05, "loss": 0.1922, "step": 32700 }, { "epoch": 0.37, "learning_rate": 4.626479754335095e-05, "loss": 0.2759, "step": 32720 }, { "epoch": 0.37, "learning_rate": 4.626251441226499e-05, "loss": 0.2646, "step": 32740 }, { "epoch": 0.37, "learning_rate": 4.626023128117901e-05, "loss": 0.1716, "step": 32760 }, { "epoch": 0.37, "learning_rate": 4.625794815009304e-05, "loss": 0.174, "step": 32780 }, { "epoch": 0.37, "learning_rate": 4.625566501900706e-05, "loss": 0.1871, "step": 32800 }, { "epoch": 0.37, "learning_rate": 4.62533818879211e-05, "loss": 0.189, "step": 32820 }, { "epoch": 0.37, "learning_rate": 4.625109875683513e-05, "loss": 0.3022, "step": 32840 }, { "epoch": 0.38, "learning_rate": 4.624881562574915e-05, "loss": 0.1949, "step": 32860 }, { "epoch": 0.38, "learning_rate": 4.624653249466318e-05, "loss": 0.3119, "step": 32880 }, { "epoch": 0.38, "learning_rate": 4.624424936357721e-05, "loss": 0.1647, "step": 32900 }, { "epoch": 0.38, "learning_rate": 4.624196623249124e-05, "loss": 0.3745, "step": 32920 }, { "epoch": 0.38, "learning_rate": 4.623968310140527e-05, "loss": 0.0472, "step": 32940 }, { "epoch": 0.38, "learning_rate": 4.62373999703193e-05, "loss": 0.2171, "step": 32960 }, { "epoch": 0.38, "learning_rate": 4.623511683923333e-05, "loss": 0.519, "step": 32980 }, { "epoch": 0.38, "learning_rate": 4.623283370814735e-05, "loss": 0.2835, "step": 33000 }, { "epoch": 0.38, "learning_rate": 4.623055057706138e-05, "loss": 0.2744, "step": 33020 }, { "epoch": 0.38, "learning_rate": 4.622826744597541e-05, "loss": 0.1532, "step": 33040 }, { "epoch": 0.38, "learning_rate": 4.622598431488944e-05, "loss": 0.2194, "step": 33060 }, { "epoch": 0.38, "learning_rate": 4.622370118380347e-05, "loss": 0.1865, "step": 33080 }, { "epoch": 0.38, "learning_rate": 4.6221418052717494e-05, "loss": 0.3072, "step": 33100 }, { "epoch": 0.38, "learning_rate": 4.621913492163153e-05, "loss": 0.3143, "step": 33120 }, { "epoch": 0.38, "learning_rate": 4.6216851790545554e-05, "loss": 0.4519, "step": 33140 }, { "epoch": 0.38, "learning_rate": 4.6214568659459584e-05, "loss": 0.2289, "step": 33160 }, { "epoch": 0.38, "learning_rate": 4.621228552837361e-05, "loss": 0.203, "step": 33180 }, { "epoch": 0.38, "learning_rate": 4.621000239728764e-05, "loss": 0.2359, "step": 33200 }, { "epoch": 0.38, "learning_rate": 4.620771926620167e-05, "loss": 0.1554, "step": 33220 }, { "epoch": 0.38, "learning_rate": 4.6205436135115695e-05, "loss": 0.2007, "step": 33240 }, { "epoch": 0.38, "learning_rate": 4.620315300402973e-05, "loss": 0.1452, "step": 33260 }, { "epoch": 0.38, "learning_rate": 4.620086987294376e-05, "loss": 0.2908, "step": 33280 }, { "epoch": 0.38, "learning_rate": 4.6198586741857785e-05, "loss": 0.068, "step": 33300 }, { "epoch": 0.38, "learning_rate": 4.6196303610771814e-05, "loss": 0.246, "step": 33320 }, { "epoch": 0.38, "learning_rate": 4.6194020479685844e-05, "loss": 0.4249, "step": 33340 }, { "epoch": 0.38, "learning_rate": 4.6191737348599874e-05, "loss": 0.1977, "step": 33360 }, { "epoch": 0.38, "learning_rate": 4.61894542175139e-05, "loss": 0.2811, "step": 33380 }, { "epoch": 0.38, "learning_rate": 4.6187171086427926e-05, "loss": 0.4903, "step": 33400 }, { "epoch": 0.38, "learning_rate": 4.618488795534196e-05, "loss": 0.299, "step": 33420 }, { "epoch": 0.38, "learning_rate": 4.6182604824255986e-05, "loss": 0.1881, "step": 33440 }, { "epoch": 0.38, "learning_rate": 4.6180321693170015e-05, "loss": 0.2519, "step": 33460 }, { "epoch": 0.38, "learning_rate": 4.6178038562084045e-05, "loss": 0.1152, "step": 33480 }, { "epoch": 0.38, "learning_rate": 4.6175755430998075e-05, "loss": 0.3352, "step": 33500 }, { "epoch": 0.38, "learning_rate": 4.6173472299912104e-05, "loss": 0.5213, "step": 33520 }, { "epoch": 0.38, "learning_rate": 4.617118916882613e-05, "loss": 0.2125, "step": 33540 }, { "epoch": 0.38, "learning_rate": 4.6168906037740164e-05, "loss": 0.0847, "step": 33560 }, { "epoch": 0.38, "learning_rate": 4.6166622906654187e-05, "loss": 0.3868, "step": 33580 }, { "epoch": 0.38, "learning_rate": 4.6164339775568216e-05, "loss": 0.0104, "step": 33600 }, { "epoch": 0.38, "learning_rate": 4.6162056644482246e-05, "loss": 0.1436, "step": 33620 }, { "epoch": 0.38, "learning_rate": 4.6159773513396276e-05, "loss": 0.1521, "step": 33640 }, { "epoch": 0.38, "learning_rate": 4.6157490382310305e-05, "loss": 0.1074, "step": 33660 }, { "epoch": 0.38, "learning_rate": 4.615520725122433e-05, "loss": 0.2926, "step": 33680 }, { "epoch": 0.38, "learning_rate": 4.615292412013836e-05, "loss": 0.1324, "step": 33700 }, { "epoch": 0.38, "learning_rate": 4.615064098905239e-05, "loss": 0.1029, "step": 33720 }, { "epoch": 0.39, "learning_rate": 4.614835785796642e-05, "loss": 0.2498, "step": 33740 }, { "epoch": 0.39, "learning_rate": 4.614607472688045e-05, "loss": 0.1711, "step": 33760 }, { "epoch": 0.39, "learning_rate": 4.614379159579447e-05, "loss": 0.1433, "step": 33780 }, { "epoch": 0.39, "learning_rate": 4.6141508464708506e-05, "loss": 0.2642, "step": 33800 }, { "epoch": 0.39, "learning_rate": 4.613922533362253e-05, "loss": 0.1436, "step": 33820 }, { "epoch": 0.39, "learning_rate": 4.613694220253656e-05, "loss": 0.2416, "step": 33840 }, { "epoch": 0.39, "learning_rate": 4.6134659071450595e-05, "loss": 0.3713, "step": 33860 }, { "epoch": 0.39, "learning_rate": 4.613237594036462e-05, "loss": 0.2843, "step": 33880 }, { "epoch": 0.39, "learning_rate": 4.613009280927865e-05, "loss": 0.3357, "step": 33900 }, { "epoch": 0.39, "learning_rate": 4.612780967819267e-05, "loss": 0.35, "step": 33920 }, { "epoch": 0.39, "learning_rate": 4.612552654710671e-05, "loss": 0.1908, "step": 33940 }, { "epoch": 0.39, "learning_rate": 4.612324341602074e-05, "loss": 0.2394, "step": 33960 }, { "epoch": 0.39, "learning_rate": 4.612096028493476e-05, "loss": 0.1836, "step": 33980 }, { "epoch": 0.39, "learning_rate": 4.611867715384879e-05, "loss": 0.1045, "step": 34000 }, { "epoch": 0.39, "learning_rate": 4.611639402276282e-05, "loss": 0.4418, "step": 34020 }, { "epoch": 0.39, "learning_rate": 4.611411089167685e-05, "loss": 0.1294, "step": 34040 }, { "epoch": 0.39, "learning_rate": 4.611182776059088e-05, "loss": 0.2416, "step": 34060 }, { "epoch": 0.39, "learning_rate": 4.61095446295049e-05, "loss": 0.6843, "step": 34080 }, { "epoch": 0.39, "learning_rate": 4.610726149841894e-05, "loss": 0.2353, "step": 34100 }, { "epoch": 0.39, "learning_rate": 4.610497836733296e-05, "loss": 0.1601, "step": 34120 }, { "epoch": 0.39, "learning_rate": 4.610269523624699e-05, "loss": 0.3106, "step": 34140 }, { "epoch": 0.39, "learning_rate": 4.610041210516102e-05, "loss": 0.3056, "step": 34160 }, { "epoch": 0.39, "learning_rate": 4.609812897407505e-05, "loss": 0.1997, "step": 34180 }, { "epoch": 0.39, "learning_rate": 4.609584584298908e-05, "loss": 0.1983, "step": 34200 }, { "epoch": 0.39, "learning_rate": 4.60935627119031e-05, "loss": 0.2345, "step": 34220 }, { "epoch": 0.39, "learning_rate": 4.609127958081714e-05, "loss": 0.2266, "step": 34240 }, { "epoch": 0.39, "learning_rate": 4.608899644973116e-05, "loss": 0.4069, "step": 34260 }, { "epoch": 0.39, "learning_rate": 4.608671331864519e-05, "loss": 0.2385, "step": 34280 }, { "epoch": 0.39, "learning_rate": 4.608443018755922e-05, "loss": 0.2375, "step": 34300 }, { "epoch": 0.39, "learning_rate": 4.608214705647325e-05, "loss": 0.4143, "step": 34320 }, { "epoch": 0.39, "learning_rate": 4.607986392538728e-05, "loss": 0.3059, "step": 34340 }, { "epoch": 0.39, "learning_rate": 4.6077580794301304e-05, "loss": 0.1864, "step": 34360 }, { "epoch": 0.39, "learning_rate": 4.607529766321533e-05, "loss": 0.1022, "step": 34380 }, { "epoch": 0.39, "learning_rate": 4.607301453212936e-05, "loss": 0.4841, "step": 34400 }, { "epoch": 0.39, "learning_rate": 4.607073140104339e-05, "loss": 0.1536, "step": 34420 }, { "epoch": 0.39, "learning_rate": 4.606844826995742e-05, "loss": 0.1452, "step": 34440 }, { "epoch": 0.39, "learning_rate": 4.606616513887145e-05, "loss": 0.2281, "step": 34460 }, { "epoch": 0.39, "learning_rate": 4.606388200778548e-05, "loss": 0.1677, "step": 34480 }, { "epoch": 0.39, "learning_rate": 4.6061598876699505e-05, "loss": 0.1321, "step": 34500 }, { "epoch": 0.39, "learning_rate": 4.6059315745613534e-05, "loss": 0.3636, "step": 34520 }, { "epoch": 0.39, "learning_rate": 4.605703261452757e-05, "loss": 0.1932, "step": 34540 }, { "epoch": 0.39, "learning_rate": 4.6054749483441594e-05, "loss": 0.3178, "step": 34560 }, { "epoch": 0.39, "learning_rate": 4.605246635235562e-05, "loss": 0.3016, "step": 34580 }, { "epoch": 0.39, "learning_rate": 4.6050183221269646e-05, "loss": 0.1589, "step": 34600 }, { "epoch": 0.4, "learning_rate": 4.604790009018368e-05, "loss": 0.1691, "step": 34620 }, { "epoch": 0.4, "learning_rate": 4.604561695909771e-05, "loss": 0.2003, "step": 34640 }, { "epoch": 0.4, "learning_rate": 4.6043333828011735e-05, "loss": 0.2044, "step": 34660 }, { "epoch": 0.4, "learning_rate": 4.6041050696925765e-05, "loss": 0.1235, "step": 34680 }, { "epoch": 0.4, "learning_rate": 4.6038767565839795e-05, "loss": 0.1754, "step": 34700 }, { "epoch": 0.4, "learning_rate": 4.6036484434753824e-05, "loss": 0.1116, "step": 34720 }, { "epoch": 0.4, "learning_rate": 4.6034201303667854e-05, "loss": 0.072, "step": 34740 }, { "epoch": 0.4, "learning_rate": 4.603191817258188e-05, "loss": 0.3975, "step": 34760 }, { "epoch": 0.4, "learning_rate": 4.602963504149591e-05, "loss": 0.2381, "step": 34780 }, { "epoch": 0.4, "learning_rate": 4.6027351910409936e-05, "loss": 0.3387, "step": 34800 }, { "epoch": 0.4, "learning_rate": 4.6025068779323966e-05, "loss": 0.3822, "step": 34820 }, { "epoch": 0.4, "learning_rate": 4.6022785648237996e-05, "loss": 0.1306, "step": 34840 }, { "epoch": 0.4, "learning_rate": 4.6020502517152025e-05, "loss": 0.1674, "step": 34860 }, { "epoch": 0.4, "learning_rate": 4.6018219386066055e-05, "loss": 0.0714, "step": 34880 }, { "epoch": 0.4, "learning_rate": 4.601593625498008e-05, "loss": 0.1543, "step": 34900 }, { "epoch": 0.4, "learning_rate": 4.6013653123894114e-05, "loss": 0.4282, "step": 34920 }, { "epoch": 0.4, "learning_rate": 4.601136999280814e-05, "loss": 0.1022, "step": 34940 }, { "epoch": 0.4, "learning_rate": 4.600908686172217e-05, "loss": 0.1782, "step": 34960 }, { "epoch": 0.4, "learning_rate": 4.6006803730636197e-05, "loss": 0.2705, "step": 34980 }, { "epoch": 0.4, "learning_rate": 4.6004520599550226e-05, "loss": 0.1281, "step": 35000 }, { "epoch": 0.4, "learning_rate": 4.6002237468464256e-05, "loss": 0.1733, "step": 35020 }, { "epoch": 0.4, "learning_rate": 4.599995433737828e-05, "loss": 0.3922, "step": 35040 }, { "epoch": 0.4, "learning_rate": 4.599767120629231e-05, "loss": 0.2519, "step": 35060 }, { "epoch": 0.4, "learning_rate": 4.599538807520634e-05, "loss": 0.4297, "step": 35080 }, { "epoch": 0.4, "learning_rate": 4.599310494412037e-05, "loss": 0.2353, "step": 35100 }, { "epoch": 0.4, "learning_rate": 4.59908218130344e-05, "loss": 0.2869, "step": 35120 }, { "epoch": 0.4, "learning_rate": 4.598853868194843e-05, "loss": 0.0887, "step": 35140 }, { "epoch": 0.4, "learning_rate": 4.598625555086246e-05, "loss": 0.1532, "step": 35160 }, { "epoch": 0.4, "learning_rate": 4.598397241977648e-05, "loss": 0.2057, "step": 35180 }, { "epoch": 0.4, "learning_rate": 4.598168928869051e-05, "loss": 0.2462, "step": 35200 }, { "epoch": 0.4, "learning_rate": 4.5979406157604546e-05, "loss": 0.5542, "step": 35220 }, { "epoch": 0.4, "learning_rate": 4.597712302651857e-05, "loss": 0.3224, "step": 35240 }, { "epoch": 0.4, "learning_rate": 4.59748398954326e-05, "loss": 0.1067, "step": 35260 }, { "epoch": 0.4, "learning_rate": 4.597255676434662e-05, "loss": 0.2913, "step": 35280 }, { "epoch": 0.4, "learning_rate": 4.597027363326066e-05, "loss": 0.3221, "step": 35300 }, { "epoch": 0.4, "learning_rate": 4.596799050217469e-05, "loss": 0.2779, "step": 35320 }, { "epoch": 0.4, "learning_rate": 4.596570737108871e-05, "loss": 0.2439, "step": 35340 }, { "epoch": 0.4, "learning_rate": 4.596342424000274e-05, "loss": 0.3098, "step": 35360 }, { "epoch": 0.4, "learning_rate": 4.596114110891677e-05, "loss": 0.297, "step": 35380 }, { "epoch": 0.4, "learning_rate": 4.59588579778308e-05, "loss": 0.2711, "step": 35400 }, { "epoch": 0.4, "learning_rate": 4.595657484674483e-05, "loss": 0.2143, "step": 35420 }, { "epoch": 0.4, "learning_rate": 4.595429171565886e-05, "loss": 0.3258, "step": 35440 }, { "epoch": 0.4, "learning_rate": 4.595200858457289e-05, "loss": 0.1164, "step": 35460 }, { "epoch": 0.41, "learning_rate": 4.594972545348691e-05, "loss": 0.2303, "step": 35480 }, { "epoch": 0.41, "learning_rate": 4.594744232240094e-05, "loss": 0.3258, "step": 35500 }, { "epoch": 0.41, "learning_rate": 4.594515919131497e-05, "loss": 0.1904, "step": 35520 }, { "epoch": 0.41, "learning_rate": 4.5942876060229e-05, "loss": 0.564, "step": 35540 }, { "epoch": 0.41, "learning_rate": 4.594059292914303e-05, "loss": 0.2149, "step": 35560 }, { "epoch": 0.41, "learning_rate": 4.593830979805705e-05, "loss": 0.062, "step": 35580 }, { "epoch": 0.41, "learning_rate": 4.593602666697109e-05, "loss": 0.3727, "step": 35600 }, { "epoch": 0.41, "learning_rate": 4.593374353588511e-05, "loss": 0.5962, "step": 35620 }, { "epoch": 0.41, "learning_rate": 4.593146040479914e-05, "loss": 0.2801, "step": 35640 }, { "epoch": 0.41, "learning_rate": 4.592917727371317e-05, "loss": 0.3875, "step": 35660 }, { "epoch": 0.41, "learning_rate": 4.59268941426272e-05, "loss": 0.1384, "step": 35680 }, { "epoch": 0.41, "learning_rate": 4.592461101154123e-05, "loss": 0.1855, "step": 35700 }, { "epoch": 0.41, "learning_rate": 4.5922327880455254e-05, "loss": 0.145, "step": 35720 }, { "epoch": 0.41, "learning_rate": 4.592004474936929e-05, "loss": 0.1589, "step": 35740 }, { "epoch": 0.41, "learning_rate": 4.591776161828332e-05, "loss": 0.1102, "step": 35760 }, { "epoch": 0.41, "learning_rate": 4.591547848719734e-05, "loss": 0.0691, "step": 35780 }, { "epoch": 0.41, "learning_rate": 4.591319535611137e-05, "loss": 0.6618, "step": 35800 }, { "epoch": 0.41, "learning_rate": 4.59109122250254e-05, "loss": 0.1807, "step": 35820 }, { "epoch": 0.41, "learning_rate": 4.590862909393943e-05, "loss": 0.1674, "step": 35840 }, { "epoch": 0.41, "learning_rate": 4.590634596285346e-05, "loss": 0.131, "step": 35860 }, { "epoch": 0.41, "learning_rate": 4.5904062831767485e-05, "loss": 0.1176, "step": 35880 }, { "epoch": 0.41, "learning_rate": 4.590177970068152e-05, "loss": 0.1357, "step": 35900 }, { "epoch": 0.41, "learning_rate": 4.5899496569595544e-05, "loss": 0.1106, "step": 35920 }, { "epoch": 0.41, "learning_rate": 4.5897213438509574e-05, "loss": 0.2364, "step": 35940 }, { "epoch": 0.41, "learning_rate": 4.5894930307423604e-05, "loss": 0.1095, "step": 35960 }, { "epoch": 0.41, "learning_rate": 4.589264717633763e-05, "loss": 0.3297, "step": 35980 }, { "epoch": 0.41, "learning_rate": 4.589036404525166e-05, "loss": 0.2222, "step": 36000 }, { "epoch": 0.41, "learning_rate": 4.5888080914165686e-05, "loss": 0.425, "step": 36020 }, { "epoch": 0.41, "learning_rate": 4.588579778307972e-05, "loss": 0.5193, "step": 36040 }, { "epoch": 0.41, "learning_rate": 4.5883514651993745e-05, "loss": 0.1997, "step": 36060 }, { "epoch": 0.41, "learning_rate": 4.5881231520907775e-05, "loss": 0.101, "step": 36080 }, { "epoch": 0.41, "learning_rate": 4.5878948389821805e-05, "loss": 0.1822, "step": 36100 }, { "epoch": 0.41, "learning_rate": 4.5876665258735834e-05, "loss": 0.5409, "step": 36120 }, { "epoch": 0.41, "learning_rate": 4.5874382127649864e-05, "loss": 0.2289, "step": 36140 }, { "epoch": 0.41, "learning_rate": 4.587209899656389e-05, "loss": 0.1707, "step": 36160 }, { "epoch": 0.41, "learning_rate": 4.5869815865477917e-05, "loss": 0.1504, "step": 36180 }, { "epoch": 0.41, "learning_rate": 4.5867532734391946e-05, "loss": 0.0976, "step": 36200 }, { "epoch": 0.41, "learning_rate": 4.5865249603305976e-05, "loss": 0.231, "step": 36220 }, { "epoch": 0.41, "learning_rate": 4.5862966472220006e-05, "loss": 0.262, "step": 36240 }, { "epoch": 0.41, "learning_rate": 4.586068334113403e-05, "loss": 0.1034, "step": 36260 }, { "epoch": 0.41, "learning_rate": 4.5858400210048065e-05, "loss": 0.2486, "step": 36280 }, { "epoch": 0.41, "learning_rate": 4.585611707896209e-05, "loss": 0.2643, "step": 36300 }, { "epoch": 0.41, "learning_rate": 4.585383394787612e-05, "loss": 0.1244, "step": 36320 }, { "epoch": 0.41, "learning_rate": 4.5851550816790154e-05, "loss": 0.1455, "step": 36340 }, { "epoch": 0.42, "learning_rate": 4.584926768570418e-05, "loss": 0.1791, "step": 36360 }, { "epoch": 0.42, "learning_rate": 4.584698455461821e-05, "loss": 0.1839, "step": 36380 }, { "epoch": 0.42, "learning_rate": 4.584470142353223e-05, "loss": 0.1214, "step": 36400 }, { "epoch": 0.42, "learning_rate": 4.5842418292446266e-05, "loss": 0.2004, "step": 36420 }, { "epoch": 0.42, "learning_rate": 4.5840135161360296e-05, "loss": 0.1648, "step": 36440 }, { "epoch": 0.42, "learning_rate": 4.583785203027432e-05, "loss": 0.187, "step": 36460 }, { "epoch": 0.42, "learning_rate": 4.583556889918835e-05, "loss": 0.3668, "step": 36480 }, { "epoch": 0.42, "learning_rate": 4.583328576810238e-05, "loss": 0.1112, "step": 36500 }, { "epoch": 0.42, "learning_rate": 4.583100263701641e-05, "loss": 0.4665, "step": 36520 }, { "epoch": 0.42, "learning_rate": 4.582871950593044e-05, "loss": 0.2466, "step": 36540 }, { "epoch": 0.42, "learning_rate": 4.582643637484446e-05, "loss": 0.3309, "step": 36560 }, { "epoch": 0.42, "learning_rate": 4.58241532437585e-05, "loss": 0.4825, "step": 36580 }, { "epoch": 0.42, "learning_rate": 4.582187011267252e-05, "loss": 0.2143, "step": 36600 }, { "epoch": 0.42, "learning_rate": 4.581958698158655e-05, "loss": 0.1256, "step": 36620 }, { "epoch": 0.42, "learning_rate": 4.581730385050058e-05, "loss": 0.2314, "step": 36640 }, { "epoch": 0.42, "learning_rate": 4.581502071941461e-05, "loss": 0.0828, "step": 36660 }, { "epoch": 0.42, "learning_rate": 4.581273758832864e-05, "loss": 0.1991, "step": 36680 }, { "epoch": 0.42, "learning_rate": 4.581045445724266e-05, "loss": 0.5436, "step": 36700 }, { "epoch": 0.42, "learning_rate": 4.58081713261567e-05, "loss": 0.2913, "step": 36720 }, { "epoch": 0.42, "learning_rate": 4.580588819507072e-05, "loss": 0.1956, "step": 36740 }, { "epoch": 0.42, "learning_rate": 4.580360506398475e-05, "loss": 0.2178, "step": 36760 }, { "epoch": 0.42, "learning_rate": 4.580132193289878e-05, "loss": 0.0956, "step": 36780 }, { "epoch": 0.42, "learning_rate": 4.579903880181281e-05, "loss": 0.1235, "step": 36800 }, { "epoch": 0.42, "learning_rate": 4.579675567072684e-05, "loss": 0.2698, "step": 36820 }, { "epoch": 0.42, "learning_rate": 4.579447253964086e-05, "loss": 0.092, "step": 36840 }, { "epoch": 0.42, "learning_rate": 4.579218940855489e-05, "loss": 0.1457, "step": 36860 }, { "epoch": 0.42, "learning_rate": 4.578990627746892e-05, "loss": 0.284, "step": 36880 }, { "epoch": 0.42, "learning_rate": 4.578762314638295e-05, "loss": 0.2998, "step": 36900 }, { "epoch": 0.42, "learning_rate": 4.578534001529698e-05, "loss": 0.3078, "step": 36920 }, { "epoch": 0.42, "learning_rate": 4.578305688421101e-05, "loss": 0.1817, "step": 36940 }, { "epoch": 0.42, "learning_rate": 4.578077375312504e-05, "loss": 0.1482, "step": 36960 }, { "epoch": 0.42, "learning_rate": 4.577849062203906e-05, "loss": 0.1008, "step": 36980 }, { "epoch": 0.42, "learning_rate": 4.577620749095309e-05, "loss": 0.154, "step": 37000 }, { "epoch": 0.42, "learning_rate": 4.577392435986713e-05, "loss": 0.5068, "step": 37020 }, { "epoch": 0.42, "learning_rate": 4.577164122878115e-05, "loss": 0.2057, "step": 37040 }, { "epoch": 0.42, "learning_rate": 4.576935809769518e-05, "loss": 0.0828, "step": 37060 }, { "epoch": 0.42, "learning_rate": 4.5767074966609205e-05, "loss": 0.2911, "step": 37080 }, { "epoch": 0.42, "learning_rate": 4.576479183552324e-05, "loss": 0.1469, "step": 37100 }, { "epoch": 0.42, "learning_rate": 4.576250870443727e-05, "loss": 0.0763, "step": 37120 }, { "epoch": 0.42, "learning_rate": 4.5760225573351294e-05, "loss": 0.3308, "step": 37140 }, { "epoch": 0.42, "learning_rate": 4.5757942442265324e-05, "loss": 0.27, "step": 37160 }, { "epoch": 0.42, "learning_rate": 4.575565931117935e-05, "loss": 0.1565, "step": 37180 }, { "epoch": 0.42, "learning_rate": 4.575337618009338e-05, "loss": 0.1723, "step": 37200 }, { "epoch": 0.42, "learning_rate": 4.575109304900741e-05, "loss": 0.322, "step": 37220 }, { "epoch": 0.43, "learning_rate": 4.5748809917921436e-05, "loss": 0.2158, "step": 37240 }, { "epoch": 0.43, "learning_rate": 4.574652678683547e-05, "loss": 0.1377, "step": 37260 }, { "epoch": 0.43, "learning_rate": 4.5744243655749495e-05, "loss": 0.1245, "step": 37280 }, { "epoch": 0.43, "learning_rate": 4.5741960524663525e-05, "loss": 0.3306, "step": 37300 }, { "epoch": 0.43, "learning_rate": 4.5739677393577554e-05, "loss": 0.2641, "step": 37320 }, { "epoch": 0.43, "learning_rate": 4.5737394262491584e-05, "loss": 0.0919, "step": 37340 }, { "epoch": 0.43, "learning_rate": 4.5735111131405614e-05, "loss": 0.199, "step": 37360 }, { "epoch": 0.43, "learning_rate": 4.5732828000319637e-05, "loss": 0.1987, "step": 37380 }, { "epoch": 0.43, "learning_rate": 4.573054486923367e-05, "loss": 0.274, "step": 37400 }, { "epoch": 0.43, "learning_rate": 4.5728261738147696e-05, "loss": 0.1275, "step": 37420 }, { "epoch": 0.43, "learning_rate": 4.5725978607061726e-05, "loss": 0.403, "step": 37440 }, { "epoch": 0.43, "learning_rate": 4.5723695475975755e-05, "loss": 0.3385, "step": 37460 }, { "epoch": 0.43, "learning_rate": 4.5721412344889785e-05, "loss": 0.3061, "step": 37480 }, { "epoch": 0.43, "learning_rate": 4.5719129213803815e-05, "loss": 0.0696, "step": 37500 }, { "epoch": 0.43, "learning_rate": 4.571684608271784e-05, "loss": 0.2439, "step": 37520 }, { "epoch": 0.43, "learning_rate": 4.571456295163187e-05, "loss": 0.1522, "step": 37540 }, { "epoch": 0.43, "learning_rate": 4.57122798205459e-05, "loss": 0.307, "step": 37560 }, { "epoch": 0.43, "learning_rate": 4.570999668945993e-05, "loss": 0.0808, "step": 37580 }, { "epoch": 0.43, "learning_rate": 4.5707713558373956e-05, "loss": 0.0886, "step": 37600 }, { "epoch": 0.43, "learning_rate": 4.5705430427287986e-05, "loss": 0.3502, "step": 37620 }, { "epoch": 0.43, "learning_rate": 4.5703147296202016e-05, "loss": 0.0974, "step": 37640 }, { "epoch": 0.43, "learning_rate": 4.570086416511604e-05, "loss": 0.8128, "step": 37660 }, { "epoch": 0.43, "learning_rate": 4.569858103403007e-05, "loss": 0.1387, "step": 37680 }, { "epoch": 0.43, "learning_rate": 4.5696297902944105e-05, "loss": 0.3563, "step": 37700 }, { "epoch": 0.43, "learning_rate": 4.569401477185813e-05, "loss": 0.1491, "step": 37720 }, { "epoch": 0.43, "learning_rate": 4.569173164077216e-05, "loss": 0.6374, "step": 37740 }, { "epoch": 0.43, "learning_rate": 4.568944850968618e-05, "loss": 0.117, "step": 37760 }, { "epoch": 0.43, "learning_rate": 4.568716537860022e-05, "loss": 0.3544, "step": 37780 }, { "epoch": 0.43, "learning_rate": 4.5684882247514246e-05, "loss": 0.3311, "step": 37800 }, { "epoch": 0.43, "learning_rate": 4.568259911642827e-05, "loss": 0.186, "step": 37820 }, { "epoch": 0.43, "learning_rate": 4.56803159853423e-05, "loss": 0.151, "step": 37840 }, { "epoch": 0.43, "learning_rate": 4.567803285425633e-05, "loss": 0.375, "step": 37860 }, { "epoch": 0.43, "learning_rate": 4.567574972317036e-05, "loss": 0.1338, "step": 37880 }, { "epoch": 0.43, "learning_rate": 4.567346659208439e-05, "loss": 0.2209, "step": 37900 }, { "epoch": 0.43, "learning_rate": 4.567118346099842e-05, "loss": 0.2656, "step": 37920 }, { "epoch": 0.43, "learning_rate": 4.566890032991245e-05, "loss": 0.4742, "step": 37940 }, { "epoch": 0.43, "learning_rate": 4.566661719882647e-05, "loss": 0.1021, "step": 37960 }, { "epoch": 0.43, "learning_rate": 4.56643340677405e-05, "loss": 0.1471, "step": 37980 }, { "epoch": 0.43, "learning_rate": 4.566205093665453e-05, "loss": 0.1494, "step": 38000 }, { "epoch": 0.43, "learning_rate": 4.565976780556856e-05, "loss": 0.0534, "step": 38020 }, { "epoch": 0.43, "learning_rate": 4.565748467448259e-05, "loss": 0.2482, "step": 38040 }, { "epoch": 0.43, "learning_rate": 4.565520154339661e-05, "loss": 0.496, "step": 38060 }, { "epoch": 0.43, "learning_rate": 4.565291841231065e-05, "loss": 0.1713, "step": 38080 }, { "epoch": 0.43, "learning_rate": 4.565063528122467e-05, "loss": 0.2769, "step": 38100 }, { "epoch": 0.44, "learning_rate": 4.56483521501387e-05, "loss": 0.2158, "step": 38120 }, { "epoch": 0.44, "learning_rate": 4.564606901905273e-05, "loss": 0.1698, "step": 38140 }, { "epoch": 0.44, "learning_rate": 4.564378588796676e-05, "loss": 0.3661, "step": 38160 }, { "epoch": 0.44, "learning_rate": 4.564150275688079e-05, "loss": 0.1807, "step": 38180 }, { "epoch": 0.44, "learning_rate": 4.563921962579481e-05, "loss": 0.2105, "step": 38200 }, { "epoch": 0.44, "learning_rate": 4.563693649470885e-05, "loss": 0.225, "step": 38220 }, { "epoch": 0.44, "learning_rate": 4.563465336362288e-05, "loss": 0.1666, "step": 38240 }, { "epoch": 0.44, "learning_rate": 4.56323702325369e-05, "loss": 0.2843, "step": 38260 }, { "epoch": 0.44, "learning_rate": 4.563008710145093e-05, "loss": 0.1365, "step": 38280 }, { "epoch": 0.44, "learning_rate": 4.562780397036496e-05, "loss": 0.1851, "step": 38300 }, { "epoch": 0.44, "learning_rate": 4.562552083927899e-05, "loss": 0.1587, "step": 38320 }, { "epoch": 0.44, "learning_rate": 4.562323770819302e-05, "loss": 0.1425, "step": 38340 }, { "epoch": 0.44, "learning_rate": 4.5620954577107044e-05, "loss": 0.1945, "step": 38360 }, { "epoch": 0.44, "learning_rate": 4.561867144602108e-05, "loss": 0.1593, "step": 38380 }, { "epoch": 0.44, "learning_rate": 4.56163883149351e-05, "loss": 0.1363, "step": 38400 }, { "epoch": 0.44, "learning_rate": 4.561410518384913e-05, "loss": 0.0635, "step": 38420 }, { "epoch": 0.44, "learning_rate": 4.561182205276316e-05, "loss": 0.152, "step": 38440 }, { "epoch": 0.44, "learning_rate": 4.560953892167719e-05, "loss": 0.2371, "step": 38460 }, { "epoch": 0.44, "learning_rate": 4.560725579059122e-05, "loss": 0.1454, "step": 38480 }, { "epoch": 0.44, "learning_rate": 4.5604972659505245e-05, "loss": 0.1561, "step": 38500 }, { "epoch": 0.44, "learning_rate": 4.560268952841928e-05, "loss": 0.103, "step": 38520 }, { "epoch": 0.44, "learning_rate": 4.5600406397333304e-05, "loss": 0.1267, "step": 38540 }, { "epoch": 0.44, "learning_rate": 4.5598123266247334e-05, "loss": 0.1786, "step": 38560 }, { "epoch": 0.44, "learning_rate": 4.559584013516136e-05, "loss": 0.2153, "step": 38580 }, { "epoch": 0.44, "learning_rate": 4.559355700407539e-05, "loss": 0.1147, "step": 38600 }, { "epoch": 0.44, "learning_rate": 4.559127387298942e-05, "loss": 0.2024, "step": 38620 }, { "epoch": 0.44, "learning_rate": 4.5588990741903446e-05, "loss": 0.0111, "step": 38640 }, { "epoch": 0.44, "learning_rate": 4.5586707610817475e-05, "loss": 0.2019, "step": 38660 }, { "epoch": 0.44, "learning_rate": 4.5584424479731505e-05, "loss": 0.1219, "step": 38680 }, { "epoch": 0.44, "learning_rate": 4.5582141348645535e-05, "loss": 0.1238, "step": 38700 }, { "epoch": 0.44, "learning_rate": 4.5579858217559564e-05, "loss": 0.2766, "step": 38720 }, { "epoch": 0.44, "learning_rate": 4.557757508647359e-05, "loss": 0.1513, "step": 38740 }, { "epoch": 0.44, "learning_rate": 4.5575291955387624e-05, "loss": 0.2834, "step": 38760 }, { "epoch": 0.44, "learning_rate": 4.5573008824301647e-05, "loss": 0.1493, "step": 38780 }, { "epoch": 0.44, "learning_rate": 4.5570725693215676e-05, "loss": 0.0935, "step": 38800 }, { "epoch": 0.44, "learning_rate": 4.556844256212971e-05, "loss": 0.0652, "step": 38820 }, { "epoch": 0.44, "learning_rate": 4.5566159431043736e-05, "loss": 0.2181, "step": 38840 }, { "epoch": 0.44, "learning_rate": 4.5563876299957765e-05, "loss": 0.2142, "step": 38860 }, { "epoch": 0.44, "learning_rate": 4.556159316887179e-05, "loss": 0.1073, "step": 38880 }, { "epoch": 0.44, "learning_rate": 4.5559310037785825e-05, "loss": 0.1404, "step": 38900 }, { "epoch": 0.44, "learning_rate": 4.5557026906699854e-05, "loss": 0.1645, "step": 38920 }, { "epoch": 0.44, "learning_rate": 4.555474377561388e-05, "loss": 0.2497, "step": 38940 }, { "epoch": 0.44, "learning_rate": 4.555246064452791e-05, "loss": 0.2322, "step": 38960 }, { "epoch": 0.44, "learning_rate": 4.555017751344194e-05, "loss": 0.1925, "step": 38980 }, { "epoch": 0.45, "learning_rate": 4.5547894382355966e-05, "loss": 0.3092, "step": 39000 }, { "epoch": 0.45, "learning_rate": 4.5545611251269996e-05, "loss": 0.3302, "step": 39020 }, { "epoch": 0.45, "learning_rate": 4.554332812018402e-05, "loss": 0.3453, "step": 39040 }, { "epoch": 0.45, "learning_rate": 4.5541044989098055e-05, "loss": 0.1915, "step": 39060 }, { "epoch": 0.45, "learning_rate": 4.553876185801208e-05, "loss": 0.1439, "step": 39080 }, { "epoch": 0.45, "learning_rate": 4.553647872692611e-05, "loss": 0.0977, "step": 39100 }, { "epoch": 0.45, "learning_rate": 4.553419559584014e-05, "loss": 0.2694, "step": 39120 }, { "epoch": 0.45, "learning_rate": 4.553191246475417e-05, "loss": 0.0821, "step": 39140 }, { "epoch": 0.45, "learning_rate": 4.55296293336682e-05, "loss": 0.1621, "step": 39160 }, { "epoch": 0.45, "learning_rate": 4.552734620258222e-05, "loss": 0.223, "step": 39180 }, { "epoch": 0.45, "learning_rate": 4.5525063071496256e-05, "loss": 0.5283, "step": 39200 }, { "epoch": 0.45, "learning_rate": 4.552277994041028e-05, "loss": 0.2515, "step": 39220 }, { "epoch": 0.45, "learning_rate": 4.552049680932431e-05, "loss": 0.2575, "step": 39240 }, { "epoch": 0.45, "learning_rate": 4.551821367823834e-05, "loss": 0.2531, "step": 39260 }, { "epoch": 0.45, "learning_rate": 4.551593054715237e-05, "loss": 0.3604, "step": 39280 }, { "epoch": 0.45, "learning_rate": 4.55136474160664e-05, "loss": 0.2183, "step": 39300 }, { "epoch": 0.45, "learning_rate": 4.551136428498042e-05, "loss": 0.3016, "step": 39320 }, { "epoch": 0.45, "learning_rate": 4.550908115389445e-05, "loss": 0.1484, "step": 39340 }, { "epoch": 0.45, "learning_rate": 4.550679802280848e-05, "loss": 0.2136, "step": 39360 }, { "epoch": 0.45, "learning_rate": 4.550451489172251e-05, "loss": 0.1705, "step": 39380 }, { "epoch": 0.45, "learning_rate": 4.550223176063654e-05, "loss": 0.3123, "step": 39400 }, { "epoch": 0.45, "learning_rate": 4.549994862955057e-05, "loss": 0.2265, "step": 39420 }, { "epoch": 0.45, "learning_rate": 4.54976654984646e-05, "loss": 0.1841, "step": 39440 }, { "epoch": 0.45, "learning_rate": 4.549538236737862e-05, "loss": 0.3519, "step": 39460 }, { "epoch": 0.45, "learning_rate": 4.549309923629265e-05, "loss": 0.2184, "step": 39480 }, { "epoch": 0.45, "learning_rate": 4.549081610520669e-05, "loss": 0.3248, "step": 39500 }, { "epoch": 0.45, "learning_rate": 4.548853297412071e-05, "loss": 0.196, "step": 39520 }, { "epoch": 0.45, "learning_rate": 4.548624984303474e-05, "loss": 0.2048, "step": 39540 }, { "epoch": 0.45, "learning_rate": 4.5483966711948764e-05, "loss": 0.2882, "step": 39560 }, { "epoch": 0.45, "learning_rate": 4.54816835808628e-05, "loss": 0.0798, "step": 39580 }, { "epoch": 0.45, "learning_rate": 4.547940044977683e-05, "loss": 0.2347, "step": 39600 }, { "epoch": 0.45, "learning_rate": 4.547711731869085e-05, "loss": 0.2488, "step": 39620 }, { "epoch": 0.45, "learning_rate": 4.547483418760488e-05, "loss": 0.3547, "step": 39640 }, { "epoch": 0.45, "learning_rate": 4.547255105651891e-05, "loss": 0.2973, "step": 39660 }, { "epoch": 0.45, "learning_rate": 4.547026792543294e-05, "loss": 0.5683, "step": 39680 }, { "epoch": 0.45, "learning_rate": 4.546798479434697e-05, "loss": 0.1154, "step": 39700 }, { "epoch": 0.45, "learning_rate": 4.5465701663260994e-05, "loss": 0.2176, "step": 39720 }, { "epoch": 0.45, "learning_rate": 4.546341853217503e-05, "loss": 0.3702, "step": 39740 }, { "epoch": 0.45, "learning_rate": 4.5461135401089054e-05, "loss": 0.2345, "step": 39760 }, { "epoch": 0.45, "learning_rate": 4.545885227000308e-05, "loss": 0.1097, "step": 39780 }, { "epoch": 0.45, "learning_rate": 4.545656913891711e-05, "loss": 0.2265, "step": 39800 }, { "epoch": 0.45, "learning_rate": 4.545428600783114e-05, "loss": 0.2307, "step": 39820 }, { "epoch": 0.45, "learning_rate": 4.545200287674517e-05, "loss": 0.1331, "step": 39840 }, { "epoch": 0.46, "learning_rate": 4.5449719745659195e-05, "loss": 0.711, "step": 39860 }, { "epoch": 0.46, "learning_rate": 4.544743661457323e-05, "loss": 0.1555, "step": 39880 }, { "epoch": 0.46, "learning_rate": 4.5445153483487255e-05, "loss": 0.4303, "step": 39900 }, { "epoch": 0.46, "learning_rate": 4.5442870352401284e-05, "loss": 0.3095, "step": 39920 }, { "epoch": 0.46, "learning_rate": 4.5440587221315314e-05, "loss": 0.2531, "step": 39940 }, { "epoch": 0.46, "learning_rate": 4.5438304090229344e-05, "loss": 0.4228, "step": 39960 }, { "epoch": 0.46, "learning_rate": 4.543602095914337e-05, "loss": 0.1632, "step": 39980 }, { "epoch": 0.46, "learning_rate": 4.5433737828057396e-05, "loss": 0.17, "step": 40000 }, { "epoch": 0.46, "learning_rate": 4.5431454696971426e-05, "loss": 0.2195, "step": 40020 }, { "epoch": 0.46, "learning_rate": 4.5429171565885456e-05, "loss": 0.1767, "step": 40040 }, { "epoch": 0.46, "learning_rate": 4.5426888434799485e-05, "loss": 0.1992, "step": 40060 }, { "epoch": 0.46, "learning_rate": 4.5424605303713515e-05, "loss": 0.3472, "step": 40080 }, { "epoch": 0.46, "learning_rate": 4.5422322172627545e-05, "loss": 0.1976, "step": 40100 }, { "epoch": 0.46, "learning_rate": 4.5420039041541574e-05, "loss": 0.2137, "step": 40120 }, { "epoch": 0.46, "learning_rate": 4.54177559104556e-05, "loss": 0.1871, "step": 40140 }, { "epoch": 0.46, "learning_rate": 4.541547277936963e-05, "loss": 0.1123, "step": 40160 }, { "epoch": 0.46, "learning_rate": 4.5413189648283663e-05, "loss": 0.2586, "step": 40180 }, { "epoch": 0.46, "learning_rate": 4.5410906517197686e-05, "loss": 0.0675, "step": 40200 }, { "epoch": 0.46, "learning_rate": 4.5408623386111716e-05, "loss": 0.1754, "step": 40220 }, { "epoch": 0.46, "learning_rate": 4.540634025502574e-05, "loss": 0.131, "step": 40240 }, { "epoch": 0.46, "learning_rate": 4.5404057123939775e-05, "loss": 0.4608, "step": 40260 }, { "epoch": 0.46, "learning_rate": 4.5401773992853805e-05, "loss": 0.1992, "step": 40280 }, { "epoch": 0.46, "learning_rate": 4.539949086176783e-05, "loss": 0.1751, "step": 40300 }, { "epoch": 0.46, "learning_rate": 4.539720773068186e-05, "loss": 0.5422, "step": 40320 }, { "epoch": 0.46, "learning_rate": 4.539492459959589e-05, "loss": 0.5331, "step": 40340 }, { "epoch": 0.46, "learning_rate": 4.539264146850992e-05, "loss": 0.1687, "step": 40360 }, { "epoch": 0.46, "learning_rate": 4.539035833742395e-05, "loss": 0.2334, "step": 40380 }, { "epoch": 0.46, "learning_rate": 4.5388075206337976e-05, "loss": 0.2194, "step": 40400 }, { "epoch": 0.46, "learning_rate": 4.5385792075252006e-05, "loss": 0.4163, "step": 40420 }, { "epoch": 0.46, "learning_rate": 4.538350894416603e-05, "loss": 0.2171, "step": 40440 }, { "epoch": 0.46, "learning_rate": 4.538122581308006e-05, "loss": 0.1178, "step": 40460 }, { "epoch": 0.46, "learning_rate": 4.537894268199409e-05, "loss": 0.2138, "step": 40480 }, { "epoch": 0.46, "learning_rate": 4.537665955090812e-05, "loss": 0.246, "step": 40500 }, { "epoch": 0.46, "learning_rate": 4.537437641982215e-05, "loss": 0.1624, "step": 40520 }, { "epoch": 0.46, "learning_rate": 4.537209328873617e-05, "loss": 0.2655, "step": 40540 }, { "epoch": 0.46, "learning_rate": 4.536981015765021e-05, "loss": 0.4588, "step": 40560 }, { "epoch": 0.46, "learning_rate": 4.536752702656423e-05, "loss": 0.155, "step": 40580 }, { "epoch": 0.46, "learning_rate": 4.536524389547826e-05, "loss": 0.2153, "step": 40600 }, { "epoch": 0.46, "learning_rate": 4.536296076439229e-05, "loss": 0.2407, "step": 40620 }, { "epoch": 0.46, "learning_rate": 4.536067763330632e-05, "loss": 0.2172, "step": 40640 }, { "epoch": 0.46, "learning_rate": 4.535839450222035e-05, "loss": 0.5185, "step": 40660 }, { "epoch": 0.46, "learning_rate": 4.535611137113437e-05, "loss": 0.5022, "step": 40680 }, { "epoch": 0.46, "learning_rate": 4.535382824004841e-05, "loss": 0.2188, "step": 40700 }, { "epoch": 0.46, "learning_rate": 4.535154510896243e-05, "loss": 0.1657, "step": 40720 }, { "epoch": 0.47, "learning_rate": 4.534926197787646e-05, "loss": 0.2506, "step": 40740 }, { "epoch": 0.47, "learning_rate": 4.534697884679049e-05, "loss": 0.157, "step": 40760 }, { "epoch": 0.47, "learning_rate": 4.534469571570452e-05, "loss": 0.1923, "step": 40780 }, { "epoch": 0.47, "learning_rate": 4.534241258461855e-05, "loss": 0.1176, "step": 40800 }, { "epoch": 0.47, "learning_rate": 4.534012945353258e-05, "loss": 0.0989, "step": 40820 }, { "epoch": 0.47, "learning_rate": 4.53378463224466e-05, "loss": 0.1671, "step": 40840 }, { "epoch": 0.47, "learning_rate": 4.533556319136064e-05, "loss": 0.565, "step": 40860 }, { "epoch": 0.47, "learning_rate": 4.533328006027466e-05, "loss": 0.22, "step": 40880 }, { "epoch": 0.47, "learning_rate": 4.533099692918869e-05, "loss": 0.3765, "step": 40900 }, { "epoch": 0.47, "learning_rate": 4.532871379810272e-05, "loss": 0.07, "step": 40920 }, { "epoch": 0.47, "learning_rate": 4.532643066701675e-05, "loss": 0.3502, "step": 40940 }, { "epoch": 0.47, "learning_rate": 4.532414753593078e-05, "loss": 0.2416, "step": 40960 }, { "epoch": 0.47, "learning_rate": 4.53218644048448e-05, "loss": 0.1792, "step": 40980 }, { "epoch": 0.47, "learning_rate": 4.531958127375884e-05, "loss": 0.3445, "step": 41000 }, { "epoch": 0.47, "learning_rate": 4.531729814267286e-05, "loss": 0.033, "step": 41020 }, { "epoch": 0.47, "learning_rate": 4.531501501158689e-05, "loss": 0.1763, "step": 41040 }, { "epoch": 0.47, "learning_rate": 4.531273188050092e-05, "loss": 0.4787, "step": 41060 }, { "epoch": 0.47, "learning_rate": 4.531044874941495e-05, "loss": 0.1711, "step": 41080 }, { "epoch": 0.47, "learning_rate": 4.530816561832898e-05, "loss": 0.369, "step": 41100 }, { "epoch": 0.47, "learning_rate": 4.5305882487243004e-05, "loss": 0.2742, "step": 41120 }, { "epoch": 0.47, "learning_rate": 4.5303599356157034e-05, "loss": 0.3324, "step": 41140 }, { "epoch": 0.47, "learning_rate": 4.5301316225071064e-05, "loss": 0.4156, "step": 41160 }, { "epoch": 0.47, "learning_rate": 4.529903309398509e-05, "loss": 0.2942, "step": 41180 }, { "epoch": 0.47, "learning_rate": 4.529674996289912e-05, "loss": 0.1429, "step": 41200 }, { "epoch": 0.47, "learning_rate": 4.5294466831813146e-05, "loss": 0.2709, "step": 41220 }, { "epoch": 0.47, "learning_rate": 4.529218370072718e-05, "loss": 0.1641, "step": 41240 }, { "epoch": 0.47, "learning_rate": 4.5289900569641205e-05, "loss": 0.4037, "step": 41260 }, { "epoch": 0.47, "learning_rate": 4.5287617438555235e-05, "loss": 0.1367, "step": 41280 }, { "epoch": 0.47, "learning_rate": 4.528533430746927e-05, "loss": 0.1772, "step": 41300 }, { "epoch": 0.47, "learning_rate": 4.5283051176383294e-05, "loss": 0.274, "step": 41320 }, { "epoch": 0.47, "learning_rate": 4.5280768045297324e-05, "loss": 0.0781, "step": 41340 }, { "epoch": 0.47, "learning_rate": 4.527848491421135e-05, "loss": 0.0722, "step": 41360 }, { "epoch": 0.47, "learning_rate": 4.5276201783125383e-05, "loss": 0.1862, "step": 41380 }, { "epoch": 0.47, "learning_rate": 4.527391865203941e-05, "loss": 0.1033, "step": 41400 }, { "epoch": 0.47, "learning_rate": 4.5271635520953436e-05, "loss": 0.3122, "step": 41420 }, { "epoch": 0.47, "learning_rate": 4.5269352389867466e-05, "loss": 0.1458, "step": 41440 }, { "epoch": 0.47, "learning_rate": 4.5267069258781495e-05, "loss": 0.5557, "step": 41460 }, { "epoch": 0.47, "learning_rate": 4.5264786127695525e-05, "loss": 0.1705, "step": 41480 }, { "epoch": 0.47, "learning_rate": 4.5262502996609555e-05, "loss": 0.1664, "step": 41500 }, { "epoch": 0.47, "learning_rate": 4.526021986552358e-05, "loss": 0.3656, "step": 41520 }, { "epoch": 0.47, "learning_rate": 4.5257936734437614e-05, "loss": 0.3504, "step": 41540 }, { "epoch": 0.47, "learning_rate": 4.525565360335164e-05, "loss": 0.0825, "step": 41560 }, { "epoch": 0.47, "learning_rate": 4.525337047226567e-05, "loss": 0.2371, "step": 41580 }, { "epoch": 0.47, "learning_rate": 4.5251087341179696e-05, "loss": 0.62, "step": 41600 }, { "epoch": 0.48, "learning_rate": 4.5248804210093726e-05, "loss": 0.12, "step": 41620 }, { "epoch": 0.48, "learning_rate": 4.5246521079007756e-05, "loss": 0.3182, "step": 41640 }, { "epoch": 0.48, "learning_rate": 4.524423794792178e-05, "loss": 0.3705, "step": 41660 }, { "epoch": 0.48, "learning_rate": 4.5241954816835815e-05, "loss": 0.1175, "step": 41680 }, { "epoch": 0.48, "learning_rate": 4.523967168574984e-05, "loss": 0.1208, "step": 41700 }, { "epoch": 0.48, "learning_rate": 4.523738855466387e-05, "loss": 0.1971, "step": 41720 }, { "epoch": 0.48, "learning_rate": 4.52351054235779e-05, "loss": 0.5353, "step": 41740 }, { "epoch": 0.48, "learning_rate": 4.523282229249193e-05, "loss": 0.2678, "step": 41760 }, { "epoch": 0.48, "learning_rate": 4.523053916140596e-05, "loss": 0.3322, "step": 41780 }, { "epoch": 0.48, "learning_rate": 4.522825603031998e-05, "loss": 0.1525, "step": 41800 }, { "epoch": 0.48, "learning_rate": 4.522597289923401e-05, "loss": 0.2874, "step": 41820 }, { "epoch": 0.48, "learning_rate": 4.522368976814804e-05, "loss": 0.496, "step": 41840 }, { "epoch": 0.48, "learning_rate": 4.522140663706207e-05, "loss": 0.1092, "step": 41860 }, { "epoch": 0.48, "learning_rate": 4.52191235059761e-05, "loss": 0.2358, "step": 41880 }, { "epoch": 0.48, "learning_rate": 4.521684037489013e-05, "loss": 0.176, "step": 41900 }, { "epoch": 0.48, "learning_rate": 4.521455724380416e-05, "loss": 0.1049, "step": 41920 }, { "epoch": 0.48, "learning_rate": 4.521227411271818e-05, "loss": 0.3688, "step": 41940 }, { "epoch": 0.48, "learning_rate": 4.520999098163221e-05, "loss": 0.2325, "step": 41960 }, { "epoch": 0.48, "learning_rate": 4.520770785054625e-05, "loss": 0.2358, "step": 41980 }, { "epoch": 0.48, "learning_rate": 4.520542471946027e-05, "loss": 0.2218, "step": 42000 }, { "epoch": 0.48, "learning_rate": 4.52031415883743e-05, "loss": 0.2111, "step": 42020 }, { "epoch": 0.48, "learning_rate": 4.520085845728832e-05, "loss": 0.4214, "step": 42040 }, { "epoch": 0.48, "learning_rate": 4.519857532620236e-05, "loss": 0.3958, "step": 42060 }, { "epoch": 0.48, "learning_rate": 4.519629219511639e-05, "loss": 0.1055, "step": 42080 }, { "epoch": 0.48, "learning_rate": 4.519400906403041e-05, "loss": 0.5427, "step": 42100 }, { "epoch": 0.48, "learning_rate": 4.519172593294444e-05, "loss": 0.2902, "step": 42120 }, { "epoch": 0.48, "learning_rate": 4.518944280185847e-05, "loss": 0.2431, "step": 42140 }, { "epoch": 0.48, "learning_rate": 4.51871596707725e-05, "loss": 0.2552, "step": 42160 }, { "epoch": 0.48, "learning_rate": 4.518487653968653e-05, "loss": 0.3333, "step": 42180 }, { "epoch": 0.48, "learning_rate": 4.518259340860055e-05, "loss": 0.265, "step": 42200 }, { "epoch": 0.48, "learning_rate": 4.518031027751459e-05, "loss": 0.2056, "step": 42220 }, { "epoch": 0.48, "learning_rate": 4.517802714642861e-05, "loss": 0.1347, "step": 42240 }, { "epoch": 0.48, "learning_rate": 4.517574401534264e-05, "loss": 0.3456, "step": 42260 }, { "epoch": 0.48, "learning_rate": 4.517346088425667e-05, "loss": 0.2135, "step": 42280 }, { "epoch": 0.48, "learning_rate": 4.51711777531707e-05, "loss": 0.2118, "step": 42300 }, { "epoch": 0.48, "learning_rate": 4.516889462208473e-05, "loss": 0.1402, "step": 42320 }, { "epoch": 0.48, "learning_rate": 4.5166611490998754e-05, "loss": 0.1876, "step": 42340 }, { "epoch": 0.48, "learning_rate": 4.516432835991279e-05, "loss": 0.2786, "step": 42360 }, { "epoch": 0.48, "learning_rate": 4.516204522882681e-05, "loss": 0.2522, "step": 42380 }, { "epoch": 0.48, "learning_rate": 4.515976209774084e-05, "loss": 0.2186, "step": 42400 }, { "epoch": 0.48, "learning_rate": 4.515747896665487e-05, "loss": 0.1071, "step": 42420 }, { "epoch": 0.48, "learning_rate": 4.51551958355689e-05, "loss": 0.2979, "step": 42440 }, { "epoch": 0.48, "learning_rate": 4.515291270448293e-05, "loss": 0.1429, "step": 42460 }, { "epoch": 0.48, "learning_rate": 4.5150629573396955e-05, "loss": 0.2143, "step": 42480 }, { "epoch": 0.49, "learning_rate": 4.5148346442310985e-05, "loss": 0.4638, "step": 42500 }, { "epoch": 0.49, "learning_rate": 4.5146063311225014e-05, "loss": 0.352, "step": 42520 }, { "epoch": 0.49, "learning_rate": 4.5143780180139044e-05, "loss": 0.5355, "step": 42540 }, { "epoch": 0.49, "learning_rate": 4.5141497049053074e-05, "loss": 0.1256, "step": 42560 }, { "epoch": 0.49, "learning_rate": 4.5139213917967103e-05, "loss": 0.1431, "step": 42580 }, { "epoch": 0.49, "learning_rate": 4.513693078688113e-05, "loss": 0.3812, "step": 42600 }, { "epoch": 0.49, "learning_rate": 4.5134647655795156e-05, "loss": 0.216, "step": 42620 }, { "epoch": 0.49, "learning_rate": 4.5132364524709186e-05, "loss": 0.2787, "step": 42640 }, { "epoch": 0.49, "learning_rate": 4.513008139362322e-05, "loss": 0.2473, "step": 42660 }, { "epoch": 0.49, "learning_rate": 4.5127798262537245e-05, "loss": 0.0584, "step": 42680 }, { "epoch": 0.49, "learning_rate": 4.5125515131451275e-05, "loss": 0.1659, "step": 42700 }, { "epoch": 0.49, "learning_rate": 4.51232320003653e-05, "loss": 0.2986, "step": 42720 }, { "epoch": 0.49, "learning_rate": 4.5120948869279334e-05, "loss": 0.1879, "step": 42740 }, { "epoch": 0.49, "learning_rate": 4.5118665738193364e-05, "loss": 0.2212, "step": 42760 }, { "epoch": 0.49, "learning_rate": 4.511638260710739e-05, "loss": 0.0983, "step": 42780 }, { "epoch": 0.49, "learning_rate": 4.5114099476021416e-05, "loss": 0.2362, "step": 42800 }, { "epoch": 0.49, "learning_rate": 4.5111816344935446e-05, "loss": 0.1334, "step": 42820 }, { "epoch": 0.49, "learning_rate": 4.5109533213849476e-05, "loss": 0.0691, "step": 42840 }, { "epoch": 0.49, "learning_rate": 4.5107250082763505e-05, "loss": 0.2283, "step": 42860 }, { "epoch": 0.49, "learning_rate": 4.5104966951677535e-05, "loss": 0.1568, "step": 42880 }, { "epoch": 0.49, "learning_rate": 4.5102683820591565e-05, "loss": 0.5317, "step": 42900 }, { "epoch": 0.49, "learning_rate": 4.510040068950559e-05, "loss": 0.2272, "step": 42920 }, { "epoch": 0.49, "learning_rate": 4.509811755841962e-05, "loss": 0.125, "step": 42940 }, { "epoch": 0.49, "learning_rate": 4.509583442733365e-05, "loss": 0.1227, "step": 42960 }, { "epoch": 0.49, "learning_rate": 4.509355129624768e-05, "loss": 0.2279, "step": 42980 }, { "epoch": 0.49, "learning_rate": 4.5091268165161706e-05, "loss": 0.2402, "step": 43000 }, { "epoch": 0.49, "learning_rate": 4.508898503407573e-05, "loss": 0.2073, "step": 43020 }, { "epoch": 0.49, "learning_rate": 4.5086701902989766e-05, "loss": 0.4023, "step": 43040 }, { "epoch": 0.49, "learning_rate": 4.508441877190379e-05, "loss": 0.4214, "step": 43060 }, { "epoch": 0.49, "learning_rate": 4.508213564081782e-05, "loss": 0.3915, "step": 43080 }, { "epoch": 0.49, "learning_rate": 4.507985250973185e-05, "loss": 0.3971, "step": 43100 }, { "epoch": 0.49, "learning_rate": 4.507756937864588e-05, "loss": 0.1666, "step": 43120 }, { "epoch": 0.49, "learning_rate": 4.507528624755991e-05, "loss": 0.2214, "step": 43140 }, { "epoch": 0.49, "learning_rate": 4.507300311647393e-05, "loss": 0.4837, "step": 43160 }, { "epoch": 0.49, "learning_rate": 4.507071998538797e-05, "loss": 0.2295, "step": 43180 }, { "epoch": 0.49, "learning_rate": 4.506843685430199e-05, "loss": 0.3458, "step": 43200 }, { "epoch": 0.49, "learning_rate": 4.506615372321602e-05, "loss": 0.5232, "step": 43220 }, { "epoch": 0.49, "learning_rate": 4.506387059213005e-05, "loss": 0.0927, "step": 43240 }, { "epoch": 0.49, "learning_rate": 4.506158746104408e-05, "loss": 0.2114, "step": 43260 }, { "epoch": 0.49, "learning_rate": 4.505930432995811e-05, "loss": 0.1059, "step": 43280 }, { "epoch": 0.49, "learning_rate": 4.505702119887213e-05, "loss": 0.094, "step": 43300 }, { "epoch": 0.49, "learning_rate": 4.505473806778616e-05, "loss": 0.1633, "step": 43320 }, { "epoch": 0.49, "learning_rate": 4.50524549367002e-05, "loss": 0.2021, "step": 43340 }, { "epoch": 0.49, "learning_rate": 4.505017180561422e-05, "loss": 0.5715, "step": 43360 }, { "epoch": 0.5, "learning_rate": 4.504788867452825e-05, "loss": 0.1866, "step": 43380 }, { "epoch": 0.5, "learning_rate": 4.504560554344227e-05, "loss": 0.3092, "step": 43400 }, { "epoch": 0.5, "learning_rate": 4.504332241235631e-05, "loss": 0.1395, "step": 43420 }, { "epoch": 0.5, "learning_rate": 4.504103928127034e-05, "loss": 0.0732, "step": 43440 }, { "epoch": 0.5, "learning_rate": 4.503875615018436e-05, "loss": 0.1449, "step": 43460 }, { "epoch": 0.5, "learning_rate": 4.50364730190984e-05, "loss": 0.7208, "step": 43480 }, { "epoch": 0.5, "learning_rate": 4.503418988801242e-05, "loss": 0.2148, "step": 43500 }, { "epoch": 0.5, "learning_rate": 4.503190675692645e-05, "loss": 0.1496, "step": 43520 }, { "epoch": 0.5, "learning_rate": 4.502962362584048e-05, "loss": 0.3446, "step": 43540 }, { "epoch": 0.5, "learning_rate": 4.502734049475451e-05, "loss": 0.1965, "step": 43560 }, { "epoch": 0.5, "learning_rate": 4.502505736366854e-05, "loss": 0.2087, "step": 43580 }, { "epoch": 0.5, "learning_rate": 4.502277423258256e-05, "loss": 0.1905, "step": 43600 }, { "epoch": 0.5, "learning_rate": 4.502049110149659e-05, "loss": 0.3817, "step": 43620 }, { "epoch": 0.5, "learning_rate": 4.501820797041062e-05, "loss": 0.3408, "step": 43640 }, { "epoch": 0.5, "learning_rate": 4.501592483932465e-05, "loss": 0.2003, "step": 43660 }, { "epoch": 0.5, "learning_rate": 4.501364170823868e-05, "loss": 0.233, "step": 43680 }, { "epoch": 0.5, "learning_rate": 4.5011358577152705e-05, "loss": 0.3165, "step": 43700 }, { "epoch": 0.5, "learning_rate": 4.500907544606674e-05, "loss": 0.3735, "step": 43720 }, { "epoch": 0.5, "learning_rate": 4.5006792314980764e-05, "loss": 0.1043, "step": 43740 }, { "epoch": 0.5, "learning_rate": 4.5004509183894794e-05, "loss": 0.4367, "step": 43760 }, { "epoch": 0.5, "learning_rate": 4.500222605280883e-05, "loss": 0.2006, "step": 43780 }, { "epoch": 0.5, "learning_rate": 4.499994292172285e-05, "loss": 0.134, "step": 43800 }, { "epoch": 0.5, "learning_rate": 4.499765979063688e-05, "loss": 0.1908, "step": 43820 }, { "epoch": 0.5, "learning_rate": 4.4995376659550906e-05, "loss": 0.3457, "step": 43840 }, { "epoch": 0.5, "learning_rate": 4.499309352846494e-05, "loss": 0.5562, "step": 43860 }, { "epoch": 0.5, "learning_rate": 4.499081039737897e-05, "loss": 0.9004, "step": 43880 }, { "epoch": 0.5, "learning_rate": 4.4988527266292995e-05, "loss": 0.2416, "step": 43900 }, { "epoch": 0.5, "learning_rate": 4.4986244135207024e-05, "loss": 0.1112, "step": 43920 }, { "epoch": 0.5, "learning_rate": 4.4983961004121054e-05, "loss": 0.2939, "step": 43940 }, { "epoch": 0.5, "learning_rate": 4.4981677873035084e-05, "loss": 0.2165, "step": 43960 }, { "epoch": 0.5, "learning_rate": 4.4979394741949113e-05, "loss": 0.3135, "step": 43980 }, { "epoch": 0.5, "learning_rate": 4.4977111610863136e-05, "loss": 0.139, "step": 44000 }, { "epoch": 0.5, "learning_rate": 4.497482847977717e-05, "loss": 0.1139, "step": 44020 }, { "epoch": 0.5, "learning_rate": 4.4972545348691196e-05, "loss": 0.2402, "step": 44040 }, { "epoch": 0.5, "learning_rate": 4.4970262217605225e-05, "loss": 0.1421, "step": 44060 }, { "epoch": 0.5, "learning_rate": 4.4967979086519255e-05, "loss": 0.1129, "step": 44080 }, { "epoch": 0.5, "learning_rate": 4.4965695955433285e-05, "loss": 0.0904, "step": 44100 }, { "epoch": 0.5, "learning_rate": 4.4963412824347314e-05, "loss": 0.1501, "step": 44120 }, { "epoch": 0.5, "learning_rate": 4.496112969326134e-05, "loss": 0.3664, "step": 44140 }, { "epoch": 0.5, "learning_rate": 4.4958846562175374e-05, "loss": 0.2271, "step": 44160 }, { "epoch": 0.5, "learning_rate": 4.49565634310894e-05, "loss": 0.0879, "step": 44180 }, { "epoch": 0.5, "learning_rate": 4.4954280300003426e-05, "loss": 0.2343, "step": 44200 }, { "epoch": 0.5, "learning_rate": 4.4951997168917456e-05, "loss": 0.0993, "step": 44220 }, { "epoch": 0.51, "learning_rate": 4.4949714037831486e-05, "loss": 0.252, "step": 44240 }, { "epoch": 0.51, "learning_rate": 4.4947430906745515e-05, "loss": 0.5189, "step": 44260 }, { "epoch": 0.51, "learning_rate": 4.494514777565954e-05, "loss": 0.1548, "step": 44280 }, { "epoch": 0.51, "learning_rate": 4.494286464457357e-05, "loss": 0.2019, "step": 44300 }, { "epoch": 0.51, "learning_rate": 4.49405815134876e-05, "loss": 0.1462, "step": 44320 }, { "epoch": 0.51, "learning_rate": 4.493829838240163e-05, "loss": 0.1077, "step": 44340 }, { "epoch": 0.51, "learning_rate": 4.493601525131566e-05, "loss": 0.116, "step": 44360 }, { "epoch": 0.51, "learning_rate": 4.493373212022968e-05, "loss": 0.2485, "step": 44380 }, { "epoch": 0.51, "learning_rate": 4.4931448989143716e-05, "loss": 0.1882, "step": 44400 }, { "epoch": 0.51, "learning_rate": 4.492916585805774e-05, "loss": 0.1103, "step": 44420 }, { "epoch": 0.51, "learning_rate": 4.492688272697177e-05, "loss": 0.1896, "step": 44440 }, { "epoch": 0.51, "learning_rate": 4.4924599595885805e-05, "loss": 0.102, "step": 44460 }, { "epoch": 0.51, "learning_rate": 4.492231646479983e-05, "loss": 0.0906, "step": 44480 }, { "epoch": 0.51, "learning_rate": 4.492003333371386e-05, "loss": 0.2274, "step": 44500 }, { "epoch": 0.51, "learning_rate": 4.491775020262788e-05, "loss": 0.581, "step": 44520 }, { "epoch": 0.51, "learning_rate": 4.491546707154192e-05, "loss": 0.187, "step": 44540 }, { "epoch": 0.51, "learning_rate": 4.491318394045595e-05, "loss": 0.2554, "step": 44560 }, { "epoch": 0.51, "learning_rate": 4.491090080936997e-05, "loss": 0.3319, "step": 44580 }, { "epoch": 0.51, "learning_rate": 4.4908617678284e-05, "loss": 0.296, "step": 44600 }, { "epoch": 0.51, "learning_rate": 4.490633454719803e-05, "loss": 0.0845, "step": 44620 }, { "epoch": 0.51, "learning_rate": 4.490405141611206e-05, "loss": 0.1198, "step": 44640 }, { "epoch": 0.51, "learning_rate": 4.490176828502609e-05, "loss": 0.119, "step": 44660 }, { "epoch": 0.51, "learning_rate": 4.489948515394011e-05, "loss": 0.2877, "step": 44680 }, { "epoch": 0.51, "learning_rate": 4.489720202285415e-05, "loss": 0.2534, "step": 44700 }, { "epoch": 0.51, "learning_rate": 4.489491889176817e-05, "loss": 0.407, "step": 44720 }, { "epoch": 0.51, "learning_rate": 4.48926357606822e-05, "loss": 0.3477, "step": 44740 }, { "epoch": 0.51, "learning_rate": 4.489035262959623e-05, "loss": 0.1476, "step": 44760 }, { "epoch": 0.51, "learning_rate": 4.488806949851026e-05, "loss": 0.1228, "step": 44780 }, { "epoch": 0.51, "learning_rate": 4.488578636742429e-05, "loss": 0.2631, "step": 44800 }, { "epoch": 0.51, "learning_rate": 4.488350323633831e-05, "loss": 0.092, "step": 44820 }, { "epoch": 0.51, "learning_rate": 4.488122010525235e-05, "loss": 0.337, "step": 44840 }, { "epoch": 0.51, "learning_rate": 4.487893697416637e-05, "loss": 0.5672, "step": 44860 }, { "epoch": 0.51, "learning_rate": 4.48766538430804e-05, "loss": 0.1301, "step": 44880 }, { "epoch": 0.51, "learning_rate": 4.487437071199443e-05, "loss": 0.1997, "step": 44900 }, { "epoch": 0.51, "learning_rate": 4.487208758090846e-05, "loss": 0.0793, "step": 44920 }, { "epoch": 0.51, "learning_rate": 4.486980444982249e-05, "loss": 0.1478, "step": 44940 }, { "epoch": 0.51, "learning_rate": 4.4867521318736514e-05, "loss": 0.1225, "step": 44960 }, { "epoch": 0.51, "learning_rate": 4.486523818765054e-05, "loss": 0.2541, "step": 44980 }, { "epoch": 0.51, "learning_rate": 4.486295505656457e-05, "loss": 0.164, "step": 45000 }, { "epoch": 0.51, "learning_rate": 4.48606719254786e-05, "loss": 0.2536, "step": 45020 }, { "epoch": 0.51, "learning_rate": 4.485838879439263e-05, "loss": 0.2687, "step": 45040 }, { "epoch": 0.51, "learning_rate": 4.485610566330666e-05, "loss": 0.0957, "step": 45060 }, { "epoch": 0.51, "learning_rate": 4.485382253222069e-05, "loss": 0.141, "step": 45080 }, { "epoch": 0.51, "learning_rate": 4.4851539401134715e-05, "loss": 0.1179, "step": 45100 }, { "epoch": 0.52, "learning_rate": 4.4849256270048744e-05, "loss": 0.3144, "step": 45120 }, { "epoch": 0.52, "learning_rate": 4.484697313896278e-05, "loss": 0.1742, "step": 45140 }, { "epoch": 0.52, "learning_rate": 4.4844690007876804e-05, "loss": 0.0738, "step": 45160 }, { "epoch": 0.52, "learning_rate": 4.4842406876790833e-05, "loss": 0.2962, "step": 45180 }, { "epoch": 0.52, "learning_rate": 4.4840123745704856e-05, "loss": 0.2395, "step": 45200 }, { "epoch": 0.52, "learning_rate": 4.483784061461889e-05, "loss": 0.2284, "step": 45220 }, { "epoch": 0.52, "learning_rate": 4.483555748353292e-05, "loss": 0.4124, "step": 45240 }, { "epoch": 0.52, "learning_rate": 4.4833274352446945e-05, "loss": 0.2764, "step": 45260 }, { "epoch": 0.52, "learning_rate": 4.4830991221360975e-05, "loss": 0.3036, "step": 45280 }, { "epoch": 0.52, "learning_rate": 4.4828708090275005e-05, "loss": 0.4551, "step": 45300 }, { "epoch": 0.52, "learning_rate": 4.4826424959189034e-05, "loss": 0.137, "step": 45320 }, { "epoch": 0.52, "learning_rate": 4.4824141828103064e-05, "loss": 0.0721, "step": 45340 }, { "epoch": 0.52, "learning_rate": 4.4821858697017094e-05, "loss": 0.126, "step": 45360 }, { "epoch": 0.52, "learning_rate": 4.4819575565931123e-05, "loss": 0.1818, "step": 45380 }, { "epoch": 0.52, "learning_rate": 4.4817292434845146e-05, "loss": 0.1701, "step": 45400 }, { "epoch": 0.52, "learning_rate": 4.4815009303759176e-05, "loss": 0.1742, "step": 45420 }, { "epoch": 0.52, "learning_rate": 4.4812726172673206e-05, "loss": 0.2793, "step": 45440 }, { "epoch": 0.52, "learning_rate": 4.4810443041587235e-05, "loss": 0.131, "step": 45460 }, { "epoch": 0.52, "learning_rate": 4.4808159910501265e-05, "loss": 0.1435, "step": 45480 }, { "epoch": 0.52, "learning_rate": 4.480587677941529e-05, "loss": 0.2236, "step": 45500 }, { "epoch": 0.52, "learning_rate": 4.4803593648329324e-05, "loss": 0.1741, "step": 45520 }, { "epoch": 0.52, "learning_rate": 4.480131051724335e-05, "loss": 0.1068, "step": 45540 }, { "epoch": 0.52, "learning_rate": 4.479902738615738e-05, "loss": 0.1459, "step": 45560 }, { "epoch": 0.52, "learning_rate": 4.479674425507141e-05, "loss": 0.2377, "step": 45580 }, { "epoch": 0.52, "learning_rate": 4.4794461123985436e-05, "loss": 0.1247, "step": 45600 }, { "epoch": 0.52, "learning_rate": 4.4792177992899466e-05, "loss": 0.132, "step": 45620 }, { "epoch": 0.52, "learning_rate": 4.478989486181349e-05, "loss": 0.3934, "step": 45640 }, { "epoch": 0.52, "learning_rate": 4.4787611730727525e-05, "loss": 0.0873, "step": 45660 }, { "epoch": 0.52, "learning_rate": 4.478532859964155e-05, "loss": 0.2343, "step": 45680 }, { "epoch": 0.52, "learning_rate": 4.478304546855558e-05, "loss": 0.2549, "step": 45700 }, { "epoch": 0.52, "learning_rate": 4.478076233746961e-05, "loss": 0.0773, "step": 45720 }, { "epoch": 0.52, "learning_rate": 4.477847920638364e-05, "loss": 0.2066, "step": 45740 }, { "epoch": 0.52, "learning_rate": 4.477619607529767e-05, "loss": 0.331, "step": 45760 }, { "epoch": 0.52, "learning_rate": 4.477391294421169e-05, "loss": 0.3703, "step": 45780 }, { "epoch": 0.52, "learning_rate": 4.477162981312572e-05, "loss": 0.2165, "step": 45800 }, { "epoch": 0.52, "learning_rate": 4.4769346682039756e-05, "loss": 0.2407, "step": 45820 }, { "epoch": 0.52, "learning_rate": 4.476706355095378e-05, "loss": 0.2928, "step": 45840 }, { "epoch": 0.52, "learning_rate": 4.476478041986781e-05, "loss": 0.1968, "step": 45860 }, { "epoch": 0.52, "learning_rate": 4.476249728878183e-05, "loss": 0.4645, "step": 45880 }, { "epoch": 0.52, "learning_rate": 4.476021415769587e-05, "loss": 0.2016, "step": 45900 }, { "epoch": 0.52, "learning_rate": 4.47579310266099e-05, "loss": 0.4563, "step": 45920 }, { "epoch": 0.52, "learning_rate": 4.475564789552392e-05, "loss": 0.0934, "step": 45940 }, { "epoch": 0.52, "learning_rate": 4.475336476443796e-05, "loss": 0.179, "step": 45960 }, { "epoch": 0.52, "learning_rate": 4.475108163335198e-05, "loss": 0.2791, "step": 45980 }, { "epoch": 0.53, "learning_rate": 4.474879850226601e-05, "loss": 0.4735, "step": 46000 }, { "epoch": 0.53, "learning_rate": 4.474651537118004e-05, "loss": 0.1834, "step": 46020 }, { "epoch": 0.53, "learning_rate": 4.474423224009407e-05, "loss": 0.2162, "step": 46040 }, { "epoch": 0.53, "learning_rate": 4.47419491090081e-05, "loss": 0.0964, "step": 46060 }, { "epoch": 0.53, "learning_rate": 4.473966597792212e-05, "loss": 0.3301, "step": 46080 }, { "epoch": 0.53, "learning_rate": 4.473738284683615e-05, "loss": 0.3957, "step": 46100 }, { "epoch": 0.53, "learning_rate": 4.473509971575018e-05, "loss": 0.2526, "step": 46120 }, { "epoch": 0.53, "learning_rate": 4.473281658466421e-05, "loss": 0.1567, "step": 46140 }, { "epoch": 0.53, "learning_rate": 4.473053345357824e-05, "loss": 0.1701, "step": 46160 }, { "epoch": 0.53, "learning_rate": 4.472825032249226e-05, "loss": 0.2261, "step": 46180 }, { "epoch": 0.53, "learning_rate": 4.47259671914063e-05, "loss": 0.1176, "step": 46200 }, { "epoch": 0.53, "learning_rate": 4.472368406032032e-05, "loss": 0.3645, "step": 46220 }, { "epoch": 0.53, "learning_rate": 4.472140092923435e-05, "loss": 0.2688, "step": 46240 }, { "epoch": 0.53, "learning_rate": 4.471911779814839e-05, "loss": 0.299, "step": 46260 }, { "epoch": 0.53, "learning_rate": 4.471683466706241e-05, "loss": 0.145, "step": 46280 }, { "epoch": 0.53, "learning_rate": 4.471455153597644e-05, "loss": 0.1679, "step": 46300 }, { "epoch": 0.53, "learning_rate": 4.4712268404890464e-05, "loss": 0.1787, "step": 46320 }, { "epoch": 0.53, "learning_rate": 4.47099852738045e-05, "loss": 0.1743, "step": 46340 }, { "epoch": 0.53, "learning_rate": 4.470770214271853e-05, "loss": 0.229, "step": 46360 }, { "epoch": 0.53, "learning_rate": 4.4705419011632553e-05, "loss": 0.2424, "step": 46380 }, { "epoch": 0.53, "learning_rate": 4.470313588054658e-05, "loss": 0.2284, "step": 46400 }, { "epoch": 0.53, "learning_rate": 4.470085274946061e-05, "loss": 0.0474, "step": 46420 }, { "epoch": 0.53, "learning_rate": 4.469856961837464e-05, "loss": 0.2346, "step": 46440 }, { "epoch": 0.53, "learning_rate": 4.469628648728867e-05, "loss": 0.2811, "step": 46460 }, { "epoch": 0.53, "learning_rate": 4.4694003356202695e-05, "loss": 0.3333, "step": 46480 }, { "epoch": 0.53, "learning_rate": 4.469172022511673e-05, "loss": 0.2273, "step": 46500 }, { "epoch": 0.53, "learning_rate": 4.4689437094030754e-05, "loss": 0.3617, "step": 46520 }, { "epoch": 0.53, "learning_rate": 4.4687153962944784e-05, "loss": 0.2937, "step": 46540 }, { "epoch": 0.53, "learning_rate": 4.4684870831858814e-05, "loss": 0.1922, "step": 46560 }, { "epoch": 0.53, "learning_rate": 4.4682587700772843e-05, "loss": 0.5191, "step": 46580 }, { "epoch": 0.53, "learning_rate": 4.468030456968687e-05, "loss": 0.5811, "step": 46600 }, { "epoch": 0.53, "learning_rate": 4.4678021438600896e-05, "loss": 0.4182, "step": 46620 }, { "epoch": 0.53, "learning_rate": 4.467573830751493e-05, "loss": 0.1378, "step": 46640 }, { "epoch": 0.53, "learning_rate": 4.4673455176428955e-05, "loss": 0.0987, "step": 46660 }, { "epoch": 0.53, "learning_rate": 4.4671172045342985e-05, "loss": 0.146, "step": 46680 }, { "epoch": 0.53, "learning_rate": 4.4668888914257015e-05, "loss": 0.4342, "step": 46700 }, { "epoch": 0.53, "learning_rate": 4.4666605783171044e-05, "loss": 0.1863, "step": 46720 }, { "epoch": 0.53, "learning_rate": 4.4664322652085074e-05, "loss": 0.2767, "step": 46740 }, { "epoch": 0.53, "learning_rate": 4.46620395209991e-05, "loss": 0.2985, "step": 46760 }, { "epoch": 0.53, "learning_rate": 4.465975638991313e-05, "loss": 0.1638, "step": 46780 }, { "epoch": 0.53, "learning_rate": 4.4657473258827156e-05, "loss": 0.1727, "step": 46800 }, { "epoch": 0.53, "learning_rate": 4.4655190127741186e-05, "loss": 0.0796, "step": 46820 }, { "epoch": 0.53, "learning_rate": 4.4652906996655216e-05, "loss": 0.2092, "step": 46840 }, { "epoch": 0.53, "learning_rate": 4.465062386556924e-05, "loss": 0.1388, "step": 46860 }, { "epoch": 0.54, "learning_rate": 4.4648340734483275e-05, "loss": 0.3187, "step": 46880 }, { "epoch": 0.54, "learning_rate": 4.46460576033973e-05, "loss": 0.3177, "step": 46900 }, { "epoch": 0.54, "learning_rate": 4.464377447231133e-05, "loss": 0.1024, "step": 46920 }, { "epoch": 0.54, "learning_rate": 4.4641491341225364e-05, "loss": 0.1061, "step": 46940 }, { "epoch": 0.54, "learning_rate": 4.463920821013939e-05, "loss": 0.0842, "step": 46960 }, { "epoch": 0.54, "learning_rate": 4.463692507905342e-05, "loss": 0.1648, "step": 46980 }, { "epoch": 0.54, "learning_rate": 4.463464194796744e-05, "loss": 0.1747, "step": 47000 }, { "epoch": 0.54, "learning_rate": 4.4632358816881476e-05, "loss": 0.3213, "step": 47020 }, { "epoch": 0.54, "learning_rate": 4.4630075685795506e-05, "loss": 0.1911, "step": 47040 }, { "epoch": 0.54, "learning_rate": 4.462779255470953e-05, "loss": 0.117, "step": 47060 }, { "epoch": 0.54, "learning_rate": 4.462550942362356e-05, "loss": 0.2005, "step": 47080 }, { "epoch": 0.54, "learning_rate": 4.462322629253759e-05, "loss": 0.14, "step": 47100 }, { "epoch": 0.54, "learning_rate": 4.462094316145162e-05, "loss": 0.1155, "step": 47120 }, { "epoch": 0.54, "learning_rate": 4.461866003036565e-05, "loss": 0.1256, "step": 47140 }, { "epoch": 0.54, "learning_rate": 4.461637689927967e-05, "loss": 0.1583, "step": 47160 }, { "epoch": 0.54, "learning_rate": 4.461409376819371e-05, "loss": 0.1156, "step": 47180 }, { "epoch": 0.54, "learning_rate": 4.461181063710773e-05, "loss": 0.1872, "step": 47200 }, { "epoch": 0.54, "learning_rate": 4.460952750602176e-05, "loss": 0.1736, "step": 47220 }, { "epoch": 0.54, "learning_rate": 4.460724437493579e-05, "loss": 0.1494, "step": 47240 }, { "epoch": 0.54, "learning_rate": 4.460496124384982e-05, "loss": 0.0697, "step": 47260 }, { "epoch": 0.54, "learning_rate": 4.460267811276385e-05, "loss": 0.4533, "step": 47280 }, { "epoch": 0.54, "learning_rate": 4.460039498167787e-05, "loss": 0.307, "step": 47300 }, { "epoch": 0.54, "learning_rate": 4.459811185059191e-05, "loss": 0.0986, "step": 47320 }, { "epoch": 0.54, "learning_rate": 4.459582871950593e-05, "loss": 0.3352, "step": 47340 }, { "epoch": 0.54, "learning_rate": 4.459354558841996e-05, "loss": 0.1759, "step": 47360 }, { "epoch": 0.54, "learning_rate": 4.459126245733399e-05, "loss": 0.2073, "step": 47380 }, { "epoch": 0.54, "learning_rate": 4.458897932624802e-05, "loss": 0.0463, "step": 47400 }, { "epoch": 0.54, "learning_rate": 4.458669619516205e-05, "loss": 0.5515, "step": 47420 }, { "epoch": 0.54, "learning_rate": 4.458441306407607e-05, "loss": 0.1378, "step": 47440 }, { "epoch": 0.54, "learning_rate": 4.45821299329901e-05, "loss": 0.1465, "step": 47460 }, { "epoch": 0.54, "learning_rate": 4.457984680190413e-05, "loss": 0.2884, "step": 47480 }, { "epoch": 0.54, "learning_rate": 4.457756367081816e-05, "loss": 0.2963, "step": 47500 }, { "epoch": 0.54, "learning_rate": 4.457528053973219e-05, "loss": 0.1319, "step": 47520 }, { "epoch": 0.54, "learning_rate": 4.457299740864622e-05, "loss": 0.2123, "step": 47540 }, { "epoch": 0.54, "learning_rate": 4.457071427756025e-05, "loss": 0.1155, "step": 47560 }, { "epoch": 0.54, "learning_rate": 4.4568431146474273e-05, "loss": 0.284, "step": 47580 }, { "epoch": 0.54, "learning_rate": 4.45661480153883e-05, "loss": 0.3944, "step": 47600 }, { "epoch": 0.54, "learning_rate": 4.456386488430234e-05, "loss": 0.3748, "step": 47620 }, { "epoch": 0.54, "learning_rate": 4.456158175321636e-05, "loss": 0.3293, "step": 47640 }, { "epoch": 0.54, "learning_rate": 4.455929862213039e-05, "loss": 0.269, "step": 47660 }, { "epoch": 0.54, "learning_rate": 4.4557015491044415e-05, "loss": 0.1816, "step": 47680 }, { "epoch": 0.54, "learning_rate": 4.455473235995845e-05, "loss": 0.1197, "step": 47700 }, { "epoch": 0.54, "learning_rate": 4.455244922887248e-05, "loss": 0.269, "step": 47720 }, { "epoch": 0.54, "learning_rate": 4.4550166097786504e-05, "loss": 0.2776, "step": 47740 }, { "epoch": 0.55, "learning_rate": 4.4547882966700534e-05, "loss": 0.1412, "step": 47760 }, { "epoch": 0.55, "learning_rate": 4.4545599835614563e-05, "loss": 0.1931, "step": 47780 }, { "epoch": 0.55, "learning_rate": 4.454331670452859e-05, "loss": 0.2291, "step": 47800 }, { "epoch": 0.55, "learning_rate": 4.454103357344262e-05, "loss": 0.2201, "step": 47820 }, { "epoch": 0.55, "learning_rate": 4.453875044235665e-05, "loss": 0.1499, "step": 47840 }, { "epoch": 0.55, "learning_rate": 4.453646731127068e-05, "loss": 0.2009, "step": 47860 }, { "epoch": 0.55, "learning_rate": 4.4534184180184705e-05, "loss": 0.0648, "step": 47880 }, { "epoch": 0.55, "learning_rate": 4.4531901049098735e-05, "loss": 0.2208, "step": 47900 }, { "epoch": 0.55, "learning_rate": 4.4529617918012764e-05, "loss": 0.3191, "step": 47920 }, { "epoch": 0.55, "learning_rate": 4.4527334786926794e-05, "loss": 0.3156, "step": 47940 }, { "epoch": 0.55, "learning_rate": 4.4525051655840824e-05, "loss": 0.4567, "step": 47960 }, { "epoch": 0.55, "learning_rate": 4.452276852475485e-05, "loss": 0.1905, "step": 47980 }, { "epoch": 0.55, "learning_rate": 4.452048539366888e-05, "loss": 0.2679, "step": 48000 }, { "epoch": 0.55, "learning_rate": 4.4518202262582906e-05, "loss": 0.2296, "step": 48020 }, { "epoch": 0.55, "learning_rate": 4.4515919131496936e-05, "loss": 0.3611, "step": 48040 }, { "epoch": 0.55, "learning_rate": 4.4513636000410965e-05, "loss": 0.2428, "step": 48060 }, { "epoch": 0.55, "learning_rate": 4.4511352869324995e-05, "loss": 0.227, "step": 48080 }, { "epoch": 0.55, "learning_rate": 4.4509069738239025e-05, "loss": 0.1801, "step": 48100 }, { "epoch": 0.55, "learning_rate": 4.450678660715305e-05, "loss": 0.4458, "step": 48120 }, { "epoch": 0.55, "learning_rate": 4.4504503476067084e-05, "loss": 0.1787, "step": 48140 }, { "epoch": 0.55, "learning_rate": 4.450222034498111e-05, "loss": 0.1812, "step": 48160 }, { "epoch": 0.55, "learning_rate": 4.449993721389514e-05, "loss": 0.3648, "step": 48180 }, { "epoch": 0.55, "learning_rate": 4.4497654082809166e-05, "loss": 0.4859, "step": 48200 }, { "epoch": 0.55, "learning_rate": 4.4495370951723196e-05, "loss": 0.1427, "step": 48220 }, { "epoch": 0.55, "learning_rate": 4.4493087820637226e-05, "loss": 0.1279, "step": 48240 }, { "epoch": 0.55, "learning_rate": 4.449080468955125e-05, "loss": 0.0686, "step": 48260 }, { "epoch": 0.55, "learning_rate": 4.448852155846528e-05, "loss": 0.1576, "step": 48280 }, { "epoch": 0.55, "learning_rate": 4.4486238427379315e-05, "loss": 0.1869, "step": 48300 }, { "epoch": 0.55, "learning_rate": 4.448395529629334e-05, "loss": 0.3943, "step": 48320 }, { "epoch": 0.55, "learning_rate": 4.448167216520737e-05, "loss": 0.1165, "step": 48340 }, { "epoch": 0.55, "learning_rate": 4.447938903412139e-05, "loss": 0.1996, "step": 48360 }, { "epoch": 0.55, "learning_rate": 4.447710590303543e-05, "loss": 0.116, "step": 48380 }, { "epoch": 0.55, "learning_rate": 4.4474822771949457e-05, "loss": 0.1768, "step": 48400 }, { "epoch": 0.55, "learning_rate": 4.447253964086348e-05, "loss": 0.4808, "step": 48420 }, { "epoch": 0.55, "learning_rate": 4.4470256509777516e-05, "loss": 0.2782, "step": 48440 }, { "epoch": 0.55, "learning_rate": 4.446797337869154e-05, "loss": 0.0776, "step": 48460 }, { "epoch": 0.55, "learning_rate": 4.446569024760557e-05, "loss": 0.3581, "step": 48480 }, { "epoch": 0.55, "learning_rate": 4.44634071165196e-05, "loss": 0.1667, "step": 48500 }, { "epoch": 0.55, "learning_rate": 4.446112398543363e-05, "loss": 0.0618, "step": 48520 }, { "epoch": 0.55, "learning_rate": 4.445884085434766e-05, "loss": 0.3535, "step": 48540 }, { "epoch": 0.55, "learning_rate": 4.445655772326168e-05, "loss": 0.0952, "step": 48560 }, { "epoch": 0.55, "learning_rate": 4.445427459217571e-05, "loss": 0.228, "step": 48580 }, { "epoch": 0.55, "learning_rate": 4.445199146108974e-05, "loss": 0.6014, "step": 48600 }, { "epoch": 0.56, "learning_rate": 4.444970833000377e-05, "loss": 0.1478, "step": 48620 }, { "epoch": 0.56, "learning_rate": 4.44474251989178e-05, "loss": 0.013, "step": 48640 }, { "epoch": 0.56, "learning_rate": 4.444514206783182e-05, "loss": 0.2949, "step": 48660 }, { "epoch": 0.56, "learning_rate": 4.444285893674586e-05, "loss": 0.1815, "step": 48680 }, { "epoch": 0.56, "learning_rate": 4.444057580565988e-05, "loss": 0.2026, "step": 48700 }, { "epoch": 0.56, "learning_rate": 4.443829267457391e-05, "loss": 0.107, "step": 48720 }, { "epoch": 0.56, "learning_rate": 4.443600954348795e-05, "loss": 0.1936, "step": 48740 }, { "epoch": 0.56, "learning_rate": 4.443372641240197e-05, "loss": 0.1137, "step": 48760 }, { "epoch": 0.56, "learning_rate": 4.4431443281316e-05, "loss": 0.2622, "step": 48780 }, { "epoch": 0.56, "learning_rate": 4.442916015023002e-05, "loss": 0.4964, "step": 48800 }, { "epoch": 0.56, "learning_rate": 4.442687701914406e-05, "loss": 0.6445, "step": 48820 }, { "epoch": 0.56, "learning_rate": 4.442459388805809e-05, "loss": 0.7346, "step": 48840 }, { "epoch": 0.56, "learning_rate": 4.442231075697211e-05, "loss": 0.1771, "step": 48860 }, { "epoch": 0.56, "learning_rate": 4.442002762588614e-05, "loss": 0.333, "step": 48880 }, { "epoch": 0.56, "learning_rate": 4.441774449480017e-05, "loss": 0.4001, "step": 48900 }, { "epoch": 0.56, "learning_rate": 4.44154613637142e-05, "loss": 0.0664, "step": 48920 }, { "epoch": 0.56, "learning_rate": 4.441317823262823e-05, "loss": 0.3889, "step": 48940 }, { "epoch": 0.56, "learning_rate": 4.4410895101542254e-05, "loss": 0.2697, "step": 48960 }, { "epoch": 0.56, "learning_rate": 4.440861197045629e-05, "loss": 0.1714, "step": 48980 }, { "epoch": 0.56, "learning_rate": 4.440632883937031e-05, "loss": 0.2381, "step": 49000 }, { "epoch": 0.56, "learning_rate": 4.440404570828434e-05, "loss": 0.2624, "step": 49020 }, { "epoch": 0.56, "learning_rate": 4.440176257719837e-05, "loss": 0.3434, "step": 49040 }, { "epoch": 0.56, "learning_rate": 4.43994794461124e-05, "loss": 0.2972, "step": 49060 }, { "epoch": 0.56, "learning_rate": 4.439719631502643e-05, "loss": 0.0841, "step": 49080 }, { "epoch": 0.56, "learning_rate": 4.4394913183940455e-05, "loss": 0.1605, "step": 49100 }, { "epoch": 0.56, "learning_rate": 4.439263005285449e-05, "loss": 0.2553, "step": 49120 }, { "epoch": 0.56, "learning_rate": 4.4390346921768514e-05, "loss": 0.5199, "step": 49140 }, { "epoch": 0.56, "learning_rate": 4.4388063790682544e-05, "loss": 0.7678, "step": 49160 }, { "epoch": 0.56, "learning_rate": 4.4385780659596573e-05, "loss": 0.1714, "step": 49180 }, { "epoch": 0.56, "learning_rate": 4.43834975285106e-05, "loss": 0.4036, "step": 49200 }, { "epoch": 0.56, "learning_rate": 4.438121439742463e-05, "loss": 0.1477, "step": 49220 }, { "epoch": 0.56, "learning_rate": 4.4378931266338656e-05, "loss": 0.2963, "step": 49240 }, { "epoch": 0.56, "learning_rate": 4.4376648135252685e-05, "loss": 0.2903, "step": 49260 }, { "epoch": 0.56, "learning_rate": 4.4374365004166715e-05, "loss": 0.2465, "step": 49280 }, { "epoch": 0.56, "learning_rate": 4.4372081873080745e-05, "loss": 0.2458, "step": 49300 }, { "epoch": 0.56, "learning_rate": 4.4369798741994774e-05, "loss": 0.1398, "step": 49320 }, { "epoch": 0.56, "learning_rate": 4.43675156109088e-05, "loss": 0.2946, "step": 49340 }, { "epoch": 0.56, "learning_rate": 4.4365232479822834e-05, "loss": 0.252, "step": 49360 }, { "epoch": 0.56, "learning_rate": 4.436294934873686e-05, "loss": 0.3118, "step": 49380 }, { "epoch": 0.56, "learning_rate": 4.4360666217650886e-05, "loss": 0.3591, "step": 49400 }, { "epoch": 0.56, "learning_rate": 4.435838308656492e-05, "loss": 0.1084, "step": 49420 }, { "epoch": 0.56, "learning_rate": 4.4356099955478946e-05, "loss": 0.2252, "step": 49440 }, { "epoch": 0.56, "learning_rate": 4.4353816824392975e-05, "loss": 0.2, "step": 49460 }, { "epoch": 0.56, "learning_rate": 4.4351533693307e-05, "loss": 0.0799, "step": 49480 }, { "epoch": 0.57, "learning_rate": 4.4349250562221035e-05, "loss": 0.0617, "step": 49500 }, { "epoch": 0.57, "learning_rate": 4.4346967431135065e-05, "loss": 0.2681, "step": 49520 }, { "epoch": 0.57, "learning_rate": 4.434468430004909e-05, "loss": 0.1567, "step": 49540 }, { "epoch": 0.57, "learning_rate": 4.434240116896312e-05, "loss": 0.2449, "step": 49560 }, { "epoch": 0.57, "learning_rate": 4.434011803787715e-05, "loss": 0.4459, "step": 49580 }, { "epoch": 0.57, "learning_rate": 4.4337834906791177e-05, "loss": 0.1592, "step": 49600 }, { "epoch": 0.57, "learning_rate": 4.4335551775705206e-05, "loss": 0.2848, "step": 49620 }, { "epoch": 0.57, "learning_rate": 4.433326864461923e-05, "loss": 0.3525, "step": 49640 }, { "epoch": 0.57, "learning_rate": 4.4330985513533266e-05, "loss": 0.3159, "step": 49660 }, { "epoch": 0.57, "learning_rate": 4.432870238244729e-05, "loss": 0.2685, "step": 49680 }, { "epoch": 0.57, "learning_rate": 4.432641925136132e-05, "loss": 0.2973, "step": 49700 }, { "epoch": 0.57, "learning_rate": 4.432413612027535e-05, "loss": 0.4014, "step": 49720 }, { "epoch": 0.57, "learning_rate": 4.432185298918938e-05, "loss": 0.1993, "step": 49740 }, { "epoch": 0.57, "learning_rate": 4.431956985810341e-05, "loss": 0.1721, "step": 49760 }, { "epoch": 0.57, "learning_rate": 4.431728672701743e-05, "loss": 0.2719, "step": 49780 }, { "epoch": 0.57, "learning_rate": 4.4315003595931467e-05, "loss": 0.3766, "step": 49800 }, { "epoch": 0.57, "learning_rate": 4.431272046484549e-05, "loss": 0.1105, "step": 49820 }, { "epoch": 0.57, "learning_rate": 4.431043733375952e-05, "loss": 0.0446, "step": 49840 }, { "epoch": 0.57, "learning_rate": 4.430815420267355e-05, "loss": 0.1218, "step": 49860 }, { "epoch": 0.57, "learning_rate": 4.430587107158758e-05, "loss": 0.2071, "step": 49880 }, { "epoch": 0.57, "learning_rate": 4.430358794050161e-05, "loss": 0.4358, "step": 49900 }, { "epoch": 0.57, "learning_rate": 4.430130480941563e-05, "loss": 0.1608, "step": 49920 }, { "epoch": 0.57, "learning_rate": 4.429902167832966e-05, "loss": 0.6086, "step": 49940 }, { "epoch": 0.57, "learning_rate": 4.429673854724369e-05, "loss": 0.1768, "step": 49960 }, { "epoch": 0.57, "learning_rate": 4.429445541615772e-05, "loss": 0.1074, "step": 49980 }, { "epoch": 0.57, "learning_rate": 4.429217228507175e-05, "loss": 0.2501, "step": 50000 }, { "epoch": 0.57, "learning_rate": 4.428988915398578e-05, "loss": 0.1379, "step": 50020 }, { "epoch": 0.57, "learning_rate": 4.428760602289981e-05, "loss": 0.094, "step": 50040 }, { "epoch": 0.57, "learning_rate": 4.428532289181383e-05, "loss": 0.2751, "step": 50060 }, { "epoch": 0.57, "learning_rate": 4.428303976072786e-05, "loss": 0.2281, "step": 50080 }, { "epoch": 0.57, "learning_rate": 4.42807566296419e-05, "loss": 0.2859, "step": 50100 }, { "epoch": 0.57, "learning_rate": 4.427847349855592e-05, "loss": 0.2681, "step": 50120 }, { "epoch": 0.57, "learning_rate": 4.427619036746995e-05, "loss": 0.1351, "step": 50140 }, { "epoch": 0.57, "learning_rate": 4.4273907236383974e-05, "loss": 0.2487, "step": 50160 }, { "epoch": 0.57, "learning_rate": 4.427162410529801e-05, "loss": 0.1737, "step": 50180 }, { "epoch": 0.57, "learning_rate": 4.426934097421204e-05, "loss": 0.4194, "step": 50200 }, { "epoch": 0.57, "learning_rate": 4.426705784312606e-05, "loss": 0.2243, "step": 50220 }, { "epoch": 0.57, "learning_rate": 4.426477471204009e-05, "loss": 0.1151, "step": 50240 }, { "epoch": 0.57, "learning_rate": 4.426249158095412e-05, "loss": 0.1456, "step": 50260 }, { "epoch": 0.57, "learning_rate": 4.426020844986815e-05, "loss": 0.0952, "step": 50280 }, { "epoch": 0.57, "learning_rate": 4.425792531878218e-05, "loss": 0.0556, "step": 50300 }, { "epoch": 0.57, "learning_rate": 4.425564218769621e-05, "loss": 0.3914, "step": 50320 }, { "epoch": 0.57, "learning_rate": 4.425335905661024e-05, "loss": 0.1599, "step": 50340 }, { "epoch": 0.57, "learning_rate": 4.4251075925524264e-05, "loss": 0.2258, "step": 50360 }, { "epoch": 0.58, "learning_rate": 4.4248792794438293e-05, "loss": 0.0455, "step": 50380 }, { "epoch": 0.58, "learning_rate": 4.424650966335232e-05, "loss": 0.1606, "step": 50400 }, { "epoch": 0.58, "learning_rate": 4.424422653226635e-05, "loss": 0.2091, "step": 50420 }, { "epoch": 0.58, "learning_rate": 4.424194340118038e-05, "loss": 0.1548, "step": 50440 }, { "epoch": 0.58, "learning_rate": 4.4239660270094405e-05, "loss": 0.2673, "step": 50460 }, { "epoch": 0.58, "learning_rate": 4.423737713900844e-05, "loss": 0.1802, "step": 50480 }, { "epoch": 0.58, "learning_rate": 4.4235094007922465e-05, "loss": 0.529, "step": 50500 }, { "epoch": 0.58, "learning_rate": 4.4232810876836494e-05, "loss": 0.3033, "step": 50520 }, { "epoch": 0.58, "learning_rate": 4.4230527745750524e-05, "loss": 0.1697, "step": 50540 }, { "epoch": 0.58, "learning_rate": 4.4228244614664554e-05, "loss": 0.106, "step": 50560 }, { "epoch": 0.58, "learning_rate": 4.4225961483578584e-05, "loss": 0.0731, "step": 50580 }, { "epoch": 0.58, "learning_rate": 4.4223678352492606e-05, "loss": 0.0842, "step": 50600 }, { "epoch": 0.58, "learning_rate": 4.422139522140664e-05, "loss": 0.1853, "step": 50620 }, { "epoch": 0.58, "learning_rate": 4.4219112090320666e-05, "loss": 0.1421, "step": 50640 }, { "epoch": 0.58, "learning_rate": 4.4216828959234695e-05, "loss": 0.3275, "step": 50660 }, { "epoch": 0.58, "learning_rate": 4.4214545828148725e-05, "loss": 0.2207, "step": 50680 }, { "epoch": 0.58, "learning_rate": 4.4212262697062755e-05, "loss": 0.3028, "step": 50700 }, { "epoch": 0.58, "learning_rate": 4.4209979565976785e-05, "loss": 0.0804, "step": 50720 }, { "epoch": 0.58, "learning_rate": 4.420769643489081e-05, "loss": 0.1823, "step": 50740 }, { "epoch": 0.58, "learning_rate": 4.420541330380484e-05, "loss": 0.1154, "step": 50760 }, { "epoch": 0.58, "learning_rate": 4.4203130172718874e-05, "loss": 0.2479, "step": 50780 }, { "epoch": 0.58, "learning_rate": 4.4200847041632896e-05, "loss": 0.2389, "step": 50800 }, { "epoch": 0.58, "learning_rate": 4.4198563910546926e-05, "loss": 0.2889, "step": 50820 }, { "epoch": 0.58, "learning_rate": 4.419628077946095e-05, "loss": 0.1288, "step": 50840 }, { "epoch": 0.58, "learning_rate": 4.4193997648374986e-05, "loss": 0.2473, "step": 50860 }, { "epoch": 0.58, "learning_rate": 4.4191714517289015e-05, "loss": 0.2856, "step": 50880 }, { "epoch": 0.58, "learning_rate": 4.418943138620304e-05, "loss": 0.2485, "step": 50900 }, { "epoch": 0.58, "learning_rate": 4.4187148255117075e-05, "loss": 0.1622, "step": 50920 }, { "epoch": 0.58, "learning_rate": 4.41848651240311e-05, "loss": 0.3508, "step": 50940 }, { "epoch": 0.58, "learning_rate": 4.418258199294513e-05, "loss": 0.0844, "step": 50960 }, { "epoch": 0.58, "learning_rate": 4.418029886185916e-05, "loss": 0.4713, "step": 50980 }, { "epoch": 0.58, "learning_rate": 4.4178015730773187e-05, "loss": 0.1757, "step": 51000 }, { "epoch": 0.58, "learning_rate": 4.4175732599687216e-05, "loss": 0.1194, "step": 51020 }, { "epoch": 0.58, "learning_rate": 4.417344946860124e-05, "loss": 0.1956, "step": 51040 }, { "epoch": 0.58, "learning_rate": 4.417116633751527e-05, "loss": 0.1438, "step": 51060 }, { "epoch": 0.58, "learning_rate": 4.41688832064293e-05, "loss": 0.1809, "step": 51080 }, { "epoch": 0.58, "learning_rate": 4.416660007534333e-05, "loss": 0.2753, "step": 51100 }, { "epoch": 0.58, "learning_rate": 4.416431694425736e-05, "loss": 0.4183, "step": 51120 }, { "epoch": 0.58, "learning_rate": 4.416203381317138e-05, "loss": 0.3088, "step": 51140 }, { "epoch": 0.58, "learning_rate": 4.415975068208542e-05, "loss": 0.1566, "step": 51160 }, { "epoch": 0.58, "learning_rate": 4.415746755099944e-05, "loss": 0.5239, "step": 51180 }, { "epoch": 0.58, "learning_rate": 4.415518441991347e-05, "loss": 0.1704, "step": 51200 }, { "epoch": 0.58, "learning_rate": 4.41529012888275e-05, "loss": 0.3093, "step": 51220 }, { "epoch": 0.58, "learning_rate": 4.415061815774153e-05, "loss": 0.2015, "step": 51240 }, { "epoch": 0.59, "learning_rate": 4.414833502665556e-05, "loss": 0.3477, "step": 51260 }, { "epoch": 0.59, "learning_rate": 4.414605189556958e-05, "loss": 0.2429, "step": 51280 }, { "epoch": 0.59, "learning_rate": 4.414376876448362e-05, "loss": 0.1872, "step": 51300 }, { "epoch": 0.59, "learning_rate": 4.414148563339765e-05, "loss": 0.3876, "step": 51320 }, { "epoch": 0.59, "learning_rate": 4.413920250231167e-05, "loss": 0.0954, "step": 51340 }, { "epoch": 0.59, "learning_rate": 4.41369193712257e-05, "loss": 0.2367, "step": 51360 }, { "epoch": 0.59, "learning_rate": 4.413463624013973e-05, "loss": 0.1025, "step": 51380 }, { "epoch": 0.59, "learning_rate": 4.413235310905376e-05, "loss": 0.148, "step": 51400 }, { "epoch": 0.59, "learning_rate": 4.413006997796779e-05, "loss": 0.5506, "step": 51420 }, { "epoch": 0.59, "learning_rate": 4.412778684688181e-05, "loss": 0.249, "step": 51440 }, { "epoch": 0.59, "learning_rate": 4.412550371579585e-05, "loss": 0.1844, "step": 51460 }, { "epoch": 0.59, "learning_rate": 4.412322058470987e-05, "loss": 0.4781, "step": 51480 }, { "epoch": 0.59, "learning_rate": 4.41209374536239e-05, "loss": 0.2513, "step": 51500 }, { "epoch": 0.59, "learning_rate": 4.411865432253793e-05, "loss": 0.1708, "step": 51520 }, { "epoch": 0.59, "learning_rate": 4.411637119145196e-05, "loss": 0.1153, "step": 51540 }, { "epoch": 0.59, "learning_rate": 4.411408806036599e-05, "loss": 0.2451, "step": 51560 }, { "epoch": 0.59, "learning_rate": 4.4111804929280013e-05, "loss": 0.1898, "step": 51580 }, { "epoch": 0.59, "learning_rate": 4.410952179819405e-05, "loss": 0.2822, "step": 51600 }, { "epoch": 0.59, "learning_rate": 4.410723866710807e-05, "loss": 0.1531, "step": 51620 }, { "epoch": 0.59, "learning_rate": 4.41049555360221e-05, "loss": 0.3065, "step": 51640 }, { "epoch": 0.59, "learning_rate": 4.410267240493613e-05, "loss": 0.0825, "step": 51660 }, { "epoch": 0.59, "learning_rate": 4.410038927385016e-05, "loss": 0.2338, "step": 51680 }, { "epoch": 0.59, "learning_rate": 4.409810614276419e-05, "loss": 0.1228, "step": 51700 }, { "epoch": 0.59, "learning_rate": 4.4095823011678214e-05, "loss": 0.3895, "step": 51720 }, { "epoch": 0.59, "learning_rate": 4.4093539880592244e-05, "loss": 0.2341, "step": 51740 }, { "epoch": 0.59, "learning_rate": 4.4091256749506274e-05, "loss": 0.419, "step": 51760 }, { "epoch": 0.59, "learning_rate": 4.4088973618420304e-05, "loss": 0.0857, "step": 51780 }, { "epoch": 0.59, "learning_rate": 4.408669048733433e-05, "loss": 0.4743, "step": 51800 }, { "epoch": 0.59, "learning_rate": 4.4084407356248356e-05, "loss": 0.2519, "step": 51820 }, { "epoch": 0.59, "learning_rate": 4.408212422516239e-05, "loss": 0.4979, "step": 51840 }, { "epoch": 0.59, "learning_rate": 4.4079841094076415e-05, "loss": 0.2045, "step": 51860 }, { "epoch": 0.59, "learning_rate": 4.4077557962990445e-05, "loss": 0.2317, "step": 51880 }, { "epoch": 0.59, "learning_rate": 4.407527483190448e-05, "loss": 0.1721, "step": 51900 }, { "epoch": 0.59, "learning_rate": 4.4072991700818505e-05, "loss": 0.1785, "step": 51920 }, { "epoch": 0.59, "learning_rate": 4.4070708569732534e-05, "loss": 0.3077, "step": 51940 }, { "epoch": 0.59, "learning_rate": 4.406842543864656e-05, "loss": 0.3741, "step": 51960 }, { "epoch": 0.59, "learning_rate": 4.4066142307560594e-05, "loss": 0.1953, "step": 51980 }, { "epoch": 0.59, "learning_rate": 4.406385917647462e-05, "loss": 0.4761, "step": 52000 }, { "epoch": 0.59, "learning_rate": 4.4061576045388646e-05, "loss": 0.3194, "step": 52020 }, { "epoch": 0.59, "learning_rate": 4.4059292914302676e-05, "loss": 0.1671, "step": 52040 }, { "epoch": 0.59, "learning_rate": 4.4057009783216706e-05, "loss": 0.2983, "step": 52060 }, { "epoch": 0.59, "learning_rate": 4.4054726652130735e-05, "loss": 0.1966, "step": 52080 }, { "epoch": 0.59, "learning_rate": 4.4052443521044765e-05, "loss": 0.2573, "step": 52100 }, { "epoch": 0.59, "learning_rate": 4.405016038995879e-05, "loss": 0.6134, "step": 52120 }, { "epoch": 0.6, "learning_rate": 4.4047877258872824e-05, "loss": 0.0676, "step": 52140 }, { "epoch": 0.6, "learning_rate": 4.404559412778685e-05, "loss": 0.5265, "step": 52160 }, { "epoch": 0.6, "learning_rate": 4.404331099670088e-05, "loss": 0.2129, "step": 52180 }, { "epoch": 0.6, "learning_rate": 4.4041027865614907e-05, "loss": 0.2984, "step": 52200 }, { "epoch": 0.6, "learning_rate": 4.4038744734528936e-05, "loss": 0.1636, "step": 52220 }, { "epoch": 0.6, "learning_rate": 4.4036461603442966e-05, "loss": 0.3565, "step": 52240 }, { "epoch": 0.6, "learning_rate": 4.403417847235699e-05, "loss": 0.201, "step": 52260 }, { "epoch": 0.6, "learning_rate": 4.4031895341271025e-05, "loss": 0.1831, "step": 52280 }, { "epoch": 0.6, "learning_rate": 4.402961221018505e-05, "loss": 0.242, "step": 52300 }, { "epoch": 0.6, "learning_rate": 4.402732907909908e-05, "loss": 0.1568, "step": 52320 }, { "epoch": 0.6, "learning_rate": 4.402504594801311e-05, "loss": 0.5503, "step": 52340 }, { "epoch": 0.6, "learning_rate": 4.402276281692714e-05, "loss": 0.3033, "step": 52360 }, { "epoch": 0.6, "learning_rate": 4.402047968584117e-05, "loss": 0.0765, "step": 52380 }, { "epoch": 0.6, "learning_rate": 4.401819655475519e-05, "loss": 0.0503, "step": 52400 }, { "epoch": 0.6, "learning_rate": 4.401591342366922e-05, "loss": 0.1261, "step": 52420 }, { "epoch": 0.6, "learning_rate": 4.401363029258325e-05, "loss": 0.2017, "step": 52440 }, { "epoch": 0.6, "learning_rate": 4.401134716149728e-05, "loss": 0.3007, "step": 52460 }, { "epoch": 0.6, "learning_rate": 4.400906403041131e-05, "loss": 0.1438, "step": 52480 }, { "epoch": 0.6, "learning_rate": 4.400678089932534e-05, "loss": 0.1285, "step": 52500 }, { "epoch": 0.6, "learning_rate": 4.400449776823937e-05, "loss": 0.4342, "step": 52520 }, { "epoch": 0.6, "learning_rate": 4.400221463715339e-05, "loss": 0.3152, "step": 52540 }, { "epoch": 0.6, "learning_rate": 4.399993150606742e-05, "loss": 0.2205, "step": 52560 }, { "epoch": 0.6, "learning_rate": 4.399764837498146e-05, "loss": 0.2106, "step": 52580 }, { "epoch": 0.6, "learning_rate": 4.399536524389548e-05, "loss": 0.2489, "step": 52600 }, { "epoch": 0.6, "learning_rate": 4.399308211280951e-05, "loss": 0.1492, "step": 52620 }, { "epoch": 0.6, "learning_rate": 4.399079898172353e-05, "loss": 0.2598, "step": 52640 }, { "epoch": 0.6, "learning_rate": 4.398851585063757e-05, "loss": 0.1742, "step": 52660 }, { "epoch": 0.6, "learning_rate": 4.39862327195516e-05, "loss": 0.2734, "step": 52680 }, { "epoch": 0.6, "learning_rate": 4.398394958846562e-05, "loss": 0.3214, "step": 52700 }, { "epoch": 0.6, "learning_rate": 4.398166645737965e-05, "loss": 0.3806, "step": 52720 }, { "epoch": 0.6, "learning_rate": 4.397938332629368e-05, "loss": 0.1298, "step": 52740 }, { "epoch": 0.6, "learning_rate": 4.397710019520771e-05, "loss": 0.2996, "step": 52760 }, { "epoch": 0.6, "learning_rate": 4.397481706412174e-05, "loss": 0.1634, "step": 52780 }, { "epoch": 0.6, "learning_rate": 4.397253393303577e-05, "loss": 0.1137, "step": 52800 }, { "epoch": 0.6, "learning_rate": 4.39702508019498e-05, "loss": 0.2786, "step": 52820 }, { "epoch": 0.6, "learning_rate": 4.396796767086382e-05, "loss": 0.2277, "step": 52840 }, { "epoch": 0.6, "learning_rate": 4.396568453977785e-05, "loss": 0.2985, "step": 52860 }, { "epoch": 0.6, "learning_rate": 4.396340140869188e-05, "loss": 0.1082, "step": 52880 }, { "epoch": 0.6, "learning_rate": 4.396111827760591e-05, "loss": 0.4104, "step": 52900 }, { "epoch": 0.6, "learning_rate": 4.395883514651994e-05, "loss": 0.1278, "step": 52920 }, { "epoch": 0.6, "learning_rate": 4.3956552015433964e-05, "loss": 0.1758, "step": 52940 }, { "epoch": 0.6, "learning_rate": 4.3954268884348e-05, "loss": 0.2942, "step": 52960 }, { "epoch": 0.6, "learning_rate": 4.3951985753262024e-05, "loss": 0.1714, "step": 52980 }, { "epoch": 0.61, "learning_rate": 4.394970262217605e-05, "loss": 0.0723, "step": 53000 }, { "epoch": 0.61, "learning_rate": 4.394741949109008e-05, "loss": 0.5004, "step": 53020 }, { "epoch": 0.61, "learning_rate": 4.394513636000411e-05, "loss": 0.108, "step": 53040 }, { "epoch": 0.61, "learning_rate": 4.394285322891814e-05, "loss": 0.234, "step": 53060 }, { "epoch": 0.61, "learning_rate": 4.3940570097832165e-05, "loss": 0.1009, "step": 53080 }, { "epoch": 0.61, "learning_rate": 4.39382869667462e-05, "loss": 0.2283, "step": 53100 }, { "epoch": 0.61, "learning_rate": 4.3936003835660225e-05, "loss": 0.3843, "step": 53120 }, { "epoch": 0.61, "learning_rate": 4.3933720704574254e-05, "loss": 0.1887, "step": 53140 }, { "epoch": 0.61, "learning_rate": 4.3931437573488284e-05, "loss": 0.1982, "step": 53160 }, { "epoch": 0.61, "learning_rate": 4.3929154442402314e-05, "loss": 0.1279, "step": 53180 }, { "epoch": 0.61, "learning_rate": 4.392687131131634e-05, "loss": 0.2491, "step": 53200 }, { "epoch": 0.61, "learning_rate": 4.3924588180230366e-05, "loss": 0.1446, "step": 53220 }, { "epoch": 0.61, "learning_rate": 4.3922305049144396e-05, "loss": 0.1203, "step": 53240 }, { "epoch": 0.61, "learning_rate": 4.392002191805843e-05, "loss": 0.0951, "step": 53260 }, { "epoch": 0.61, "learning_rate": 4.3917738786972455e-05, "loss": 0.2776, "step": 53280 }, { "epoch": 0.61, "learning_rate": 4.3915455655886485e-05, "loss": 0.2513, "step": 53300 }, { "epoch": 0.61, "learning_rate": 4.391317252480051e-05, "loss": 0.4514, "step": 53320 }, { "epoch": 0.61, "learning_rate": 4.3910889393714544e-05, "loss": 0.162, "step": 53340 }, { "epoch": 0.61, "learning_rate": 4.3908606262628574e-05, "loss": 0.1906, "step": 53360 }, { "epoch": 0.61, "learning_rate": 4.39063231315426e-05, "loss": 0.2529, "step": 53380 }, { "epoch": 0.61, "learning_rate": 4.390404000045663e-05, "loss": 0.3741, "step": 53400 }, { "epoch": 0.61, "learning_rate": 4.3901756869370656e-05, "loss": 0.4335, "step": 53420 }, { "epoch": 0.61, "learning_rate": 4.3899473738284686e-05, "loss": 0.2906, "step": 53440 }, { "epoch": 0.61, "learning_rate": 4.3897190607198716e-05, "loss": 0.1225, "step": 53460 }, { "epoch": 0.61, "learning_rate": 4.3894907476112745e-05, "loss": 0.1723, "step": 53480 }, { "epoch": 0.61, "learning_rate": 4.3892624345026775e-05, "loss": 0.1454, "step": 53500 }, { "epoch": 0.61, "learning_rate": 4.38903412139408e-05, "loss": 0.4386, "step": 53520 }, { "epoch": 0.61, "learning_rate": 4.388805808285483e-05, "loss": 0.276, "step": 53540 }, { "epoch": 0.61, "learning_rate": 4.388577495176886e-05, "loss": 0.2569, "step": 53560 }, { "epoch": 0.61, "learning_rate": 4.388349182068289e-05, "loss": 0.1675, "step": 53580 }, { "epoch": 0.61, "learning_rate": 4.3881208689596917e-05, "loss": 0.2932, "step": 53600 }, { "epoch": 0.61, "learning_rate": 4.387892555851094e-05, "loss": 0.1922, "step": 53620 }, { "epoch": 0.61, "learning_rate": 4.3876642427424976e-05, "loss": 0.3242, "step": 53640 }, { "epoch": 0.61, "learning_rate": 4.3874359296339e-05, "loss": 0.1143, "step": 53660 }, { "epoch": 0.61, "learning_rate": 4.387207616525303e-05, "loss": 0.1829, "step": 53680 }, { "epoch": 0.61, "learning_rate": 4.386979303416706e-05, "loss": 0.1117, "step": 53700 }, { "epoch": 0.61, "learning_rate": 4.386750990308109e-05, "loss": 0.1251, "step": 53720 }, { "epoch": 0.61, "learning_rate": 4.386522677199512e-05, "loss": 0.08, "step": 53740 }, { "epoch": 0.61, "learning_rate": 4.386294364090914e-05, "loss": 0.3658, "step": 53760 }, { "epoch": 0.61, "learning_rate": 4.386066050982318e-05, "loss": 0.2249, "step": 53780 }, { "epoch": 0.61, "learning_rate": 4.38583773787372e-05, "loss": 0.1387, "step": 53800 }, { "epoch": 0.61, "learning_rate": 4.385609424765123e-05, "loss": 0.442, "step": 53820 }, { "epoch": 0.61, "learning_rate": 4.385381111656526e-05, "loss": 0.1128, "step": 53840 }, { "epoch": 0.61, "learning_rate": 4.385152798547929e-05, "loss": 0.2593, "step": 53860 }, { "epoch": 0.62, "learning_rate": 4.384924485439332e-05, "loss": 0.1697, "step": 53880 }, { "epoch": 0.62, "learning_rate": 4.384696172330734e-05, "loss": 0.2282, "step": 53900 }, { "epoch": 0.62, "learning_rate": 4.384467859222137e-05, "loss": 0.2848, "step": 53920 }, { "epoch": 0.62, "learning_rate": 4.384239546113541e-05, "loss": 0.1083, "step": 53940 }, { "epoch": 0.62, "learning_rate": 4.384011233004943e-05, "loss": 0.3224, "step": 53960 }, { "epoch": 0.62, "learning_rate": 4.383782919896346e-05, "loss": 0.2995, "step": 53980 }, { "epoch": 0.62, "learning_rate": 4.383554606787749e-05, "loss": 0.4412, "step": 54000 }, { "epoch": 0.62, "learning_rate": 4.383326293679152e-05, "loss": 0.1408, "step": 54020 }, { "epoch": 0.62, "learning_rate": 4.383097980570555e-05, "loss": 0.2079, "step": 54040 }, { "epoch": 0.62, "learning_rate": 4.382869667461957e-05, "loss": 0.1672, "step": 54060 }, { "epoch": 0.62, "learning_rate": 4.382641354353361e-05, "loss": 0.3369, "step": 54080 }, { "epoch": 0.62, "learning_rate": 4.382413041244763e-05, "loss": 0.2533, "step": 54100 }, { "epoch": 0.62, "learning_rate": 4.382184728136166e-05, "loss": 0.124, "step": 54120 }, { "epoch": 0.62, "learning_rate": 4.381956415027569e-05, "loss": 0.2344, "step": 54140 }, { "epoch": 0.62, "learning_rate": 4.381728101918972e-05, "loss": 0.1448, "step": 54160 }, { "epoch": 0.62, "learning_rate": 4.381499788810375e-05, "loss": 0.0739, "step": 54180 }, { "epoch": 0.62, "learning_rate": 4.381271475701777e-05, "loss": 0.3725, "step": 54200 }, { "epoch": 0.62, "learning_rate": 4.38104316259318e-05, "loss": 0.1971, "step": 54220 }, { "epoch": 0.62, "learning_rate": 4.380814849484583e-05, "loss": 0.1733, "step": 54240 }, { "epoch": 0.62, "learning_rate": 4.380586536375986e-05, "loss": 0.1102, "step": 54260 }, { "epoch": 0.62, "learning_rate": 4.380358223267389e-05, "loss": 0.1558, "step": 54280 }, { "epoch": 0.62, "learning_rate": 4.3801299101587915e-05, "loss": 0.1672, "step": 54300 }, { "epoch": 0.62, "learning_rate": 4.379901597050195e-05, "loss": 0.2151, "step": 54320 }, { "epoch": 0.62, "learning_rate": 4.3796732839415974e-05, "loss": 0.3159, "step": 54340 }, { "epoch": 0.62, "learning_rate": 4.3794449708330004e-05, "loss": 0.4123, "step": 54360 }, { "epoch": 0.62, "learning_rate": 4.379216657724404e-05, "loss": 0.2294, "step": 54380 }, { "epoch": 0.62, "learning_rate": 4.378988344615806e-05, "loss": 0.2165, "step": 54400 }, { "epoch": 0.62, "learning_rate": 4.378760031507209e-05, "loss": 0.2422, "step": 54420 }, { "epoch": 0.62, "learning_rate": 4.3785317183986116e-05, "loss": 0.0678, "step": 54440 }, { "epoch": 0.62, "learning_rate": 4.378303405290015e-05, "loss": 0.2919, "step": 54460 }, { "epoch": 0.62, "learning_rate": 4.378075092181418e-05, "loss": 0.4449, "step": 54480 }, { "epoch": 0.62, "learning_rate": 4.3778467790728205e-05, "loss": 0.1231, "step": 54500 }, { "epoch": 0.62, "learning_rate": 4.3776184659642235e-05, "loss": 0.1778, "step": 54520 }, { "epoch": 0.62, "learning_rate": 4.3773901528556264e-05, "loss": 0.1079, "step": 54540 }, { "epoch": 0.62, "learning_rate": 4.3771618397470294e-05, "loss": 0.0618, "step": 54560 }, { "epoch": 0.62, "learning_rate": 4.3769335266384324e-05, "loss": 0.2005, "step": 54580 }, { "epoch": 0.62, "learning_rate": 4.3767052135298347e-05, "loss": 0.1108, "step": 54600 }, { "epoch": 0.62, "learning_rate": 4.376476900421238e-05, "loss": 0.352, "step": 54620 }, { "epoch": 0.62, "learning_rate": 4.3762485873126406e-05, "loss": 0.4573, "step": 54640 }, { "epoch": 0.62, "learning_rate": 4.3760202742040436e-05, "loss": 0.1476, "step": 54660 }, { "epoch": 0.62, "learning_rate": 4.3757919610954465e-05, "loss": 0.1334, "step": 54680 }, { "epoch": 0.62, "learning_rate": 4.3755636479868495e-05, "loss": 0.2695, "step": 54700 }, { "epoch": 0.62, "learning_rate": 4.3753353348782525e-05, "loss": 0.1474, "step": 54720 }, { "epoch": 0.62, "learning_rate": 4.375107021769655e-05, "loss": 0.3469, "step": 54740 }, { "epoch": 0.63, "learning_rate": 4.3748787086610584e-05, "loss": 0.2435, "step": 54760 }, { "epoch": 0.63, "learning_rate": 4.374650395552461e-05, "loss": 0.1951, "step": 54780 }, { "epoch": 0.63, "learning_rate": 4.3744220824438637e-05, "loss": 0.1341, "step": 54800 }, { "epoch": 0.63, "learning_rate": 4.3741937693352666e-05, "loss": 0.1021, "step": 54820 }, { "epoch": 0.63, "learning_rate": 4.3739654562266696e-05, "loss": 0.2176, "step": 54840 }, { "epoch": 0.63, "learning_rate": 4.3737371431180726e-05, "loss": 0.4203, "step": 54860 }, { "epoch": 0.63, "learning_rate": 4.373508830009475e-05, "loss": 0.0518, "step": 54880 }, { "epoch": 0.63, "learning_rate": 4.373280516900878e-05, "loss": 0.1641, "step": 54900 }, { "epoch": 0.63, "learning_rate": 4.373052203792281e-05, "loss": 0.166, "step": 54920 }, { "epoch": 0.63, "learning_rate": 4.372823890683684e-05, "loss": 0.4471, "step": 54940 }, { "epoch": 0.63, "learning_rate": 4.372595577575087e-05, "loss": 0.0612, "step": 54960 }, { "epoch": 0.63, "learning_rate": 4.37236726446649e-05, "loss": 0.4015, "step": 54980 }, { "epoch": 0.63, "learning_rate": 4.3721389513578927e-05, "loss": 0.4025, "step": 55000 }, { "epoch": 0.63, "learning_rate": 4.371910638249295e-05, "loss": 0.2028, "step": 55020 }, { "epoch": 0.63, "learning_rate": 4.371682325140698e-05, "loss": 0.3687, "step": 55040 }, { "epoch": 0.63, "learning_rate": 4.3714540120321016e-05, "loss": 0.4432, "step": 55060 }, { "epoch": 0.63, "learning_rate": 4.371225698923504e-05, "loss": 0.2686, "step": 55080 }, { "epoch": 0.63, "learning_rate": 4.370997385814907e-05, "loss": 0.2985, "step": 55100 }, { "epoch": 0.63, "learning_rate": 4.370769072706309e-05, "loss": 0.2503, "step": 55120 }, { "epoch": 0.63, "learning_rate": 4.370540759597713e-05, "loss": 0.177, "step": 55140 }, { "epoch": 0.63, "learning_rate": 4.370312446489116e-05, "loss": 0.2486, "step": 55160 }, { "epoch": 0.63, "learning_rate": 4.370084133380518e-05, "loss": 0.1046, "step": 55180 }, { "epoch": 0.63, "learning_rate": 4.369855820271921e-05, "loss": 0.2464, "step": 55200 }, { "epoch": 0.63, "learning_rate": 4.369627507163324e-05, "loss": 0.3907, "step": 55220 }, { "epoch": 0.63, "learning_rate": 4.369399194054727e-05, "loss": 0.1758, "step": 55240 }, { "epoch": 0.63, "learning_rate": 4.36917088094613e-05, "loss": 0.0733, "step": 55260 }, { "epoch": 0.63, "learning_rate": 4.368942567837533e-05, "loss": 0.1881, "step": 55280 }, { "epoch": 0.63, "learning_rate": 4.368714254728936e-05, "loss": 0.2173, "step": 55300 }, { "epoch": 0.63, "learning_rate": 4.368485941620338e-05, "loss": 0.2979, "step": 55320 }, { "epoch": 0.63, "learning_rate": 4.368257628511741e-05, "loss": 0.3008, "step": 55340 }, { "epoch": 0.63, "learning_rate": 4.368029315403144e-05, "loss": 0.2424, "step": 55360 }, { "epoch": 0.63, "learning_rate": 4.367801002294547e-05, "loss": 0.1186, "step": 55380 }, { "epoch": 0.63, "learning_rate": 4.36757268918595e-05, "loss": 0.2649, "step": 55400 }, { "epoch": 0.63, "learning_rate": 4.367344376077352e-05, "loss": 0.1601, "step": 55420 }, { "epoch": 0.63, "learning_rate": 4.367116062968756e-05, "loss": 0.1462, "step": 55440 }, { "epoch": 0.63, "learning_rate": 4.366887749860158e-05, "loss": 0.0726, "step": 55460 }, { "epoch": 0.63, "learning_rate": 4.366659436751561e-05, "loss": 0.3875, "step": 55480 }, { "epoch": 0.63, "learning_rate": 4.366431123642964e-05, "loss": 0.4155, "step": 55500 }, { "epoch": 0.63, "learning_rate": 4.366202810534367e-05, "loss": 0.0798, "step": 55520 }, { "epoch": 0.63, "learning_rate": 4.36597449742577e-05, "loss": 0.1642, "step": 55540 }, { "epoch": 0.63, "learning_rate": 4.3657461843171724e-05, "loss": 0.1078, "step": 55560 }, { "epoch": 0.63, "learning_rate": 4.365517871208576e-05, "loss": 0.1802, "step": 55580 }, { "epoch": 0.63, "learning_rate": 4.365289558099978e-05, "loss": 0.5405, "step": 55600 }, { "epoch": 0.63, "learning_rate": 4.365061244991381e-05, "loss": 0.2806, "step": 55620 }, { "epoch": 0.64, "learning_rate": 4.364832931882784e-05, "loss": 0.1424, "step": 55640 }, { "epoch": 0.64, "learning_rate": 4.364604618774187e-05, "loss": 0.4149, "step": 55660 }, { "epoch": 0.64, "learning_rate": 4.36437630566559e-05, "loss": 0.2151, "step": 55680 }, { "epoch": 0.64, "learning_rate": 4.3641479925569925e-05, "loss": 0.5121, "step": 55700 }, { "epoch": 0.64, "learning_rate": 4.3639196794483955e-05, "loss": 0.3701, "step": 55720 }, { "epoch": 0.64, "learning_rate": 4.363691366339799e-05, "loss": 0.0874, "step": 55740 }, { "epoch": 0.64, "learning_rate": 4.3634630532312014e-05, "loss": 0.0913, "step": 55760 }, { "epoch": 0.64, "learning_rate": 4.3632347401226044e-05, "loss": 0.4155, "step": 55780 }, { "epoch": 0.64, "learning_rate": 4.3630064270140066e-05, "loss": 0.2518, "step": 55800 }, { "epoch": 0.64, "learning_rate": 4.36277811390541e-05, "loss": 0.225, "step": 55820 }, { "epoch": 0.64, "learning_rate": 4.362549800796813e-05, "loss": 0.1747, "step": 55840 }, { "epoch": 0.64, "learning_rate": 4.3623214876882156e-05, "loss": 0.2536, "step": 55860 }, { "epoch": 0.64, "learning_rate": 4.362093174579619e-05, "loss": 0.2323, "step": 55880 }, { "epoch": 0.64, "learning_rate": 4.3618648614710215e-05, "loss": 0.1233, "step": 55900 }, { "epoch": 0.64, "learning_rate": 4.3616365483624245e-05, "loss": 0.1029, "step": 55920 }, { "epoch": 0.64, "learning_rate": 4.3614082352538274e-05, "loss": 0.2086, "step": 55940 }, { "epoch": 0.64, "learning_rate": 4.3611799221452304e-05, "loss": 0.1893, "step": 55960 }, { "epoch": 0.64, "learning_rate": 4.3609516090366334e-05, "loss": 0.2606, "step": 55980 }, { "epoch": 0.64, "learning_rate": 4.3607232959280357e-05, "loss": 0.2089, "step": 56000 }, { "epoch": 0.64, "learning_rate": 4.3604949828194386e-05, "loss": 0.1685, "step": 56020 }, { "epoch": 0.64, "learning_rate": 4.3602666697108416e-05, "loss": 0.23, "step": 56040 }, { "epoch": 0.64, "learning_rate": 4.3600383566022446e-05, "loss": 0.1427, "step": 56060 }, { "epoch": 0.64, "learning_rate": 4.3598100434936475e-05, "loss": 0.2631, "step": 56080 }, { "epoch": 0.64, "learning_rate": 4.35958173038505e-05, "loss": 0.2567, "step": 56100 }, { "epoch": 0.64, "learning_rate": 4.3593534172764535e-05, "loss": 0.5654, "step": 56120 }, { "epoch": 0.64, "learning_rate": 4.359125104167856e-05, "loss": 0.1471, "step": 56140 }, { "epoch": 0.64, "learning_rate": 4.358896791059259e-05, "loss": 0.4266, "step": 56160 }, { "epoch": 0.64, "learning_rate": 4.358668477950662e-05, "loss": 0.1186, "step": 56180 }, { "epoch": 0.64, "learning_rate": 4.3584401648420647e-05, "loss": 0.1618, "step": 56200 }, { "epoch": 0.64, "learning_rate": 4.3582118517334676e-05, "loss": 0.1192, "step": 56220 }, { "epoch": 0.64, "learning_rate": 4.35798353862487e-05, "loss": 0.2269, "step": 56240 }, { "epoch": 0.64, "learning_rate": 4.3577552255162736e-05, "loss": 0.144, "step": 56260 }, { "epoch": 0.64, "learning_rate": 4.357526912407676e-05, "loss": 0.2186, "step": 56280 }, { "epoch": 0.64, "learning_rate": 4.357298599299079e-05, "loss": 0.2241, "step": 56300 }, { "epoch": 0.64, "learning_rate": 4.357070286190482e-05, "loss": 0.5093, "step": 56320 }, { "epoch": 0.64, "learning_rate": 4.356841973081885e-05, "loss": 0.1499, "step": 56340 }, { "epoch": 0.64, "learning_rate": 4.356613659973288e-05, "loss": 0.2756, "step": 56360 }, { "epoch": 0.64, "learning_rate": 4.35638534686469e-05, "loss": 0.1168, "step": 56380 }, { "epoch": 0.64, "learning_rate": 4.356157033756093e-05, "loss": 0.2315, "step": 56400 }, { "epoch": 0.64, "learning_rate": 4.3559287206474966e-05, "loss": 0.2648, "step": 56420 }, { "epoch": 0.64, "learning_rate": 4.355700407538899e-05, "loss": 0.2131, "step": 56440 }, { "epoch": 0.64, "learning_rate": 4.355472094430302e-05, "loss": 0.1883, "step": 56460 }, { "epoch": 0.64, "learning_rate": 4.355243781321704e-05, "loss": 0.1727, "step": 56480 }, { "epoch": 0.64, "learning_rate": 4.355015468213108e-05, "loss": 0.168, "step": 56500 }, { "epoch": 0.65, "learning_rate": 4.354787155104511e-05, "loss": 0.1957, "step": 56520 }, { "epoch": 0.65, "learning_rate": 4.354558841995913e-05, "loss": 0.2914, "step": 56540 }, { "epoch": 0.65, "learning_rate": 4.354330528887317e-05, "loss": 0.2001, "step": 56560 }, { "epoch": 0.65, "learning_rate": 4.354102215778719e-05, "loss": 0.0896, "step": 56580 }, { "epoch": 0.65, "learning_rate": 4.353873902670122e-05, "loss": 0.1219, "step": 56600 }, { "epoch": 0.65, "learning_rate": 4.353645589561525e-05, "loss": 0.19, "step": 56620 }, { "epoch": 0.65, "learning_rate": 4.353417276452928e-05, "loss": 0.0542, "step": 56640 }, { "epoch": 0.65, "learning_rate": 4.353188963344331e-05, "loss": 0.1803, "step": 56660 }, { "epoch": 0.65, "learning_rate": 4.352960650235733e-05, "loss": 0.0365, "step": 56680 }, { "epoch": 0.65, "learning_rate": 4.352732337127136e-05, "loss": 0.2513, "step": 56700 }, { "epoch": 0.65, "learning_rate": 4.352504024018539e-05, "loss": 0.1961, "step": 56720 }, { "epoch": 0.65, "learning_rate": 4.352275710909942e-05, "loss": 0.3279, "step": 56740 }, { "epoch": 0.65, "learning_rate": 4.352047397801345e-05, "loss": 0.1139, "step": 56760 }, { "epoch": 0.65, "learning_rate": 4.3518190846927474e-05, "loss": 0.2508, "step": 56780 }, { "epoch": 0.65, "learning_rate": 4.351590771584151e-05, "loss": 0.1482, "step": 56800 }, { "epoch": 0.65, "learning_rate": 4.351362458475553e-05, "loss": 0.202, "step": 56820 }, { "epoch": 0.65, "learning_rate": 4.351134145366956e-05, "loss": 0.3097, "step": 56840 }, { "epoch": 0.65, "learning_rate": 4.35090583225836e-05, "loss": 0.3189, "step": 56860 }, { "epoch": 0.65, "learning_rate": 4.350677519149762e-05, "loss": 0.1733, "step": 56880 }, { "epoch": 0.65, "learning_rate": 4.350449206041165e-05, "loss": 0.1611, "step": 56900 }, { "epoch": 0.65, "learning_rate": 4.3502208929325675e-05, "loss": 0.1506, "step": 56920 }, { "epoch": 0.65, "learning_rate": 4.349992579823971e-05, "loss": 0.2126, "step": 56940 }, { "epoch": 0.65, "learning_rate": 4.349764266715374e-05, "loss": 0.3858, "step": 56960 }, { "epoch": 0.65, "learning_rate": 4.3495359536067764e-05, "loss": 0.3047, "step": 56980 }, { "epoch": 0.65, "learning_rate": 4.349307640498179e-05, "loss": 0.2221, "step": 57000 }, { "epoch": 0.65, "learning_rate": 4.349079327389582e-05, "loss": 0.1124, "step": 57020 }, { "epoch": 0.65, "learning_rate": 4.348851014280985e-05, "loss": 0.4232, "step": 57040 }, { "epoch": 0.65, "learning_rate": 4.348622701172388e-05, "loss": 0.3204, "step": 57060 }, { "epoch": 0.65, "learning_rate": 4.3483943880637905e-05, "loss": 0.116, "step": 57080 }, { "epoch": 0.65, "learning_rate": 4.348166074955194e-05, "loss": 0.1495, "step": 57100 }, { "epoch": 0.65, "learning_rate": 4.3479377618465965e-05, "loss": 0.0997, "step": 57120 }, { "epoch": 0.65, "learning_rate": 4.3477094487379994e-05, "loss": 0.1189, "step": 57140 }, { "epoch": 0.65, "learning_rate": 4.3474811356294024e-05, "loss": 0.1754, "step": 57160 }, { "epoch": 0.65, "learning_rate": 4.3472528225208054e-05, "loss": 0.1678, "step": 57180 }, { "epoch": 0.65, "learning_rate": 4.347024509412208e-05, "loss": 0.2091, "step": 57200 }, { "epoch": 0.65, "learning_rate": 4.3467961963036106e-05, "loss": 0.3482, "step": 57220 }, { "epoch": 0.65, "learning_rate": 4.346567883195014e-05, "loss": 0.2054, "step": 57240 }, { "epoch": 0.65, "learning_rate": 4.3463395700864166e-05, "loss": 0.1988, "step": 57260 }, { "epoch": 0.65, "learning_rate": 4.3461112569778195e-05, "loss": 0.2169, "step": 57280 }, { "epoch": 0.65, "learning_rate": 4.3458829438692225e-05, "loss": 0.3158, "step": 57300 }, { "epoch": 0.65, "learning_rate": 4.3456546307606255e-05, "loss": 0.1917, "step": 57320 }, { "epoch": 0.65, "learning_rate": 4.3454263176520284e-05, "loss": 0.3525, "step": 57340 }, { "epoch": 0.65, "learning_rate": 4.345198004543431e-05, "loss": 0.2114, "step": 57360 }, { "epoch": 0.66, "learning_rate": 4.344969691434834e-05, "loss": 0.3883, "step": 57380 }, { "epoch": 0.66, "learning_rate": 4.3447413783262367e-05, "loss": 0.6687, "step": 57400 }, { "epoch": 0.66, "learning_rate": 4.3445130652176396e-05, "loss": 0.3013, "step": 57420 }, { "epoch": 0.66, "learning_rate": 4.3442847521090426e-05, "loss": 0.1865, "step": 57440 }, { "epoch": 0.66, "learning_rate": 4.3440564390004456e-05, "loss": 0.3527, "step": 57460 }, { "epoch": 0.66, "learning_rate": 4.3438281258918485e-05, "loss": 0.2254, "step": 57480 }, { "epoch": 0.66, "learning_rate": 4.343599812783251e-05, "loss": 0.2446, "step": 57500 }, { "epoch": 0.66, "learning_rate": 4.343371499674654e-05, "loss": 0.1004, "step": 57520 }, { "epoch": 0.66, "learning_rate": 4.3431431865660574e-05, "loss": 0.1626, "step": 57540 }, { "epoch": 0.66, "learning_rate": 4.34291487345746e-05, "loss": 0.1901, "step": 57560 }, { "epoch": 0.66, "learning_rate": 4.342686560348863e-05, "loss": 0.2267, "step": 57580 }, { "epoch": 0.66, "learning_rate": 4.342458247240265e-05, "loss": 0.3725, "step": 57600 }, { "epoch": 0.66, "learning_rate": 4.3422299341316686e-05, "loss": 0.1182, "step": 57620 }, { "epoch": 0.66, "learning_rate": 4.3420016210230716e-05, "loss": 0.2045, "step": 57640 }, { "epoch": 0.66, "learning_rate": 4.341773307914474e-05, "loss": 0.2729, "step": 57660 }, { "epoch": 0.66, "learning_rate": 4.341544994805877e-05, "loss": 0.3926, "step": 57680 }, { "epoch": 0.66, "learning_rate": 4.34131668169728e-05, "loss": 0.1217, "step": 57700 }, { "epoch": 0.66, "learning_rate": 4.341088368588683e-05, "loss": 0.4261, "step": 57720 }, { "epoch": 0.66, "learning_rate": 4.340860055480086e-05, "loss": 0.0521, "step": 57740 }, { "epoch": 0.66, "learning_rate": 4.340631742371489e-05, "loss": 0.2476, "step": 57760 }, { "epoch": 0.66, "learning_rate": 4.340403429262892e-05, "loss": 0.1419, "step": 57780 }, { "epoch": 0.66, "learning_rate": 4.340175116154294e-05, "loss": 0.0879, "step": 57800 }, { "epoch": 0.66, "learning_rate": 4.339946803045697e-05, "loss": 0.1876, "step": 57820 }, { "epoch": 0.66, "learning_rate": 4.3397184899371e-05, "loss": 0.2103, "step": 57840 }, { "epoch": 0.66, "learning_rate": 4.339490176828503e-05, "loss": 0.3682, "step": 57860 }, { "epoch": 0.66, "learning_rate": 4.339261863719906e-05, "loss": 0.2293, "step": 57880 }, { "epoch": 0.66, "learning_rate": 4.339033550611308e-05, "loss": 0.1447, "step": 57900 }, { "epoch": 0.66, "learning_rate": 4.338805237502712e-05, "loss": 0.1431, "step": 57920 }, { "epoch": 0.66, "learning_rate": 4.338576924394114e-05, "loss": 0.1158, "step": 57940 }, { "epoch": 0.66, "learning_rate": 4.338348611285517e-05, "loss": 0.1297, "step": 57960 }, { "epoch": 0.66, "learning_rate": 4.33812029817692e-05, "loss": 0.089, "step": 57980 }, { "epoch": 0.66, "learning_rate": 4.337891985068323e-05, "loss": 0.3528, "step": 58000 }, { "epoch": 0.66, "learning_rate": 4.337663671959726e-05, "loss": 0.4588, "step": 58020 }, { "epoch": 0.66, "learning_rate": 4.337435358851128e-05, "loss": 0.155, "step": 58040 }, { "epoch": 0.66, "learning_rate": 4.337207045742532e-05, "loss": 0.2236, "step": 58060 }, { "epoch": 0.66, "learning_rate": 4.336978732633934e-05, "loss": 0.3604, "step": 58080 }, { "epoch": 0.66, "learning_rate": 4.336750419525337e-05, "loss": 0.2948, "step": 58100 }, { "epoch": 0.66, "learning_rate": 4.33652210641674e-05, "loss": 0.168, "step": 58120 }, { "epoch": 0.66, "learning_rate": 4.336293793308143e-05, "loss": 0.1351, "step": 58140 }, { "epoch": 0.66, "learning_rate": 4.336065480199546e-05, "loss": 0.0961, "step": 58160 }, { "epoch": 0.66, "learning_rate": 4.3358371670909484e-05, "loss": 0.3046, "step": 58180 }, { "epoch": 0.66, "learning_rate": 4.335608853982351e-05, "loss": 0.1748, "step": 58200 }, { "epoch": 0.66, "learning_rate": 4.335380540873755e-05, "loss": 0.2155, "step": 58220 }, { "epoch": 0.66, "learning_rate": 4.335152227765157e-05, "loss": 0.1091, "step": 58240 }, { "epoch": 0.67, "learning_rate": 4.33492391465656e-05, "loss": 0.1906, "step": 58260 }, { "epoch": 0.67, "learning_rate": 4.3346956015479625e-05, "loss": 0.1875, "step": 58280 }, { "epoch": 0.67, "learning_rate": 4.334467288439366e-05, "loss": 0.1183, "step": 58300 }, { "epoch": 0.67, "learning_rate": 4.334238975330769e-05, "loss": 0.4013, "step": 58320 }, { "epoch": 0.67, "learning_rate": 4.3340106622221714e-05, "loss": 0.0229, "step": 58340 }, { "epoch": 0.67, "learning_rate": 4.333782349113575e-05, "loss": 0.1453, "step": 58360 }, { "epoch": 0.67, "learning_rate": 4.3335540360049774e-05, "loss": 0.341, "step": 58380 }, { "epoch": 0.67, "learning_rate": 4.33332572289638e-05, "loss": 0.2918, "step": 58400 }, { "epoch": 0.67, "learning_rate": 4.333097409787783e-05, "loss": 0.2362, "step": 58420 }, { "epoch": 0.67, "learning_rate": 4.332869096679186e-05, "loss": 0.1772, "step": 58440 }, { "epoch": 0.67, "learning_rate": 4.332640783570589e-05, "loss": 0.2317, "step": 58460 }, { "epoch": 0.67, "learning_rate": 4.3324124704619915e-05, "loss": 0.0773, "step": 58480 }, { "epoch": 0.67, "learning_rate": 4.3321841573533945e-05, "loss": 0.2781, "step": 58500 }, { "epoch": 0.67, "learning_rate": 4.3319558442447975e-05, "loss": 0.544, "step": 58520 }, { "epoch": 0.67, "learning_rate": 4.3317275311362004e-05, "loss": 0.1427, "step": 58540 }, { "epoch": 0.67, "learning_rate": 4.3314992180276034e-05, "loss": 0.3516, "step": 58560 }, { "epoch": 0.67, "learning_rate": 4.331270904919006e-05, "loss": 0.2731, "step": 58580 }, { "epoch": 0.67, "learning_rate": 4.331042591810409e-05, "loss": 0.1919, "step": 58600 }, { "epoch": 0.67, "learning_rate": 4.3308142787018116e-05, "loss": 0.1195, "step": 58620 }, { "epoch": 0.67, "learning_rate": 4.3305859655932146e-05, "loss": 0.3083, "step": 58640 }, { "epoch": 0.67, "learning_rate": 4.3303576524846176e-05, "loss": 0.1291, "step": 58660 }, { "epoch": 0.67, "learning_rate": 4.3301293393760205e-05, "loss": 0.2774, "step": 58680 }, { "epoch": 0.67, "learning_rate": 4.3299010262674235e-05, "loss": 0.1885, "step": 58700 }, { "epoch": 0.67, "learning_rate": 4.329672713158826e-05, "loss": 0.1211, "step": 58720 }, { "epoch": 0.67, "learning_rate": 4.3294444000502294e-05, "loss": 0.1914, "step": 58740 }, { "epoch": 0.67, "learning_rate": 4.329216086941632e-05, "loss": 0.3424, "step": 58760 }, { "epoch": 0.67, "learning_rate": 4.328987773833035e-05, "loss": 0.0807, "step": 58780 }, { "epoch": 0.67, "learning_rate": 4.328759460724438e-05, "loss": 0.0952, "step": 58800 }, { "epoch": 0.67, "learning_rate": 4.3285311476158406e-05, "loss": 0.2713, "step": 58820 }, { "epoch": 0.67, "learning_rate": 4.3283028345072436e-05, "loss": 0.4425, "step": 58840 }, { "epoch": 0.67, "learning_rate": 4.328074521398646e-05, "loss": 0.2371, "step": 58860 }, { "epoch": 0.67, "learning_rate": 4.327846208290049e-05, "loss": 0.1756, "step": 58880 }, { "epoch": 0.67, "learning_rate": 4.3276178951814525e-05, "loss": 0.1749, "step": 58900 }, { "epoch": 0.67, "learning_rate": 4.327389582072855e-05, "loss": 0.0514, "step": 58920 }, { "epoch": 0.67, "learning_rate": 4.327161268964258e-05, "loss": 0.2311, "step": 58940 }, { "epoch": 0.67, "learning_rate": 4.32693295585566e-05, "loss": 0.3595, "step": 58960 }, { "epoch": 0.67, "learning_rate": 4.326704642747064e-05, "loss": 0.3019, "step": 58980 }, { "epoch": 0.67, "learning_rate": 4.326476329638467e-05, "loss": 0.3227, "step": 59000 }, { "epoch": 0.67, "learning_rate": 4.326248016529869e-05, "loss": 0.2815, "step": 59020 }, { "epoch": 0.67, "learning_rate": 4.3260197034212726e-05, "loss": 0.1095, "step": 59040 }, { "epoch": 0.67, "learning_rate": 4.325791390312675e-05, "loss": 0.2313, "step": 59060 }, { "epoch": 0.67, "learning_rate": 4.325563077204078e-05, "loss": 0.2234, "step": 59080 }, { "epoch": 0.67, "learning_rate": 4.325334764095481e-05, "loss": 0.3171, "step": 59100 }, { "epoch": 0.67, "learning_rate": 4.325106450986884e-05, "loss": 0.3376, "step": 59120 }, { "epoch": 0.68, "learning_rate": 4.324878137878287e-05, "loss": 0.1913, "step": 59140 }, { "epoch": 0.68, "learning_rate": 4.324649824769689e-05, "loss": 0.1885, "step": 59160 }, { "epoch": 0.68, "learning_rate": 4.324421511661092e-05, "loss": 0.1544, "step": 59180 }, { "epoch": 0.68, "learning_rate": 4.324193198552495e-05, "loss": 0.2292, "step": 59200 }, { "epoch": 0.68, "learning_rate": 4.323964885443898e-05, "loss": 0.139, "step": 59220 }, { "epoch": 0.68, "learning_rate": 4.323736572335301e-05, "loss": 0.5391, "step": 59240 }, { "epoch": 0.68, "learning_rate": 4.323508259226703e-05, "loss": 0.1018, "step": 59260 }, { "epoch": 0.68, "learning_rate": 4.323279946118107e-05, "loss": 0.0753, "step": 59280 }, { "epoch": 0.68, "learning_rate": 4.323051633009509e-05, "loss": 0.3188, "step": 59300 }, { "epoch": 0.68, "learning_rate": 4.322823319900912e-05, "loss": 0.4566, "step": 59320 }, { "epoch": 0.68, "learning_rate": 4.322595006792316e-05, "loss": 0.1387, "step": 59340 }, { "epoch": 0.68, "learning_rate": 4.322366693683718e-05, "loss": 0.0953, "step": 59360 }, { "epoch": 0.68, "learning_rate": 4.322138380575121e-05, "loss": 0.5192, "step": 59380 }, { "epoch": 0.68, "learning_rate": 4.321910067466523e-05, "loss": 0.2948, "step": 59400 }, { "epoch": 0.68, "learning_rate": 4.321681754357927e-05, "loss": 0.6115, "step": 59420 }, { "epoch": 0.68, "learning_rate": 4.32145344124933e-05, "loss": 0.3052, "step": 59440 }, { "epoch": 0.68, "learning_rate": 4.321225128140732e-05, "loss": 0.2326, "step": 59460 }, { "epoch": 0.68, "learning_rate": 4.320996815032135e-05, "loss": 0.1123, "step": 59480 }, { "epoch": 0.68, "learning_rate": 4.320768501923538e-05, "loss": 0.1094, "step": 59500 }, { "epoch": 0.68, "learning_rate": 4.320540188814941e-05, "loss": 0.1788, "step": 59520 }, { "epoch": 0.68, "learning_rate": 4.320311875706344e-05, "loss": 0.0751, "step": 59540 }, { "epoch": 0.68, "learning_rate": 4.3200835625977464e-05, "loss": 0.2296, "step": 59560 }, { "epoch": 0.68, "learning_rate": 4.31985524948915e-05, "loss": 0.3327, "step": 59580 }, { "epoch": 0.68, "learning_rate": 4.319626936380552e-05, "loss": 0.3054, "step": 59600 }, { "epoch": 0.68, "learning_rate": 4.319398623271955e-05, "loss": 0.2997, "step": 59620 }, { "epoch": 0.68, "learning_rate": 4.319170310163358e-05, "loss": 0.2205, "step": 59640 }, { "epoch": 0.68, "learning_rate": 4.318941997054761e-05, "loss": 0.2414, "step": 59660 }, { "epoch": 0.68, "learning_rate": 4.318713683946164e-05, "loss": 0.2213, "step": 59680 }, { "epoch": 0.68, "learning_rate": 4.3184853708375665e-05, "loss": 0.2444, "step": 59700 }, { "epoch": 0.68, "learning_rate": 4.31825705772897e-05, "loss": 0.0835, "step": 59720 }, { "epoch": 0.68, "learning_rate": 4.3180287446203724e-05, "loss": 0.2586, "step": 59740 }, { "epoch": 0.68, "learning_rate": 4.3178004315117754e-05, "loss": 0.2909, "step": 59760 }, { "epoch": 0.68, "learning_rate": 4.3175721184031784e-05, "loss": 0.098, "step": 59780 }, { "epoch": 0.68, "learning_rate": 4.317343805294581e-05, "loss": 0.0411, "step": 59800 }, { "epoch": 0.68, "learning_rate": 4.317115492185984e-05, "loss": 0.1567, "step": 59820 }, { "epoch": 0.68, "learning_rate": 4.3168871790773866e-05, "loss": 0.2702, "step": 59840 }, { "epoch": 0.68, "learning_rate": 4.3166588659687896e-05, "loss": 0.1174, "step": 59860 }, { "epoch": 0.68, "learning_rate": 4.3164305528601925e-05, "loss": 0.7292, "step": 59880 }, { "epoch": 0.68, "learning_rate": 4.3162022397515955e-05, "loss": 0.3915, "step": 59900 }, { "epoch": 0.68, "learning_rate": 4.3159739266429985e-05, "loss": 0.1718, "step": 59920 }, { "epoch": 0.68, "learning_rate": 4.3157456135344014e-05, "loss": 0.4242, "step": 59940 }, { "epoch": 0.68, "learning_rate": 4.3155173004258044e-05, "loss": 0.4418, "step": 59960 }, { "epoch": 0.68, "learning_rate": 4.315288987317207e-05, "loss": 0.4777, "step": 59980 }, { "epoch": 0.68, "learning_rate": 4.3150606742086097e-05, "loss": 0.1333, "step": 60000 }, { "epoch": 0.69, "learning_rate": 4.314832361100013e-05, "loss": 0.1708, "step": 60020 }, { "epoch": 0.69, "learning_rate": 4.3146040479914156e-05, "loss": 0.1016, "step": 60040 }, { "epoch": 0.69, "learning_rate": 4.3143757348828186e-05, "loss": 0.2343, "step": 60060 }, { "epoch": 0.69, "learning_rate": 4.314147421774221e-05, "loss": 0.347, "step": 60080 }, { "epoch": 0.69, "learning_rate": 4.3139191086656245e-05, "loss": 0.4232, "step": 60100 }, { "epoch": 0.69, "learning_rate": 4.3136907955570275e-05, "loss": 0.2412, "step": 60120 }, { "epoch": 0.69, "learning_rate": 4.31346248244843e-05, "loss": 0.0833, "step": 60140 }, { "epoch": 0.69, "learning_rate": 4.313234169339833e-05, "loss": 0.2546, "step": 60160 }, { "epoch": 0.69, "learning_rate": 4.313005856231236e-05, "loss": 0.341, "step": 60180 }, { "epoch": 0.69, "learning_rate": 4.312777543122639e-05, "loss": 0.3153, "step": 60200 }, { "epoch": 0.69, "learning_rate": 4.3125492300140416e-05, "loss": 0.1645, "step": 60220 }, { "epoch": 0.69, "learning_rate": 4.3123209169054446e-05, "loss": 0.4471, "step": 60240 }, { "epoch": 0.69, "learning_rate": 4.3120926037968476e-05, "loss": 0.1596, "step": 60260 }, { "epoch": 0.69, "learning_rate": 4.31186429068825e-05, "loss": 0.23, "step": 60280 }, { "epoch": 0.69, "learning_rate": 4.311635977579653e-05, "loss": 0.2107, "step": 60300 }, { "epoch": 0.69, "learning_rate": 4.311407664471056e-05, "loss": 0.1172, "step": 60320 }, { "epoch": 0.69, "learning_rate": 4.311179351362459e-05, "loss": 0.1819, "step": 60340 }, { "epoch": 0.69, "learning_rate": 4.310951038253862e-05, "loss": 0.1169, "step": 60360 }, { "epoch": 0.69, "learning_rate": 4.310722725145264e-05, "loss": 0.1573, "step": 60380 }, { "epoch": 0.69, "learning_rate": 4.310494412036668e-05, "loss": 0.3077, "step": 60400 }, { "epoch": 0.69, "learning_rate": 4.31026609892807e-05, "loss": 0.1357, "step": 60420 }, { "epoch": 0.69, "learning_rate": 4.310037785819473e-05, "loss": 0.034, "step": 60440 }, { "epoch": 0.69, "learning_rate": 4.309809472710876e-05, "loss": 0.0775, "step": 60460 }, { "epoch": 0.69, "learning_rate": 4.309581159602279e-05, "loss": 0.2763, "step": 60480 }, { "epoch": 0.69, "learning_rate": 4.309352846493682e-05, "loss": 0.173, "step": 60500 }, { "epoch": 0.69, "learning_rate": 4.309124533385084e-05, "loss": 0.2684, "step": 60520 }, { "epoch": 0.69, "learning_rate": 4.308896220276488e-05, "loss": 0.3466, "step": 60540 }, { "epoch": 0.69, "learning_rate": 4.30866790716789e-05, "loss": 0.2296, "step": 60560 }, { "epoch": 0.69, "learning_rate": 4.308439594059293e-05, "loss": 0.196, "step": 60580 }, { "epoch": 0.69, "learning_rate": 4.308211280950696e-05, "loss": 0.1451, "step": 60600 }, { "epoch": 0.69, "learning_rate": 4.307982967842099e-05, "loss": 0.2715, "step": 60620 }, { "epoch": 0.69, "learning_rate": 4.307754654733502e-05, "loss": 0.0701, "step": 60640 }, { "epoch": 0.69, "learning_rate": 4.307526341624904e-05, "loss": 0.2464, "step": 60660 }, { "epoch": 0.69, "learning_rate": 4.307298028516307e-05, "loss": 0.3318, "step": 60680 }, { "epoch": 0.69, "learning_rate": 4.307069715407711e-05, "loss": 0.0474, "step": 60700 }, { "epoch": 0.69, "learning_rate": 4.306841402299113e-05, "loss": 0.1687, "step": 60720 }, { "epoch": 0.69, "learning_rate": 4.306613089190516e-05, "loss": 0.12, "step": 60740 }, { "epoch": 0.69, "learning_rate": 4.3063847760819184e-05, "loss": 0.0779, "step": 60760 }, { "epoch": 0.69, "learning_rate": 4.306156462973322e-05, "loss": 0.3565, "step": 60780 }, { "epoch": 0.69, "learning_rate": 4.305928149864725e-05, "loss": 0.2832, "step": 60800 }, { "epoch": 0.69, "learning_rate": 4.305699836756127e-05, "loss": 0.1174, "step": 60820 }, { "epoch": 0.69, "learning_rate": 4.305471523647531e-05, "loss": 0.2425, "step": 60840 }, { "epoch": 0.69, "learning_rate": 4.305243210538933e-05, "loss": 0.2878, "step": 60860 }, { "epoch": 0.69, "learning_rate": 4.305014897430336e-05, "loss": 0.2095, "step": 60880 }, { "epoch": 0.7, "learning_rate": 4.304786584321739e-05, "loss": 0.331, "step": 60900 }, { "epoch": 0.7, "learning_rate": 4.304558271213142e-05, "loss": 0.1906, "step": 60920 }, { "epoch": 0.7, "learning_rate": 4.304329958104545e-05, "loss": 0.1292, "step": 60940 }, { "epoch": 0.7, "learning_rate": 4.3041016449959474e-05, "loss": 0.2002, "step": 60960 }, { "epoch": 0.7, "learning_rate": 4.3038733318873504e-05, "loss": 0.376, "step": 60980 }, { "epoch": 0.7, "learning_rate": 4.303645018778753e-05, "loss": 0.2131, "step": 61000 }, { "epoch": 0.7, "learning_rate": 4.303416705670156e-05, "loss": 0.2984, "step": 61020 }, { "epoch": 0.7, "learning_rate": 4.303188392561559e-05, "loss": 0.2484, "step": 61040 }, { "epoch": 0.7, "learning_rate": 4.3029600794529616e-05, "loss": 0.5938, "step": 61060 }, { "epoch": 0.7, "learning_rate": 4.302731766344365e-05, "loss": 0.1987, "step": 61080 }, { "epoch": 0.7, "learning_rate": 4.3025034532357675e-05, "loss": 0.4329, "step": 61100 }, { "epoch": 0.7, "learning_rate": 4.3022751401271705e-05, "loss": 0.1308, "step": 61120 }, { "epoch": 0.7, "learning_rate": 4.3020468270185734e-05, "loss": 0.1888, "step": 61140 }, { "epoch": 0.7, "learning_rate": 4.3018185139099764e-05, "loss": 0.0939, "step": 61160 }, { "epoch": 0.7, "learning_rate": 4.3015902008013794e-05, "loss": 0.1713, "step": 61180 }, { "epoch": 0.7, "learning_rate": 4.3013618876927817e-05, "loss": 0.2873, "step": 61200 }, { "epoch": 0.7, "learning_rate": 4.301133574584185e-05, "loss": 0.2019, "step": 61220 }, { "epoch": 0.7, "learning_rate": 4.3009052614755876e-05, "loss": 0.3477, "step": 61240 }, { "epoch": 0.7, "learning_rate": 4.3006769483669906e-05, "loss": 0.2551, "step": 61260 }, { "epoch": 0.7, "learning_rate": 4.3004486352583935e-05, "loss": 0.1468, "step": 61280 }, { "epoch": 0.7, "learning_rate": 4.3002203221497965e-05, "loss": 0.3717, "step": 61300 }, { "epoch": 0.7, "learning_rate": 4.2999920090411995e-05, "loss": 0.2216, "step": 61320 }, { "epoch": 0.7, "learning_rate": 4.299763695932602e-05, "loss": 0.1098, "step": 61340 }, { "epoch": 0.7, "learning_rate": 4.299535382824005e-05, "loss": 0.1245, "step": 61360 }, { "epoch": 0.7, "learning_rate": 4.2993070697154084e-05, "loss": 0.2212, "step": 61380 }, { "epoch": 0.7, "learning_rate": 4.299078756606811e-05, "loss": 0.3202, "step": 61400 }, { "epoch": 0.7, "learning_rate": 4.2988504434982136e-05, "loss": 0.4724, "step": 61420 }, { "epoch": 0.7, "learning_rate": 4.298622130389616e-05, "loss": 0.0715, "step": 61440 }, { "epoch": 0.7, "learning_rate": 4.2983938172810196e-05, "loss": 0.3284, "step": 61460 }, { "epoch": 0.7, "learning_rate": 4.2981655041724225e-05, "loss": 0.2294, "step": 61480 }, { "epoch": 0.7, "learning_rate": 4.297937191063825e-05, "loss": 0.1472, "step": 61500 }, { "epoch": 0.7, "learning_rate": 4.2977088779552285e-05, "loss": 0.1619, "step": 61520 }, { "epoch": 0.7, "learning_rate": 4.297480564846631e-05, "loss": 0.1639, "step": 61540 }, { "epoch": 0.7, "learning_rate": 4.297252251738034e-05, "loss": 0.2079, "step": 61560 }, { "epoch": 0.7, "learning_rate": 4.297023938629437e-05, "loss": 0.1844, "step": 61580 }, { "epoch": 0.7, "learning_rate": 4.29679562552084e-05, "loss": 0.1742, "step": 61600 }, { "epoch": 0.7, "learning_rate": 4.2965673124122426e-05, "loss": 0.3588, "step": 61620 }, { "epoch": 0.7, "learning_rate": 4.296338999303645e-05, "loss": 0.1477, "step": 61640 }, { "epoch": 0.7, "learning_rate": 4.296110686195048e-05, "loss": 0.2169, "step": 61660 }, { "epoch": 0.7, "learning_rate": 4.295882373086451e-05, "loss": 0.2998, "step": 61680 }, { "epoch": 0.7, "learning_rate": 4.295654059977854e-05, "loss": 0.3848, "step": 61700 }, { "epoch": 0.7, "learning_rate": 4.295425746869257e-05, "loss": 0.1535, "step": 61720 }, { "epoch": 0.7, "learning_rate": 4.295197433760659e-05, "loss": 0.3887, "step": 61740 }, { "epoch": 0.71, "learning_rate": 4.294969120652063e-05, "loss": 0.1387, "step": 61760 }, { "epoch": 0.71, "learning_rate": 4.294740807543465e-05, "loss": 0.5255, "step": 61780 }, { "epoch": 0.71, "learning_rate": 4.294512494434868e-05, "loss": 0.2052, "step": 61800 }, { "epoch": 0.71, "learning_rate": 4.2942841813262716e-05, "loss": 0.3125, "step": 61820 }, { "epoch": 0.71, "learning_rate": 4.294055868217674e-05, "loss": 0.1834, "step": 61840 }, { "epoch": 0.71, "learning_rate": 4.293827555109077e-05, "loss": 0.223, "step": 61860 }, { "epoch": 0.71, "learning_rate": 4.293599242000479e-05, "loss": 0.0656, "step": 61880 }, { "epoch": 0.71, "learning_rate": 4.293370928891883e-05, "loss": 0.1653, "step": 61900 }, { "epoch": 0.71, "learning_rate": 4.293142615783286e-05, "loss": 0.296, "step": 61920 }, { "epoch": 0.71, "learning_rate": 4.292914302674688e-05, "loss": 0.0429, "step": 61940 }, { "epoch": 0.71, "learning_rate": 4.292685989566091e-05, "loss": 0.0819, "step": 61960 }, { "epoch": 0.71, "learning_rate": 4.292457676457494e-05, "loss": 0.3243, "step": 61980 }, { "epoch": 0.71, "learning_rate": 4.292229363348897e-05, "loss": 0.6354, "step": 62000 }, { "epoch": 0.71, "learning_rate": 4.2920010502403e-05, "loss": 0.0957, "step": 62020 }, { "epoch": 0.71, "learning_rate": 4.291772737131702e-05, "loss": 0.6422, "step": 62040 }, { "epoch": 0.71, "learning_rate": 4.291544424023106e-05, "loss": 0.3118, "step": 62060 }, { "epoch": 0.71, "learning_rate": 4.291316110914508e-05, "loss": 0.67, "step": 62080 }, { "epoch": 0.71, "learning_rate": 4.291087797805911e-05, "loss": 0.136, "step": 62100 }, { "epoch": 0.71, "learning_rate": 4.290859484697314e-05, "loss": 0.1355, "step": 62120 }, { "epoch": 0.71, "learning_rate": 4.290631171588717e-05, "loss": 0.2393, "step": 62140 }, { "epoch": 0.71, "learning_rate": 4.29040285848012e-05, "loss": 0.2584, "step": 62160 }, { "epoch": 0.71, "learning_rate": 4.2901745453715224e-05, "loss": 0.1367, "step": 62180 }, { "epoch": 0.71, "learning_rate": 4.289946232262926e-05, "loss": 0.0772, "step": 62200 }, { "epoch": 0.71, "learning_rate": 4.289717919154328e-05, "loss": 0.2051, "step": 62220 }, { "epoch": 0.71, "learning_rate": 4.289489606045731e-05, "loss": 0.2497, "step": 62240 }, { "epoch": 0.71, "learning_rate": 4.289261292937134e-05, "loss": 0.3714, "step": 62260 }, { "epoch": 0.71, "learning_rate": 4.289032979828537e-05, "loss": 0.1057, "step": 62280 }, { "epoch": 0.71, "learning_rate": 4.28880466671994e-05, "loss": 0.1361, "step": 62300 }, { "epoch": 0.71, "learning_rate": 4.2885763536113425e-05, "loss": 0.2016, "step": 62320 }, { "epoch": 0.71, "learning_rate": 4.2883480405027454e-05, "loss": 0.2316, "step": 62340 }, { "epoch": 0.71, "learning_rate": 4.2881197273941484e-05, "loss": 0.3338, "step": 62360 }, { "epoch": 0.71, "learning_rate": 4.2878914142855514e-05, "loss": 0.2382, "step": 62380 }, { "epoch": 0.71, "learning_rate": 4.287663101176954e-05, "loss": 0.524, "step": 62400 }, { "epoch": 0.71, "learning_rate": 4.287434788068357e-05, "loss": 0.1421, "step": 62420 }, { "epoch": 0.71, "learning_rate": 4.28720647495976e-05, "loss": 0.2898, "step": 62440 }, { "epoch": 0.71, "learning_rate": 4.2869781618511626e-05, "loss": 0.2508, "step": 62460 }, { "epoch": 0.71, "learning_rate": 4.2867498487425655e-05, "loss": 0.161, "step": 62480 }, { "epoch": 0.71, "learning_rate": 4.286521535633969e-05, "loss": 0.0724, "step": 62500 }, { "epoch": 0.71, "learning_rate": 4.2862932225253715e-05, "loss": 0.2155, "step": 62520 }, { "epoch": 0.71, "learning_rate": 4.2860649094167744e-05, "loss": 0.5088, "step": 62540 }, { "epoch": 0.71, "learning_rate": 4.285836596308177e-05, "loss": 0.2969, "step": 62560 }, { "epoch": 0.71, "learning_rate": 4.2856082831995804e-05, "loss": 0.0658, "step": 62580 }, { "epoch": 0.71, "learning_rate": 4.2853799700909833e-05, "loss": 0.216, "step": 62600 }, { "epoch": 0.71, "learning_rate": 4.2851516569823856e-05, "loss": 0.1677, "step": 62620 }, { "epoch": 0.72, "learning_rate": 4.2849233438737886e-05, "loss": 0.2722, "step": 62640 }, { "epoch": 0.72, "learning_rate": 4.2846950307651916e-05, "loss": 0.0939, "step": 62660 }, { "epoch": 0.72, "learning_rate": 4.2844667176565945e-05, "loss": 0.3411, "step": 62680 }, { "epoch": 0.72, "learning_rate": 4.2842384045479975e-05, "loss": 0.2025, "step": 62700 }, { "epoch": 0.72, "learning_rate": 4.2840100914394005e-05, "loss": 0.1543, "step": 62720 }, { "epoch": 0.72, "learning_rate": 4.2837817783308034e-05, "loss": 0.1558, "step": 62740 }, { "epoch": 0.72, "learning_rate": 4.283553465222206e-05, "loss": 0.551, "step": 62760 }, { "epoch": 0.72, "learning_rate": 4.283325152113609e-05, "loss": 0.1756, "step": 62780 }, { "epoch": 0.72, "learning_rate": 4.283096839005012e-05, "loss": 0.3755, "step": 62800 }, { "epoch": 0.72, "learning_rate": 4.2828685258964146e-05, "loss": 0.0472, "step": 62820 }, { "epoch": 0.72, "learning_rate": 4.2826402127878176e-05, "loss": 0.242, "step": 62840 }, { "epoch": 0.72, "learning_rate": 4.28241189967922e-05, "loss": 0.3758, "step": 62860 }, { "epoch": 0.72, "learning_rate": 4.2821835865706235e-05, "loss": 0.4974, "step": 62880 }, { "epoch": 0.72, "learning_rate": 4.281955273462026e-05, "loss": 0.2881, "step": 62900 }, { "epoch": 0.72, "learning_rate": 4.281726960353429e-05, "loss": 0.1252, "step": 62920 }, { "epoch": 0.72, "learning_rate": 4.281498647244832e-05, "loss": 0.151, "step": 62940 }, { "epoch": 0.72, "learning_rate": 4.281270334136235e-05, "loss": 0.2417, "step": 62960 }, { "epoch": 0.72, "learning_rate": 4.281042021027638e-05, "loss": 0.1121, "step": 62980 }, { "epoch": 0.72, "learning_rate": 4.28081370791904e-05, "loss": 0.1488, "step": 63000 }, { "epoch": 0.72, "learning_rate": 4.2805853948104436e-05, "loss": 0.195, "step": 63020 }, { "epoch": 0.72, "learning_rate": 4.280357081701846e-05, "loss": 0.3825, "step": 63040 }, { "epoch": 0.72, "learning_rate": 4.280128768593249e-05, "loss": 0.1935, "step": 63060 }, { "epoch": 0.72, "learning_rate": 4.279900455484652e-05, "loss": 0.1693, "step": 63080 }, { "epoch": 0.72, "learning_rate": 4.279672142376055e-05, "loss": 0.1804, "step": 63100 }, { "epoch": 0.72, "learning_rate": 4.279443829267458e-05, "loss": 0.189, "step": 63120 }, { "epoch": 0.72, "learning_rate": 4.27921551615886e-05, "loss": 0.1329, "step": 63140 }, { "epoch": 0.72, "learning_rate": 4.278987203050263e-05, "loss": 0.1324, "step": 63160 }, { "epoch": 0.72, "learning_rate": 4.278758889941667e-05, "loss": 0.3488, "step": 63180 }, { "epoch": 0.72, "learning_rate": 4.278530576833069e-05, "loss": 0.1703, "step": 63200 }, { "epoch": 0.72, "learning_rate": 4.278302263724472e-05, "loss": 0.1702, "step": 63220 }, { "epoch": 0.72, "learning_rate": 4.278073950615874e-05, "loss": 0.3981, "step": 63240 }, { "epoch": 0.72, "learning_rate": 4.277845637507278e-05, "loss": 0.4388, "step": 63260 }, { "epoch": 0.72, "learning_rate": 4.277617324398681e-05, "loss": 0.165, "step": 63280 }, { "epoch": 0.72, "learning_rate": 4.277389011290083e-05, "loss": 0.1923, "step": 63300 }, { "epoch": 0.72, "learning_rate": 4.277160698181487e-05, "loss": 0.1422, "step": 63320 }, { "epoch": 0.72, "learning_rate": 4.276932385072889e-05, "loss": 0.1188, "step": 63340 }, { "epoch": 0.72, "learning_rate": 4.276704071964292e-05, "loss": 0.1535, "step": 63360 }, { "epoch": 0.72, "learning_rate": 4.276475758855695e-05, "loss": 0.116, "step": 63380 }, { "epoch": 0.72, "learning_rate": 4.276247445747098e-05, "loss": 0.2497, "step": 63400 }, { "epoch": 0.72, "learning_rate": 4.276019132638501e-05, "loss": 0.3348, "step": 63420 }, { "epoch": 0.72, "learning_rate": 4.275790819529903e-05, "loss": 0.2112, "step": 63440 }, { "epoch": 0.72, "learning_rate": 4.275562506421306e-05, "loss": 0.3126, "step": 63460 }, { "epoch": 0.72, "learning_rate": 4.275334193312709e-05, "loss": 0.2668, "step": 63480 }, { "epoch": 0.72, "learning_rate": 4.275105880204112e-05, "loss": 0.2419, "step": 63500 }, { "epoch": 0.73, "learning_rate": 4.274877567095515e-05, "loss": 0.4768, "step": 63520 }, { "epoch": 0.73, "learning_rate": 4.2746492539869174e-05, "loss": 0.2829, "step": 63540 }, { "epoch": 0.73, "learning_rate": 4.274420940878321e-05, "loss": 0.1484, "step": 63560 }, { "epoch": 0.73, "learning_rate": 4.2741926277697234e-05, "loss": 0.1313, "step": 63580 }, { "epoch": 0.73, "learning_rate": 4.273964314661126e-05, "loss": 0.1568, "step": 63600 }, { "epoch": 0.73, "learning_rate": 4.273736001552529e-05, "loss": 0.0695, "step": 63620 }, { "epoch": 0.73, "learning_rate": 4.273507688443932e-05, "loss": 0.2756, "step": 63640 }, { "epoch": 0.73, "learning_rate": 4.273279375335335e-05, "loss": 0.1946, "step": 63660 }, { "epoch": 0.73, "learning_rate": 4.2730510622267375e-05, "loss": 0.1776, "step": 63680 }, { "epoch": 0.73, "learning_rate": 4.272822749118141e-05, "loss": 0.1508, "step": 63700 }, { "epoch": 0.73, "learning_rate": 4.2725944360095435e-05, "loss": 0.3556, "step": 63720 }, { "epoch": 0.73, "learning_rate": 4.2723661229009464e-05, "loss": 0.1745, "step": 63740 }, { "epoch": 0.73, "learning_rate": 4.2721378097923494e-05, "loss": 0.0535, "step": 63760 }, { "epoch": 0.73, "learning_rate": 4.2719094966837524e-05, "loss": 0.2743, "step": 63780 }, { "epoch": 0.73, "learning_rate": 4.2716811835751553e-05, "loss": 0.0953, "step": 63800 }, { "epoch": 0.73, "learning_rate": 4.2714528704665576e-05, "loss": 0.4211, "step": 63820 }, { "epoch": 0.73, "learning_rate": 4.2712245573579606e-05, "loss": 0.0559, "step": 63840 }, { "epoch": 0.73, "learning_rate": 4.270996244249364e-05, "loss": 0.2425, "step": 63860 }, { "epoch": 0.73, "learning_rate": 4.2707679311407665e-05, "loss": 0.1077, "step": 63880 }, { "epoch": 0.73, "learning_rate": 4.2705396180321695e-05, "loss": 0.1932, "step": 63900 }, { "epoch": 0.73, "learning_rate": 4.270311304923572e-05, "loss": 0.1377, "step": 63920 }, { "epoch": 0.73, "learning_rate": 4.2700829918149754e-05, "loss": 0.1683, "step": 63940 }, { "epoch": 0.73, "learning_rate": 4.2698546787063784e-05, "loss": 0.3827, "step": 63960 }, { "epoch": 0.73, "learning_rate": 4.269626365597781e-05, "loss": 0.2527, "step": 63980 }, { "epoch": 0.73, "learning_rate": 4.2693980524891843e-05, "loss": 0.1944, "step": 64000 }, { "epoch": 0.73, "learning_rate": 4.2691697393805866e-05, "loss": 0.2672, "step": 64020 }, { "epoch": 0.73, "learning_rate": 4.2689414262719896e-05, "loss": 0.2939, "step": 64040 }, { "epoch": 0.73, "learning_rate": 4.2687131131633926e-05, "loss": 0.3712, "step": 64060 }, { "epoch": 0.73, "learning_rate": 4.2684848000547955e-05, "loss": 0.1347, "step": 64080 }, { "epoch": 0.73, "learning_rate": 4.2682564869461985e-05, "loss": 0.2384, "step": 64100 }, { "epoch": 0.73, "learning_rate": 4.268028173837601e-05, "loss": 0.3593, "step": 64120 }, { "epoch": 0.73, "learning_rate": 4.267799860729004e-05, "loss": 0.1768, "step": 64140 }, { "epoch": 0.73, "learning_rate": 4.267571547620407e-05, "loss": 0.2422, "step": 64160 }, { "epoch": 0.73, "learning_rate": 4.26734323451181e-05, "loss": 0.0784, "step": 64180 }, { "epoch": 0.73, "learning_rate": 4.267114921403213e-05, "loss": 0.1552, "step": 64200 }, { "epoch": 0.73, "learning_rate": 4.266886608294615e-05, "loss": 0.0524, "step": 64220 }, { "epoch": 0.73, "learning_rate": 4.2666582951860186e-05, "loss": 0.1242, "step": 64240 }, { "epoch": 0.73, "learning_rate": 4.266429982077421e-05, "loss": 0.1113, "step": 64260 }, { "epoch": 0.73, "learning_rate": 4.266201668968824e-05, "loss": 0.1571, "step": 64280 }, { "epoch": 0.73, "learning_rate": 4.265973355860227e-05, "loss": 0.3376, "step": 64300 }, { "epoch": 0.73, "learning_rate": 4.26574504275163e-05, "loss": 0.1352, "step": 64320 }, { "epoch": 0.73, "learning_rate": 4.265516729643033e-05, "loss": 0.3496, "step": 64340 }, { "epoch": 0.73, "learning_rate": 4.265288416534435e-05, "loss": 0.3092, "step": 64360 }, { "epoch": 0.73, "learning_rate": 4.265060103425839e-05, "loss": 0.1481, "step": 64380 }, { "epoch": 0.74, "learning_rate": 4.264831790317241e-05, "loss": 0.3096, "step": 64400 }, { "epoch": 0.74, "learning_rate": 4.264603477208644e-05, "loss": 0.2739, "step": 64420 }, { "epoch": 0.74, "learning_rate": 4.264375164100047e-05, "loss": 0.1033, "step": 64440 }, { "epoch": 0.74, "learning_rate": 4.26414685099145e-05, "loss": 0.1969, "step": 64460 }, { "epoch": 0.74, "learning_rate": 4.263918537882853e-05, "loss": 0.2324, "step": 64480 }, { "epoch": 0.74, "learning_rate": 4.263690224774256e-05, "loss": 0.326, "step": 64500 }, { "epoch": 0.74, "learning_rate": 4.263461911665658e-05, "loss": 0.1432, "step": 64520 }, { "epoch": 0.74, "learning_rate": 4.263233598557062e-05, "loss": 0.1422, "step": 64540 }, { "epoch": 0.74, "learning_rate": 4.263005285448464e-05, "loss": 0.3164, "step": 64560 }, { "epoch": 0.74, "learning_rate": 4.262776972339867e-05, "loss": 0.3651, "step": 64580 }, { "epoch": 0.74, "learning_rate": 4.26254865923127e-05, "loss": 0.3182, "step": 64600 }, { "epoch": 0.74, "learning_rate": 4.262320346122673e-05, "loss": 0.066, "step": 64620 }, { "epoch": 0.74, "learning_rate": 4.262092033014076e-05, "loss": 0.3415, "step": 64640 }, { "epoch": 0.74, "learning_rate": 4.261863719905478e-05, "loss": 0.282, "step": 64660 }, { "epoch": 0.74, "learning_rate": 4.261635406796882e-05, "loss": 0.2143, "step": 64680 }, { "epoch": 0.74, "learning_rate": 4.261407093688284e-05, "loss": 0.2823, "step": 64700 }, { "epoch": 0.74, "learning_rate": 4.261178780579687e-05, "loss": 0.1522, "step": 64720 }, { "epoch": 0.74, "learning_rate": 4.26095046747109e-05, "loss": 0.2673, "step": 64740 }, { "epoch": 0.74, "learning_rate": 4.260722154362493e-05, "loss": 0.1861, "step": 64760 }, { "epoch": 0.74, "learning_rate": 4.260493841253896e-05, "loss": 0.3323, "step": 64780 }, { "epoch": 0.74, "learning_rate": 4.260265528145298e-05, "loss": 0.1499, "step": 64800 }, { "epoch": 0.74, "learning_rate": 4.260037215036701e-05, "loss": 0.2785, "step": 64820 }, { "epoch": 0.74, "learning_rate": 4.259808901928104e-05, "loss": 0.1627, "step": 64840 }, { "epoch": 0.74, "learning_rate": 4.259580588819507e-05, "loss": 0.2963, "step": 64860 }, { "epoch": 0.74, "learning_rate": 4.25935227571091e-05, "loss": 0.2116, "step": 64880 }, { "epoch": 0.74, "learning_rate": 4.259123962602313e-05, "loss": 0.1887, "step": 64900 }, { "epoch": 0.74, "learning_rate": 4.258895649493716e-05, "loss": 0.1692, "step": 64920 }, { "epoch": 0.74, "learning_rate": 4.2586673363851184e-05, "loss": 0.2654, "step": 64940 }, { "epoch": 0.74, "learning_rate": 4.2584390232765214e-05, "loss": 0.0854, "step": 64960 }, { "epoch": 0.74, "learning_rate": 4.258210710167925e-05, "loss": 0.2032, "step": 64980 }, { "epoch": 0.74, "learning_rate": 4.257982397059327e-05, "loss": 0.1599, "step": 65000 }, { "epoch": 0.74, "learning_rate": 4.25775408395073e-05, "loss": 0.1502, "step": 65020 }, { "epoch": 0.74, "learning_rate": 4.2575257708421326e-05, "loss": 0.1542, "step": 65040 }, { "epoch": 0.74, "learning_rate": 4.257297457733536e-05, "loss": 0.2426, "step": 65060 }, { "epoch": 0.74, "learning_rate": 4.257069144624939e-05, "loss": 0.0819, "step": 65080 }, { "epoch": 0.74, "learning_rate": 4.2568408315163415e-05, "loss": 0.4295, "step": 65100 }, { "epoch": 0.74, "learning_rate": 4.2566125184077445e-05, "loss": 0.4094, "step": 65120 }, { "epoch": 0.74, "learning_rate": 4.2563842052991474e-05, "loss": 0.1597, "step": 65140 }, { "epoch": 0.74, "learning_rate": 4.2561558921905504e-05, "loss": 0.3274, "step": 65160 }, { "epoch": 0.74, "learning_rate": 4.2559275790819534e-05, "loss": 0.1584, "step": 65180 }, { "epoch": 0.74, "learning_rate": 4.2556992659733563e-05, "loss": 0.2546, "step": 65200 }, { "epoch": 0.74, "learning_rate": 4.255470952864759e-05, "loss": 0.1044, "step": 65220 }, { "epoch": 0.74, "learning_rate": 4.2552426397561616e-05, "loss": 0.3219, "step": 65240 }, { "epoch": 0.74, "learning_rate": 4.2550143266475646e-05, "loss": 0.0824, "step": 65260 }, { "epoch": 0.75, "learning_rate": 4.2547860135389675e-05, "loss": 0.5888, "step": 65280 }, { "epoch": 0.75, "learning_rate": 4.2545577004303705e-05, "loss": 0.1577, "step": 65300 }, { "epoch": 0.75, "learning_rate": 4.2543293873217735e-05, "loss": 0.0556, "step": 65320 }, { "epoch": 0.75, "learning_rate": 4.254101074213176e-05, "loss": 0.3723, "step": 65340 }, { "epoch": 0.75, "learning_rate": 4.2538727611045794e-05, "loss": 0.0629, "step": 65360 }, { "epoch": 0.75, "learning_rate": 4.253644447995982e-05, "loss": 0.1267, "step": 65380 }, { "epoch": 0.75, "learning_rate": 4.253416134887385e-05, "loss": 0.2269, "step": 65400 }, { "epoch": 0.75, "learning_rate": 4.2531878217787876e-05, "loss": 0.2618, "step": 65420 }, { "epoch": 0.75, "learning_rate": 4.2529595086701906e-05, "loss": 0.0974, "step": 65440 }, { "epoch": 0.75, "learning_rate": 4.2527311955615936e-05, "loss": 0.1438, "step": 65460 }, { "epoch": 0.75, "learning_rate": 4.252502882452996e-05, "loss": 0.2461, "step": 65480 }, { "epoch": 0.75, "learning_rate": 4.2522745693443995e-05, "loss": 0.2021, "step": 65500 }, { "epoch": 0.75, "learning_rate": 4.252046256235802e-05, "loss": 0.24, "step": 65520 }, { "epoch": 0.75, "learning_rate": 4.251817943127205e-05, "loss": 0.2111, "step": 65540 }, { "epoch": 0.75, "learning_rate": 4.251589630018608e-05, "loss": 0.4852, "step": 65560 }, { "epoch": 0.75, "learning_rate": 4.251361316910011e-05, "loss": 0.2209, "step": 65580 }, { "epoch": 0.75, "learning_rate": 4.251133003801414e-05, "loss": 0.2771, "step": 65600 }, { "epoch": 0.75, "learning_rate": 4.250904690692816e-05, "loss": 0.5096, "step": 65620 }, { "epoch": 0.75, "learning_rate": 4.250676377584219e-05, "loss": 0.0761, "step": 65640 }, { "epoch": 0.75, "learning_rate": 4.2504480644756226e-05, "loss": 0.4558, "step": 65660 }, { "epoch": 0.75, "learning_rate": 4.250219751367025e-05, "loss": 0.3459, "step": 65680 }, { "epoch": 0.75, "learning_rate": 4.249991438258428e-05, "loss": 0.2561, "step": 65700 }, { "epoch": 0.75, "learning_rate": 4.24976312514983e-05, "loss": 0.1794, "step": 65720 }, { "epoch": 0.75, "learning_rate": 4.249534812041234e-05, "loss": 0.266, "step": 65740 }, { "epoch": 0.75, "learning_rate": 4.249306498932637e-05, "loss": 0.3059, "step": 65760 }, { "epoch": 0.75, "learning_rate": 4.249078185824039e-05, "loss": 0.1425, "step": 65780 }, { "epoch": 0.75, "learning_rate": 4.248849872715443e-05, "loss": 0.0602, "step": 65800 }, { "epoch": 0.75, "learning_rate": 4.248621559606845e-05, "loss": 0.1676, "step": 65820 }, { "epoch": 0.75, "learning_rate": 4.248393246498248e-05, "loss": 0.2775, "step": 65840 }, { "epoch": 0.75, "learning_rate": 4.248164933389651e-05, "loss": 0.1049, "step": 65860 }, { "epoch": 0.75, "learning_rate": 4.247936620281054e-05, "loss": 0.2934, "step": 65880 }, { "epoch": 0.75, "learning_rate": 4.247708307172457e-05, "loss": 0.1447, "step": 65900 }, { "epoch": 0.75, "learning_rate": 4.247479994063859e-05, "loss": 0.1825, "step": 65920 }, { "epoch": 0.75, "learning_rate": 4.247251680955262e-05, "loss": 0.1368, "step": 65940 }, { "epoch": 0.75, "learning_rate": 4.247023367846665e-05, "loss": 0.5314, "step": 65960 }, { "epoch": 0.75, "learning_rate": 4.246795054738068e-05, "loss": 0.2275, "step": 65980 }, { "epoch": 0.75, "learning_rate": 4.246566741629471e-05, "loss": 0.1474, "step": 66000 }, { "epoch": 0.75, "learning_rate": 4.246338428520873e-05, "loss": 0.1509, "step": 66020 }, { "epoch": 0.75, "learning_rate": 4.246110115412277e-05, "loss": 0.112, "step": 66040 }, { "epoch": 0.75, "learning_rate": 4.245881802303679e-05, "loss": 0.319, "step": 66060 }, { "epoch": 0.75, "learning_rate": 4.245653489195082e-05, "loss": 0.0913, "step": 66080 }, { "epoch": 0.75, "learning_rate": 4.245425176086485e-05, "loss": 0.3626, "step": 66100 }, { "epoch": 0.75, "learning_rate": 4.245196862977888e-05, "loss": 0.3145, "step": 66120 }, { "epoch": 0.76, "learning_rate": 4.244968549869291e-05, "loss": 0.1224, "step": 66140 }, { "epoch": 0.76, "learning_rate": 4.2447402367606934e-05, "loss": 0.2178, "step": 66160 }, { "epoch": 0.76, "learning_rate": 4.244511923652097e-05, "loss": 0.1119, "step": 66180 }, { "epoch": 0.76, "learning_rate": 4.244283610543499e-05, "loss": 0.181, "step": 66200 }, { "epoch": 0.76, "learning_rate": 4.244055297434902e-05, "loss": 0.3112, "step": 66220 }, { "epoch": 0.76, "learning_rate": 4.243826984326305e-05, "loss": 0.2123, "step": 66240 }, { "epoch": 0.76, "learning_rate": 4.243598671217708e-05, "loss": 0.2372, "step": 66260 }, { "epoch": 0.76, "learning_rate": 4.243370358109111e-05, "loss": 0.3751, "step": 66280 }, { "epoch": 0.76, "learning_rate": 4.2431420450005135e-05, "loss": 0.3967, "step": 66300 }, { "epoch": 0.76, "learning_rate": 4.2429137318919165e-05, "loss": 0.382, "step": 66320 }, { "epoch": 0.76, "learning_rate": 4.24268541878332e-05, "loss": 0.1801, "step": 66340 }, { "epoch": 0.76, "learning_rate": 4.2424571056747224e-05, "loss": 0.2931, "step": 66360 }, { "epoch": 0.76, "learning_rate": 4.2422287925661254e-05, "loss": 0.274, "step": 66380 }, { "epoch": 0.76, "learning_rate": 4.242000479457528e-05, "loss": 0.1662, "step": 66400 }, { "epoch": 0.76, "learning_rate": 4.241772166348931e-05, "loss": 0.1715, "step": 66420 }, { "epoch": 0.76, "learning_rate": 4.241543853240334e-05, "loss": 0.2512, "step": 66440 }, { "epoch": 0.76, "learning_rate": 4.2413155401317366e-05, "loss": 0.2337, "step": 66460 }, { "epoch": 0.76, "learning_rate": 4.24108722702314e-05, "loss": 0.0897, "step": 66480 }, { "epoch": 0.76, "learning_rate": 4.2408589139145425e-05, "loss": 0.225, "step": 66500 }, { "epoch": 0.76, "learning_rate": 4.2406306008059455e-05, "loss": 0.1655, "step": 66520 }, { "epoch": 0.76, "learning_rate": 4.2404022876973484e-05, "loss": 0.1383, "step": 66540 }, { "epoch": 0.76, "learning_rate": 4.2401739745887514e-05, "loss": 0.4149, "step": 66560 }, { "epoch": 0.76, "learning_rate": 4.2399456614801544e-05, "loss": 0.1347, "step": 66580 }, { "epoch": 0.76, "learning_rate": 4.239717348371557e-05, "loss": 0.0793, "step": 66600 }, { "epoch": 0.76, "learning_rate": 4.2394890352629596e-05, "loss": 0.2508, "step": 66620 }, { "epoch": 0.76, "learning_rate": 4.2392607221543626e-05, "loss": 0.1116, "step": 66640 }, { "epoch": 0.76, "learning_rate": 4.2390324090457656e-05, "loss": 0.2451, "step": 66660 }, { "epoch": 0.76, "learning_rate": 4.2388040959371685e-05, "loss": 0.2448, "step": 66680 }, { "epoch": 0.76, "learning_rate": 4.238575782828571e-05, "loss": 0.148, "step": 66700 }, { "epoch": 0.76, "learning_rate": 4.2383474697199745e-05, "loss": 0.176, "step": 66720 }, { "epoch": 0.76, "learning_rate": 4.238119156611377e-05, "loss": 0.2211, "step": 66740 }, { "epoch": 0.76, "learning_rate": 4.23789084350278e-05, "loss": 0.0565, "step": 66760 }, { "epoch": 0.76, "learning_rate": 4.237662530394183e-05, "loss": 0.3224, "step": 66780 }, { "epoch": 0.76, "learning_rate": 4.237434217285586e-05, "loss": 0.3802, "step": 66800 }, { "epoch": 0.76, "learning_rate": 4.2372059041769886e-05, "loss": 0.1016, "step": 66820 }, { "epoch": 0.76, "learning_rate": 4.236977591068391e-05, "loss": 0.1796, "step": 66840 }, { "epoch": 0.76, "learning_rate": 4.2367492779597946e-05, "loss": 0.1966, "step": 66860 }, { "epoch": 0.76, "learning_rate": 4.236520964851197e-05, "loss": 0.2501, "step": 66880 }, { "epoch": 0.76, "learning_rate": 4.2362926517426e-05, "loss": 0.2362, "step": 66900 }, { "epoch": 0.76, "learning_rate": 4.236064338634003e-05, "loss": 0.4162, "step": 66920 }, { "epoch": 0.76, "learning_rate": 4.235836025525406e-05, "loss": 0.1868, "step": 66940 }, { "epoch": 0.76, "learning_rate": 4.235607712416809e-05, "loss": 0.3828, "step": 66960 }, { "epoch": 0.76, "learning_rate": 4.235379399308211e-05, "loss": 0.2018, "step": 66980 }, { "epoch": 0.76, "learning_rate": 4.235151086199614e-05, "loss": 0.301, "step": 67000 }, { "epoch": 0.77, "learning_rate": 4.2349227730910176e-05, "loss": 0.2425, "step": 67020 }, { "epoch": 0.77, "learning_rate": 4.23469445998242e-05, "loss": 0.1171, "step": 67040 }, { "epoch": 0.77, "learning_rate": 4.234466146873823e-05, "loss": 0.2674, "step": 67060 }, { "epoch": 0.77, "learning_rate": 4.234237833765226e-05, "loss": 0.0569, "step": 67080 }, { "epoch": 0.77, "learning_rate": 4.234009520656629e-05, "loss": 0.2781, "step": 67100 }, { "epoch": 0.77, "learning_rate": 4.233781207548032e-05, "loss": 0.4137, "step": 67120 }, { "epoch": 0.77, "learning_rate": 4.233552894439434e-05, "loss": 0.2163, "step": 67140 }, { "epoch": 0.77, "learning_rate": 4.233324581330838e-05, "loss": 0.1883, "step": 67160 }, { "epoch": 0.77, "learning_rate": 4.23309626822224e-05, "loss": 0.2534, "step": 67180 }, { "epoch": 0.77, "learning_rate": 4.232867955113643e-05, "loss": 0.1287, "step": 67200 }, { "epoch": 0.77, "learning_rate": 4.232639642005046e-05, "loss": 0.3429, "step": 67220 }, { "epoch": 0.77, "learning_rate": 4.232411328896449e-05, "loss": 0.1203, "step": 67240 }, { "epoch": 0.77, "learning_rate": 4.232183015787852e-05, "loss": 0.0996, "step": 67260 }, { "epoch": 0.77, "learning_rate": 4.231954702679254e-05, "loss": 0.3202, "step": 67280 }, { "epoch": 0.77, "learning_rate": 4.231726389570657e-05, "loss": 0.2775, "step": 67300 }, { "epoch": 0.77, "learning_rate": 4.23149807646206e-05, "loss": 0.328, "step": 67320 }, { "epoch": 0.77, "learning_rate": 4.231269763353463e-05, "loss": 0.2351, "step": 67340 }, { "epoch": 0.77, "learning_rate": 4.231041450244866e-05, "loss": 0.1515, "step": 67360 }, { "epoch": 0.77, "learning_rate": 4.230813137136269e-05, "loss": 0.0653, "step": 67380 }, { "epoch": 0.77, "learning_rate": 4.230584824027672e-05, "loss": 0.5713, "step": 67400 }, { "epoch": 0.77, "learning_rate": 4.230356510919074e-05, "loss": 0.155, "step": 67420 }, { "epoch": 0.77, "learning_rate": 4.230128197810477e-05, "loss": 0.116, "step": 67440 }, { "epoch": 0.77, "learning_rate": 4.229899884701881e-05, "loss": 0.2329, "step": 67460 }, { "epoch": 0.77, "learning_rate": 4.229671571593283e-05, "loss": 0.1962, "step": 67480 }, { "epoch": 0.77, "learning_rate": 4.229443258484686e-05, "loss": 0.2254, "step": 67500 }, { "epoch": 0.77, "learning_rate": 4.2292149453760885e-05, "loss": 0.1554, "step": 67520 }, { "epoch": 0.77, "learning_rate": 4.228986632267492e-05, "loss": 0.132, "step": 67540 }, { "epoch": 0.77, "learning_rate": 4.228758319158895e-05, "loss": 0.118, "step": 67560 }, { "epoch": 0.77, "learning_rate": 4.2285300060502974e-05, "loss": 0.3113, "step": 67580 }, { "epoch": 0.77, "learning_rate": 4.2283016929417003e-05, "loss": 0.1547, "step": 67600 }, { "epoch": 0.77, "learning_rate": 4.228073379833103e-05, "loss": 0.3458, "step": 67620 }, { "epoch": 0.77, "learning_rate": 4.227845066724506e-05, "loss": 0.412, "step": 67640 }, { "epoch": 0.77, "learning_rate": 4.227616753615909e-05, "loss": 0.1657, "step": 67660 }, { "epoch": 0.77, "learning_rate": 4.227388440507312e-05, "loss": 0.1284, "step": 67680 }, { "epoch": 0.77, "learning_rate": 4.227160127398715e-05, "loss": 0.2079, "step": 67700 }, { "epoch": 0.77, "learning_rate": 4.2269318142901175e-05, "loss": 0.0794, "step": 67720 }, { "epoch": 0.77, "learning_rate": 4.2267035011815204e-05, "loss": 0.1643, "step": 67740 }, { "epoch": 0.77, "learning_rate": 4.2264751880729234e-05, "loss": 0.201, "step": 67760 }, { "epoch": 0.77, "learning_rate": 4.2262468749643264e-05, "loss": 0.1115, "step": 67780 }, { "epoch": 0.77, "learning_rate": 4.2260185618557293e-05, "loss": 0.1814, "step": 67800 }, { "epoch": 0.77, "learning_rate": 4.2257902487471316e-05, "loss": 0.3733, "step": 67820 }, { "epoch": 0.77, "learning_rate": 4.225561935638535e-05, "loss": 0.3565, "step": 67840 }, { "epoch": 0.77, "learning_rate": 4.2253336225299376e-05, "loss": 0.1617, "step": 67860 }, { "epoch": 0.77, "learning_rate": 4.2251053094213405e-05, "loss": 0.0927, "step": 67880 }, { "epoch": 0.78, "learning_rate": 4.2248769963127435e-05, "loss": 0.1577, "step": 67900 }, { "epoch": 0.78, "learning_rate": 4.2246486832041465e-05, "loss": 0.2308, "step": 67920 }, { "epoch": 0.78, "learning_rate": 4.2244203700955494e-05, "loss": 0.3174, "step": 67940 }, { "epoch": 0.78, "learning_rate": 4.224192056986952e-05, "loss": 0.3761, "step": 67960 }, { "epoch": 0.78, "learning_rate": 4.2239637438783554e-05, "loss": 0.2589, "step": 67980 }, { "epoch": 0.78, "learning_rate": 4.223735430769758e-05, "loss": 0.2512, "step": 68000 }, { "epoch": 0.78, "learning_rate": 4.2235071176611606e-05, "loss": 0.1363, "step": 68020 }, { "epoch": 0.78, "learning_rate": 4.2232788045525636e-05, "loss": 0.2425, "step": 68040 }, { "epoch": 0.78, "learning_rate": 4.2230504914439666e-05, "loss": 0.1195, "step": 68060 }, { "epoch": 0.78, "learning_rate": 4.2228221783353695e-05, "loss": 0.216, "step": 68080 }, { "epoch": 0.78, "learning_rate": 4.222593865226772e-05, "loss": 0.134, "step": 68100 }, { "epoch": 0.78, "learning_rate": 4.222365552118175e-05, "loss": 0.377, "step": 68120 }, { "epoch": 0.78, "learning_rate": 4.2221372390095785e-05, "loss": 0.2742, "step": 68140 }, { "epoch": 0.78, "learning_rate": 4.221908925900981e-05, "loss": 0.1335, "step": 68160 }, { "epoch": 0.78, "learning_rate": 4.221680612792384e-05, "loss": 0.1649, "step": 68180 }, { "epoch": 0.78, "learning_rate": 4.221452299683786e-05, "loss": 0.2193, "step": 68200 }, { "epoch": 0.78, "learning_rate": 4.2212239865751896e-05, "loss": 0.3997, "step": 68220 }, { "epoch": 0.78, "learning_rate": 4.2209956734665926e-05, "loss": 0.1062, "step": 68240 }, { "epoch": 0.78, "learning_rate": 4.220767360357995e-05, "loss": 0.1184, "step": 68260 }, { "epoch": 0.78, "learning_rate": 4.2205390472493986e-05, "loss": 0.1124, "step": 68280 }, { "epoch": 0.78, "learning_rate": 4.220310734140801e-05, "loss": 0.087, "step": 68300 }, { "epoch": 0.78, "learning_rate": 4.220082421032204e-05, "loss": 0.2814, "step": 68320 }, { "epoch": 0.78, "learning_rate": 4.219854107923607e-05, "loss": 0.0773, "step": 68340 }, { "epoch": 0.78, "learning_rate": 4.21962579481501e-05, "loss": 0.1819, "step": 68360 }, { "epoch": 0.78, "learning_rate": 4.219397481706413e-05, "loss": 0.2758, "step": 68380 }, { "epoch": 0.78, "learning_rate": 4.219169168597815e-05, "loss": 0.0872, "step": 68400 }, { "epoch": 0.78, "learning_rate": 4.218940855489218e-05, "loss": 0.1984, "step": 68420 }, { "epoch": 0.78, "learning_rate": 4.218712542380621e-05, "loss": 0.131, "step": 68440 }, { "epoch": 0.78, "learning_rate": 4.218484229272024e-05, "loss": 0.1436, "step": 68460 }, { "epoch": 0.78, "learning_rate": 4.218255916163427e-05, "loss": 0.462, "step": 68480 }, { "epoch": 0.78, "learning_rate": 4.218027603054829e-05, "loss": 0.2221, "step": 68500 }, { "epoch": 0.78, "learning_rate": 4.217799289946233e-05, "loss": 0.0485, "step": 68520 }, { "epoch": 0.78, "learning_rate": 4.217570976837635e-05, "loss": 0.3412, "step": 68540 }, { "epoch": 0.78, "learning_rate": 4.217342663729038e-05, "loss": 0.3702, "step": 68560 }, { "epoch": 0.78, "learning_rate": 4.217114350620441e-05, "loss": 0.1646, "step": 68580 }, { "epoch": 0.78, "learning_rate": 4.216886037511844e-05, "loss": 0.2461, "step": 68600 }, { "epoch": 0.78, "learning_rate": 4.216657724403247e-05, "loss": 0.2948, "step": 68620 }, { "epoch": 0.78, "learning_rate": 4.216429411294649e-05, "loss": 0.4762, "step": 68640 }, { "epoch": 0.78, "learning_rate": 4.216201098186053e-05, "loss": 0.165, "step": 68660 }, { "epoch": 0.78, "learning_rate": 4.215972785077455e-05, "loss": 0.2408, "step": 68680 }, { "epoch": 0.78, "learning_rate": 4.215744471968858e-05, "loss": 0.1817, "step": 68700 }, { "epoch": 0.78, "learning_rate": 4.215516158860261e-05, "loss": 0.179, "step": 68720 }, { "epoch": 0.78, "learning_rate": 4.215287845751664e-05, "loss": 0.083, "step": 68740 }, { "epoch": 0.78, "learning_rate": 4.215059532643067e-05, "loss": 0.0885, "step": 68760 }, { "epoch": 0.79, "learning_rate": 4.2148312195344694e-05, "loss": 0.1196, "step": 68780 }, { "epoch": 0.79, "learning_rate": 4.2146029064258723e-05, "loss": 0.5999, "step": 68800 }, { "epoch": 0.79, "learning_rate": 4.214374593317276e-05, "loss": 0.2474, "step": 68820 }, { "epoch": 0.79, "learning_rate": 4.214146280208678e-05, "loss": 0.2015, "step": 68840 }, { "epoch": 0.79, "learning_rate": 4.213917967100081e-05, "loss": 0.1565, "step": 68860 }, { "epoch": 0.79, "learning_rate": 4.2136896539914835e-05, "loss": 0.3394, "step": 68880 }, { "epoch": 0.79, "learning_rate": 4.213461340882887e-05, "loss": 0.1524, "step": 68900 }, { "epoch": 0.79, "learning_rate": 4.21323302777429e-05, "loss": 0.1123, "step": 68920 }, { "epoch": 0.79, "learning_rate": 4.2130047146656924e-05, "loss": 0.0464, "step": 68940 }, { "epoch": 0.79, "learning_rate": 4.212776401557096e-05, "loss": 0.0667, "step": 68960 }, { "epoch": 0.79, "learning_rate": 4.2125480884484984e-05, "loss": 0.2416, "step": 68980 }, { "epoch": 0.79, "learning_rate": 4.2123197753399013e-05, "loss": 0.1765, "step": 69000 }, { "epoch": 0.79, "learning_rate": 4.212091462231304e-05, "loss": 0.1152, "step": 69020 }, { "epoch": 0.79, "learning_rate": 4.211863149122707e-05, "loss": 0.1268, "step": 69040 }, { "epoch": 0.79, "learning_rate": 4.21163483601411e-05, "loss": 0.346, "step": 69060 }, { "epoch": 0.79, "learning_rate": 4.2114065229055125e-05, "loss": 0.1455, "step": 69080 }, { "epoch": 0.79, "learning_rate": 4.2111782097969155e-05, "loss": 0.1652, "step": 69100 }, { "epoch": 0.79, "learning_rate": 4.2109498966883185e-05, "loss": 0.1749, "step": 69120 }, { "epoch": 0.79, "learning_rate": 4.2107215835797214e-05, "loss": 0.2224, "step": 69140 }, { "epoch": 0.79, "learning_rate": 4.2104932704711244e-05, "loss": 0.0877, "step": 69160 }, { "epoch": 0.79, "learning_rate": 4.210264957362527e-05, "loss": 0.2163, "step": 69180 }, { "epoch": 0.79, "learning_rate": 4.2100366442539304e-05, "loss": 0.1865, "step": 69200 }, { "epoch": 0.79, "learning_rate": 4.2098083311453326e-05, "loss": 0.2199, "step": 69220 }, { "epoch": 0.79, "learning_rate": 4.2095800180367356e-05, "loss": 0.3129, "step": 69240 }, { "epoch": 0.79, "learning_rate": 4.2093517049281386e-05, "loss": 0.1192, "step": 69260 }, { "epoch": 0.79, "learning_rate": 4.2091233918195415e-05, "loss": 0.2589, "step": 69280 }, { "epoch": 0.79, "learning_rate": 4.2088950787109445e-05, "loss": 0.2316, "step": 69300 }, { "epoch": 0.79, "learning_rate": 4.208666765602347e-05, "loss": 0.2634, "step": 69320 }, { "epoch": 0.79, "learning_rate": 4.2084384524937505e-05, "loss": 0.1303, "step": 69340 }, { "epoch": 0.79, "learning_rate": 4.208210139385153e-05, "loss": 0.221, "step": 69360 }, { "epoch": 0.79, "learning_rate": 4.207981826276556e-05, "loss": 0.118, "step": 69380 }, { "epoch": 0.79, "learning_rate": 4.207753513167959e-05, "loss": 0.185, "step": 69400 }, { "epoch": 0.79, "learning_rate": 4.2075252000593616e-05, "loss": 0.3562, "step": 69420 }, { "epoch": 0.79, "learning_rate": 4.2072968869507646e-05, "loss": 0.1185, "step": 69440 }, { "epoch": 0.79, "learning_rate": 4.207068573842167e-05, "loss": 0.2553, "step": 69460 }, { "epoch": 0.79, "learning_rate": 4.20684026073357e-05, "loss": 0.3883, "step": 69480 }, { "epoch": 0.79, "learning_rate": 4.2066119476249735e-05, "loss": 0.2062, "step": 69500 }, { "epoch": 0.79, "learning_rate": 4.206383634516376e-05, "loss": 0.3394, "step": 69520 }, { "epoch": 0.79, "learning_rate": 4.206155321407779e-05, "loss": 0.2153, "step": 69540 }, { "epoch": 0.79, "learning_rate": 4.205927008299182e-05, "loss": 0.227, "step": 69560 }, { "epoch": 0.79, "learning_rate": 4.205698695190585e-05, "loss": 0.3187, "step": 69580 }, { "epoch": 0.79, "learning_rate": 4.205470382081988e-05, "loss": 0.1455, "step": 69600 }, { "epoch": 0.79, "learning_rate": 4.20524206897339e-05, "loss": 0.2702, "step": 69620 }, { "epoch": 0.79, "learning_rate": 4.2050137558647936e-05, "loss": 0.1371, "step": 69640 }, { "epoch": 0.8, "learning_rate": 4.204785442756196e-05, "loss": 0.2751, "step": 69660 }, { "epoch": 0.8, "learning_rate": 4.204557129647599e-05, "loss": 0.1901, "step": 69680 }, { "epoch": 0.8, "learning_rate": 4.204328816539002e-05, "loss": 0.1588, "step": 69700 }, { "epoch": 0.8, "learning_rate": 4.204100503430405e-05, "loss": 0.2877, "step": 69720 }, { "epoch": 0.8, "learning_rate": 4.203872190321808e-05, "loss": 0.2123, "step": 69740 }, { "epoch": 0.8, "learning_rate": 4.20364387721321e-05, "loss": 0.5655, "step": 69760 }, { "epoch": 0.8, "learning_rate": 4.203415564104613e-05, "loss": 0.1497, "step": 69780 }, { "epoch": 0.8, "learning_rate": 4.203187250996016e-05, "loss": 0.2704, "step": 69800 }, { "epoch": 0.8, "learning_rate": 4.202958937887419e-05, "loss": 0.3045, "step": 69820 }, { "epoch": 0.8, "learning_rate": 4.202730624778822e-05, "loss": 0.1369, "step": 69840 }, { "epoch": 0.8, "learning_rate": 4.202502311670225e-05, "loss": 0.1597, "step": 69860 }, { "epoch": 0.8, "learning_rate": 4.202273998561628e-05, "loss": 0.2479, "step": 69880 }, { "epoch": 0.8, "learning_rate": 4.20204568545303e-05, "loss": 0.1518, "step": 69900 }, { "epoch": 0.8, "learning_rate": 4.201817372344433e-05, "loss": 0.2824, "step": 69920 }, { "epoch": 0.8, "learning_rate": 4.201589059235837e-05, "loss": 0.166, "step": 69940 }, { "epoch": 0.8, "learning_rate": 4.201360746127239e-05, "loss": 0.298, "step": 69960 }, { "epoch": 0.8, "learning_rate": 4.201132433018642e-05, "loss": 0.2176, "step": 69980 }, { "epoch": 0.8, "learning_rate": 4.200904119910044e-05, "loss": 0.2966, "step": 70000 }, { "epoch": 0.8, "learning_rate": 4.200675806801448e-05, "loss": 0.2203, "step": 70020 }, { "epoch": 0.8, "learning_rate": 4.200447493692851e-05, "loss": 0.2064, "step": 70040 }, { "epoch": 0.8, "learning_rate": 4.200219180584253e-05, "loss": 0.315, "step": 70060 }, { "epoch": 0.8, "learning_rate": 4.199990867475656e-05, "loss": 0.1802, "step": 70080 }, { "epoch": 0.8, "learning_rate": 4.199762554367059e-05, "loss": 0.1871, "step": 70100 }, { "epoch": 0.8, "learning_rate": 4.199534241258462e-05, "loss": 0.3847, "step": 70120 }, { "epoch": 0.8, "learning_rate": 4.199305928149865e-05, "loss": 0.4324, "step": 70140 }, { "epoch": 0.8, "learning_rate": 4.199077615041268e-05, "loss": 0.4428, "step": 70160 }, { "epoch": 0.8, "learning_rate": 4.198849301932671e-05, "loss": 0.7656, "step": 70180 }, { "epoch": 0.8, "learning_rate": 4.1986209888240733e-05, "loss": 0.2336, "step": 70200 }, { "epoch": 0.8, "learning_rate": 4.198392675715476e-05, "loss": 0.4128, "step": 70220 }, { "epoch": 0.8, "learning_rate": 4.198164362606879e-05, "loss": 0.3577, "step": 70240 }, { "epoch": 0.8, "learning_rate": 4.197936049498282e-05, "loss": 0.1466, "step": 70260 }, { "epoch": 0.8, "learning_rate": 4.197707736389685e-05, "loss": 0.0438, "step": 70280 }, { "epoch": 0.8, "learning_rate": 4.1974794232810875e-05, "loss": 0.1753, "step": 70300 }, { "epoch": 0.8, "learning_rate": 4.197251110172491e-05, "loss": 0.1912, "step": 70320 }, { "epoch": 0.8, "learning_rate": 4.1970227970638934e-05, "loss": 0.1564, "step": 70340 }, { "epoch": 0.8, "learning_rate": 4.1967944839552964e-05, "loss": 0.1379, "step": 70360 }, { "epoch": 0.8, "learning_rate": 4.1965661708466994e-05, "loss": 0.5648, "step": 70380 }, { "epoch": 0.8, "learning_rate": 4.1963378577381023e-05, "loss": 0.0941, "step": 70400 }, { "epoch": 0.8, "learning_rate": 4.196109544629505e-05, "loss": 0.2343, "step": 70420 }, { "epoch": 0.8, "learning_rate": 4.1958812315209076e-05, "loss": 0.3542, "step": 70440 }, { "epoch": 0.8, "learning_rate": 4.195652918412311e-05, "loss": 0.3454, "step": 70460 }, { "epoch": 0.8, "learning_rate": 4.1954246053037135e-05, "loss": 0.4854, "step": 70480 }, { "epoch": 0.8, "learning_rate": 4.1951962921951165e-05, "loss": 0.2111, "step": 70500 }, { "epoch": 0.81, "learning_rate": 4.1949679790865195e-05, "loss": 0.094, "step": 70520 }, { "epoch": 0.81, "learning_rate": 4.1947396659779224e-05, "loss": 0.0986, "step": 70540 }, { "epoch": 0.81, "learning_rate": 4.1945113528693254e-05, "loss": 0.083, "step": 70560 }, { "epoch": 0.81, "learning_rate": 4.194283039760728e-05, "loss": 0.1641, "step": 70580 }, { "epoch": 0.81, "learning_rate": 4.194054726652131e-05, "loss": 0.3775, "step": 70600 }, { "epoch": 0.81, "learning_rate": 4.193826413543534e-05, "loss": 0.1444, "step": 70620 }, { "epoch": 0.81, "learning_rate": 4.1935981004349366e-05, "loss": 0.309, "step": 70640 }, { "epoch": 0.81, "learning_rate": 4.1933697873263396e-05, "loss": 0.2051, "step": 70660 }, { "epoch": 0.81, "learning_rate": 4.193141474217742e-05, "loss": 0.1855, "step": 70680 }, { "epoch": 0.81, "learning_rate": 4.1929131611091455e-05, "loss": 0.0735, "step": 70700 }, { "epoch": 0.81, "learning_rate": 4.1926848480005485e-05, "loss": 0.1817, "step": 70720 }, { "epoch": 0.81, "learning_rate": 4.192456534891951e-05, "loss": 0.1309, "step": 70740 }, { "epoch": 0.81, "learning_rate": 4.192228221783354e-05, "loss": 0.3234, "step": 70760 }, { "epoch": 0.81, "learning_rate": 4.191999908674757e-05, "loss": 0.2684, "step": 70780 }, { "epoch": 0.81, "learning_rate": 4.19177159556616e-05, "loss": 0.1163, "step": 70800 }, { "epoch": 0.81, "learning_rate": 4.1915432824575627e-05, "loss": 0.2754, "step": 70820 }, { "epoch": 0.81, "learning_rate": 4.1913149693489656e-05, "loss": 0.1617, "step": 70840 }, { "epoch": 0.81, "learning_rate": 4.1910866562403686e-05, "loss": 0.1251, "step": 70860 }, { "epoch": 0.81, "learning_rate": 4.190858343131771e-05, "loss": 0.3071, "step": 70880 }, { "epoch": 0.81, "learning_rate": 4.190630030023174e-05, "loss": 0.3174, "step": 70900 }, { "epoch": 0.81, "learning_rate": 4.190401716914577e-05, "loss": 0.1313, "step": 70920 }, { "epoch": 0.81, "learning_rate": 4.19017340380598e-05, "loss": 0.099, "step": 70940 }, { "epoch": 0.81, "learning_rate": 4.189945090697383e-05, "loss": 0.1533, "step": 70960 }, { "epoch": 0.81, "learning_rate": 4.189716777588785e-05, "loss": 0.1888, "step": 70980 }, { "epoch": 0.81, "learning_rate": 4.189488464480189e-05, "loss": 0.2469, "step": 71000 }, { "epoch": 0.81, "learning_rate": 4.189260151371591e-05, "loss": 0.2338, "step": 71020 }, { "epoch": 0.81, "learning_rate": 4.189031838262994e-05, "loss": 0.3496, "step": 71040 }, { "epoch": 0.81, "learning_rate": 4.188803525154397e-05, "loss": 0.425, "step": 71060 }, { "epoch": 0.81, "learning_rate": 4.1885752120458e-05, "loss": 0.0933, "step": 71080 }, { "epoch": 0.81, "learning_rate": 4.188346898937203e-05, "loss": 0.2837, "step": 71100 }, { "epoch": 0.81, "learning_rate": 4.188118585828605e-05, "loss": 0.1043, "step": 71120 }, { "epoch": 0.81, "learning_rate": 4.187890272720009e-05, "loss": 0.2344, "step": 71140 }, { "epoch": 0.81, "learning_rate": 4.187661959611411e-05, "loss": 0.2375, "step": 71160 }, { "epoch": 0.81, "learning_rate": 4.187433646502814e-05, "loss": 0.1931, "step": 71180 }, { "epoch": 0.81, "learning_rate": 4.187205333394217e-05, "loss": 0.2928, "step": 71200 }, { "epoch": 0.81, "learning_rate": 4.18697702028562e-05, "loss": 0.375, "step": 71220 }, { "epoch": 0.81, "learning_rate": 4.186748707177023e-05, "loss": 0.293, "step": 71240 }, { "epoch": 0.81, "learning_rate": 4.186520394068425e-05, "loss": 0.1556, "step": 71260 }, { "epoch": 0.81, "learning_rate": 4.186292080959828e-05, "loss": 0.055, "step": 71280 }, { "epoch": 0.81, "learning_rate": 4.186063767851232e-05, "loss": 0.3979, "step": 71300 }, { "epoch": 0.81, "learning_rate": 4.185835454742634e-05, "loss": 0.1499, "step": 71320 }, { "epoch": 0.81, "learning_rate": 4.185607141634037e-05, "loss": 0.1815, "step": 71340 }, { "epoch": 0.81, "learning_rate": 4.1853788285254394e-05, "loss": 0.1647, "step": 71360 }, { "epoch": 0.81, "learning_rate": 4.185150515416843e-05, "loss": 0.1508, "step": 71380 }, { "epoch": 0.82, "learning_rate": 4.184922202308246e-05, "loss": 0.1555, "step": 71400 }, { "epoch": 0.82, "learning_rate": 4.184693889199648e-05, "loss": 0.1495, "step": 71420 }, { "epoch": 0.82, "learning_rate": 4.184465576091052e-05, "loss": 0.3533, "step": 71440 }, { "epoch": 0.82, "learning_rate": 4.184237262982454e-05, "loss": 0.1488, "step": 71460 }, { "epoch": 0.82, "learning_rate": 4.184008949873857e-05, "loss": 0.2771, "step": 71480 }, { "epoch": 0.82, "learning_rate": 4.18378063676526e-05, "loss": 0.078, "step": 71500 }, { "epoch": 0.82, "learning_rate": 4.183552323656663e-05, "loss": 0.4321, "step": 71520 }, { "epoch": 0.82, "learning_rate": 4.183324010548066e-05, "loss": 0.1012, "step": 71540 }, { "epoch": 0.82, "learning_rate": 4.1830956974394684e-05, "loss": 0.354, "step": 71560 }, { "epoch": 0.82, "learning_rate": 4.1828673843308714e-05, "loss": 0.2029, "step": 71580 }, { "epoch": 0.82, "learning_rate": 4.1826390712222743e-05, "loss": 0.0768, "step": 71600 }, { "epoch": 0.82, "learning_rate": 4.182410758113677e-05, "loss": 0.1937, "step": 71620 }, { "epoch": 0.82, "learning_rate": 4.18218244500508e-05, "loss": 0.4806, "step": 71640 }, { "epoch": 0.82, "learning_rate": 4.1819541318964826e-05, "loss": 0.1199, "step": 71660 }, { "epoch": 0.82, "learning_rate": 4.181725818787886e-05, "loss": 0.1076, "step": 71680 }, { "epoch": 0.82, "learning_rate": 4.1814975056792885e-05, "loss": 0.1831, "step": 71700 }, { "epoch": 0.82, "learning_rate": 4.1812691925706915e-05, "loss": 0.281, "step": 71720 }, { "epoch": 0.82, "learning_rate": 4.1810408794620944e-05, "loss": 0.1393, "step": 71740 }, { "epoch": 0.82, "learning_rate": 4.1808125663534974e-05, "loss": 0.1449, "step": 71760 }, { "epoch": 0.82, "learning_rate": 4.1805842532449004e-05, "loss": 0.2602, "step": 71780 }, { "epoch": 0.82, "learning_rate": 4.180355940136303e-05, "loss": 0.1847, "step": 71800 }, { "epoch": 0.82, "learning_rate": 4.180127627027706e-05, "loss": 0.1829, "step": 71820 }, { "epoch": 0.82, "learning_rate": 4.1798993139191086e-05, "loss": 0.1281, "step": 71840 }, { "epoch": 0.82, "learning_rate": 4.1796710008105116e-05, "loss": 0.2386, "step": 71860 }, { "epoch": 0.82, "learning_rate": 4.1794426877019145e-05, "loss": 0.3127, "step": 71880 }, { "epoch": 0.82, "learning_rate": 4.1792143745933175e-05, "loss": 0.5943, "step": 71900 }, { "epoch": 0.82, "learning_rate": 4.1789860614847205e-05, "loss": 0.1209, "step": 71920 }, { "epoch": 0.82, "learning_rate": 4.178757748376123e-05, "loss": 0.207, "step": 71940 }, { "epoch": 0.82, "learning_rate": 4.178529435267526e-05, "loss": 0.3329, "step": 71960 }, { "epoch": 0.82, "learning_rate": 4.1783011221589294e-05, "loss": 0.152, "step": 71980 }, { "epoch": 0.82, "learning_rate": 4.178072809050332e-05, "loss": 0.1155, "step": 72000 }, { "epoch": 0.82, "learning_rate": 4.1778444959417346e-05, "loss": 0.1639, "step": 72020 }, { "epoch": 0.82, "learning_rate": 4.1776161828331376e-05, "loss": 0.6307, "step": 72040 }, { "epoch": 0.82, "learning_rate": 4.1773878697245406e-05, "loss": 0.2756, "step": 72060 }, { "epoch": 0.82, "learning_rate": 4.1771595566159436e-05, "loss": 0.1047, "step": 72080 }, { "epoch": 0.82, "learning_rate": 4.176931243507346e-05, "loss": 0.1272, "step": 72100 }, { "epoch": 0.82, "learning_rate": 4.1767029303987495e-05, "loss": 0.0474, "step": 72120 }, { "epoch": 0.82, "learning_rate": 4.176474617290152e-05, "loss": 0.1976, "step": 72140 }, { "epoch": 0.82, "learning_rate": 4.176246304181555e-05, "loss": 0.2103, "step": 72160 }, { "epoch": 0.82, "learning_rate": 4.176017991072958e-05, "loss": 0.3277, "step": 72180 }, { "epoch": 0.82, "learning_rate": 4.175789677964361e-05, "loss": 0.1125, "step": 72200 }, { "epoch": 0.82, "learning_rate": 4.1755613648557637e-05, "loss": 0.2741, "step": 72220 }, { "epoch": 0.82, "learning_rate": 4.175333051747166e-05, "loss": 0.1021, "step": 72240 }, { "epoch": 0.82, "learning_rate": 4.175104738638569e-05, "loss": 0.2468, "step": 72260 }, { "epoch": 0.83, "learning_rate": 4.174876425529972e-05, "loss": 0.1739, "step": 72280 }, { "epoch": 0.83, "learning_rate": 4.174648112421375e-05, "loss": 0.1638, "step": 72300 }, { "epoch": 0.83, "learning_rate": 4.174419799312778e-05, "loss": 0.219, "step": 72320 }, { "epoch": 0.83, "learning_rate": 4.174191486204181e-05, "loss": 0.1998, "step": 72340 }, { "epoch": 0.83, "learning_rate": 4.173963173095584e-05, "loss": 0.1314, "step": 72360 }, { "epoch": 0.83, "learning_rate": 4.173734859986986e-05, "loss": 0.2287, "step": 72380 }, { "epoch": 0.83, "learning_rate": 4.173506546878389e-05, "loss": 0.1263, "step": 72400 }, { "epoch": 0.83, "learning_rate": 4.1732782337697927e-05, "loss": 0.0537, "step": 72420 }, { "epoch": 0.83, "learning_rate": 4.173049920661195e-05, "loss": 0.2221, "step": 72440 }, { "epoch": 0.83, "learning_rate": 4.172821607552598e-05, "loss": 0.1495, "step": 72460 }, { "epoch": 0.83, "learning_rate": 4.172593294444e-05, "loss": 0.3079, "step": 72480 }, { "epoch": 0.83, "learning_rate": 4.172364981335404e-05, "loss": 0.1837, "step": 72500 }, { "epoch": 0.83, "learning_rate": 4.172136668226807e-05, "loss": 0.0455, "step": 72520 }, { "epoch": 0.83, "learning_rate": 4.171908355118209e-05, "loss": 0.3836, "step": 72540 }, { "epoch": 0.83, "learning_rate": 4.171680042009612e-05, "loss": 0.4028, "step": 72560 }, { "epoch": 0.83, "learning_rate": 4.171451728901015e-05, "loss": 0.3468, "step": 72580 }, { "epoch": 0.83, "learning_rate": 4.171223415792418e-05, "loss": 0.3525, "step": 72600 }, { "epoch": 0.83, "learning_rate": 4.170995102683821e-05, "loss": 0.3062, "step": 72620 }, { "epoch": 0.83, "learning_rate": 4.170766789575224e-05, "loss": 0.1429, "step": 72640 }, { "epoch": 0.83, "learning_rate": 4.170538476466627e-05, "loss": 0.3352, "step": 72660 }, { "epoch": 0.83, "learning_rate": 4.170310163358029e-05, "loss": 0.0601, "step": 72680 }, { "epoch": 0.83, "learning_rate": 4.170081850249432e-05, "loss": 0.1301, "step": 72700 }, { "epoch": 0.83, "learning_rate": 4.169853537140835e-05, "loss": 0.1745, "step": 72720 }, { "epoch": 0.83, "learning_rate": 4.169625224032238e-05, "loss": 0.1285, "step": 72740 }, { "epoch": 0.83, "learning_rate": 4.169396910923641e-05, "loss": 0.2425, "step": 72760 }, { "epoch": 0.83, "learning_rate": 4.1691685978150434e-05, "loss": 0.424, "step": 72780 }, { "epoch": 0.83, "learning_rate": 4.168940284706447e-05, "loss": 0.232, "step": 72800 }, { "epoch": 0.83, "learning_rate": 4.168711971597849e-05, "loss": 0.5908, "step": 72820 }, { "epoch": 0.83, "learning_rate": 4.168483658489252e-05, "loss": 0.134, "step": 72840 }, { "epoch": 0.83, "learning_rate": 4.168255345380655e-05, "loss": 0.1836, "step": 72860 }, { "epoch": 0.83, "learning_rate": 4.168027032272058e-05, "loss": 0.4484, "step": 72880 }, { "epoch": 0.83, "learning_rate": 4.167798719163461e-05, "loss": 0.0775, "step": 72900 }, { "epoch": 0.83, "learning_rate": 4.1675704060548635e-05, "loss": 0.5633, "step": 72920 }, { "epoch": 0.83, "learning_rate": 4.167342092946267e-05, "loss": 0.1603, "step": 72940 }, { "epoch": 0.83, "learning_rate": 4.1671137798376694e-05, "loss": 0.2131, "step": 72960 }, { "epoch": 0.83, "learning_rate": 4.1668854667290724e-05, "loss": 0.1959, "step": 72980 }, { "epoch": 0.83, "learning_rate": 4.1666571536204754e-05, "loss": 0.0814, "step": 73000 }, { "epoch": 0.83, "learning_rate": 4.166428840511878e-05, "loss": 0.2337, "step": 73020 }, { "epoch": 0.83, "learning_rate": 4.166200527403281e-05, "loss": 0.3066, "step": 73040 }, { "epoch": 0.83, "learning_rate": 4.1659722142946836e-05, "loss": 0.1443, "step": 73060 }, { "epoch": 0.83, "learning_rate": 4.1657439011860865e-05, "loss": 0.0978, "step": 73080 }, { "epoch": 0.83, "learning_rate": 4.16551558807749e-05, "loss": 0.1843, "step": 73100 }, { "epoch": 0.83, "learning_rate": 4.1652872749688925e-05, "loss": 0.1824, "step": 73120 }, { "epoch": 0.83, "learning_rate": 4.1650589618602955e-05, "loss": 0.1087, "step": 73140 }, { "epoch": 0.84, "learning_rate": 4.164830648751698e-05, "loss": 0.37, "step": 73160 }, { "epoch": 0.84, "learning_rate": 4.1646023356431014e-05, "loss": 0.1757, "step": 73180 }, { "epoch": 0.84, "learning_rate": 4.1643740225345044e-05, "loss": 0.1741, "step": 73200 }, { "epoch": 0.84, "learning_rate": 4.1641457094259066e-05, "loss": 0.5237, "step": 73220 }, { "epoch": 0.84, "learning_rate": 4.1639173963173096e-05, "loss": 0.4851, "step": 73240 }, { "epoch": 0.84, "learning_rate": 4.1636890832087126e-05, "loss": 0.1816, "step": 73260 }, { "epoch": 0.84, "learning_rate": 4.1634607701001156e-05, "loss": 0.408, "step": 73280 }, { "epoch": 0.84, "learning_rate": 4.1632324569915185e-05, "loss": 0.3081, "step": 73300 }, { "epoch": 0.84, "learning_rate": 4.1630041438829215e-05, "loss": 0.0843, "step": 73320 }, { "epoch": 0.84, "learning_rate": 4.1627758307743245e-05, "loss": 0.1719, "step": 73340 }, { "epoch": 0.84, "learning_rate": 4.162547517665727e-05, "loss": 0.2168, "step": 73360 }, { "epoch": 0.84, "learning_rate": 4.16231920455713e-05, "loss": 0.2244, "step": 73380 }, { "epoch": 0.84, "learning_rate": 4.162090891448533e-05, "loss": 0.2226, "step": 73400 }, { "epoch": 0.84, "learning_rate": 4.1618625783399357e-05, "loss": 0.3159, "step": 73420 }, { "epoch": 0.84, "learning_rate": 4.1616342652313386e-05, "loss": 0.3057, "step": 73440 }, { "epoch": 0.84, "learning_rate": 4.161405952122741e-05, "loss": 0.173, "step": 73460 }, { "epoch": 0.84, "learning_rate": 4.1611776390141446e-05, "loss": 0.2172, "step": 73480 }, { "epoch": 0.84, "learning_rate": 4.160949325905547e-05, "loss": 0.4254, "step": 73500 }, { "epoch": 0.84, "learning_rate": 4.16072101279695e-05, "loss": 0.2828, "step": 73520 }, { "epoch": 0.84, "learning_rate": 4.160492699688353e-05, "loss": 0.3989, "step": 73540 }, { "epoch": 0.84, "learning_rate": 4.160264386579756e-05, "loss": 0.1672, "step": 73560 }, { "epoch": 0.84, "learning_rate": 4.160036073471159e-05, "loss": 0.0772, "step": 73580 }, { "epoch": 0.84, "learning_rate": 4.159807760362561e-05, "loss": 0.2934, "step": 73600 }, { "epoch": 0.84, "learning_rate": 4.1595794472539647e-05, "loss": 0.208, "step": 73620 }, { "epoch": 0.84, "learning_rate": 4.159351134145367e-05, "loss": 0.3783, "step": 73640 }, { "epoch": 0.84, "learning_rate": 4.15912282103677e-05, "loss": 0.1632, "step": 73660 }, { "epoch": 0.84, "learning_rate": 4.158894507928173e-05, "loss": 0.427, "step": 73680 }, { "epoch": 0.84, "learning_rate": 4.158666194819576e-05, "loss": 0.1873, "step": 73700 }, { "epoch": 0.84, "learning_rate": 4.158437881710979e-05, "loss": 0.2988, "step": 73720 }, { "epoch": 0.84, "learning_rate": 4.158209568602381e-05, "loss": 0.2428, "step": 73740 }, { "epoch": 0.84, "learning_rate": 4.157981255493784e-05, "loss": 0.1569, "step": 73760 }, { "epoch": 0.84, "learning_rate": 4.157752942385188e-05, "loss": 0.2711, "step": 73780 }, { "epoch": 0.84, "learning_rate": 4.15752462927659e-05, "loss": 0.2047, "step": 73800 }, { "epoch": 0.84, "learning_rate": 4.157296316167993e-05, "loss": 0.1651, "step": 73820 }, { "epoch": 0.84, "learning_rate": 4.157068003059395e-05, "loss": 0.112, "step": 73840 }, { "epoch": 0.84, "learning_rate": 4.156839689950799e-05, "loss": 0.2866, "step": 73860 }, { "epoch": 0.84, "learning_rate": 4.156611376842202e-05, "loss": 0.2717, "step": 73880 }, { "epoch": 0.84, "learning_rate": 4.156383063733604e-05, "loss": 0.2136, "step": 73900 }, { "epoch": 0.84, "learning_rate": 4.156154750625008e-05, "loss": 0.2321, "step": 73920 }, { "epoch": 0.84, "learning_rate": 4.15592643751641e-05, "loss": 0.3107, "step": 73940 }, { "epoch": 0.84, "learning_rate": 4.155698124407813e-05, "loss": 0.2731, "step": 73960 }, { "epoch": 0.84, "learning_rate": 4.155469811299216e-05, "loss": 0.3219, "step": 73980 }, { "epoch": 0.84, "learning_rate": 4.155241498190619e-05, "loss": 0.1708, "step": 74000 }, { "epoch": 0.84, "learning_rate": 4.155013185082022e-05, "loss": 0.396, "step": 74020 }, { "epoch": 0.85, "learning_rate": 4.154784871973424e-05, "loss": 0.5085, "step": 74040 }, { "epoch": 0.85, "learning_rate": 4.154556558864827e-05, "loss": 0.1899, "step": 74060 }, { "epoch": 0.85, "learning_rate": 4.15432824575623e-05, "loss": 0.1266, "step": 74080 }, { "epoch": 0.85, "learning_rate": 4.154099932647633e-05, "loss": 0.204, "step": 74100 }, { "epoch": 0.85, "learning_rate": 4.153871619539036e-05, "loss": 0.3244, "step": 74120 }, { "epoch": 0.85, "learning_rate": 4.1536433064304384e-05, "loss": 0.1837, "step": 74140 }, { "epoch": 0.85, "learning_rate": 4.153414993321842e-05, "loss": 0.1276, "step": 74160 }, { "epoch": 0.85, "learning_rate": 4.1531866802132444e-05, "loss": 0.258, "step": 74180 }, { "epoch": 0.85, "learning_rate": 4.1529583671046474e-05, "loss": 0.2824, "step": 74200 }, { "epoch": 0.85, "learning_rate": 4.15273005399605e-05, "loss": 0.3686, "step": 74220 }, { "epoch": 0.85, "learning_rate": 4.152501740887453e-05, "loss": 0.2101, "step": 74240 }, { "epoch": 0.85, "learning_rate": 4.152273427778856e-05, "loss": 0.0843, "step": 74260 }, { "epoch": 0.85, "learning_rate": 4.1520451146702585e-05, "loss": 0.5199, "step": 74280 }, { "epoch": 0.85, "learning_rate": 4.151816801561662e-05, "loss": 0.3118, "step": 74300 }, { "epoch": 0.85, "learning_rate": 4.1515884884530645e-05, "loss": 0.1706, "step": 74320 }, { "epoch": 0.85, "learning_rate": 4.1513601753444675e-05, "loss": 0.1166, "step": 74340 }, { "epoch": 0.85, "learning_rate": 4.1511318622358704e-05, "loss": 0.1283, "step": 74360 }, { "epoch": 0.85, "learning_rate": 4.1509035491272734e-05, "loss": 0.2289, "step": 74380 }, { "epoch": 0.85, "learning_rate": 4.1506752360186764e-05, "loss": 0.1899, "step": 74400 }, { "epoch": 0.85, "learning_rate": 4.1504469229100786e-05, "loss": 0.2428, "step": 74420 }, { "epoch": 0.85, "learning_rate": 4.1502186098014816e-05, "loss": 0.0362, "step": 74440 }, { "epoch": 0.85, "learning_rate": 4.149990296692885e-05, "loss": 0.4753, "step": 74460 }, { "epoch": 0.85, "learning_rate": 4.1497619835842876e-05, "loss": 0.5526, "step": 74480 }, { "epoch": 0.85, "learning_rate": 4.1495336704756905e-05, "loss": 0.2798, "step": 74500 }, { "epoch": 0.85, "learning_rate": 4.1493053573670935e-05, "loss": 0.247, "step": 74520 }, { "epoch": 0.85, "learning_rate": 4.1490770442584965e-05, "loss": 0.1552, "step": 74540 }, { "epoch": 0.85, "learning_rate": 4.1488487311498994e-05, "loss": 0.3896, "step": 74560 }, { "epoch": 0.85, "learning_rate": 4.148620418041302e-05, "loss": 0.1761, "step": 74580 }, { "epoch": 0.85, "learning_rate": 4.1483921049327054e-05, "loss": 0.3334, "step": 74600 }, { "epoch": 0.85, "learning_rate": 4.1481637918241077e-05, "loss": 0.3885, "step": 74620 }, { "epoch": 0.85, "learning_rate": 4.1479354787155106e-05, "loss": 0.1134, "step": 74640 }, { "epoch": 0.85, "learning_rate": 4.1477071656069136e-05, "loss": 0.1032, "step": 74660 }, { "epoch": 0.85, "learning_rate": 4.1474788524983166e-05, "loss": 0.377, "step": 74680 }, { "epoch": 0.85, "learning_rate": 4.1472505393897195e-05, "loss": 0.3051, "step": 74700 }, { "epoch": 0.85, "learning_rate": 4.147022226281122e-05, "loss": 0.3183, "step": 74720 }, { "epoch": 0.85, "learning_rate": 4.146793913172525e-05, "loss": 0.1038, "step": 74740 }, { "epoch": 0.85, "learning_rate": 4.146565600063928e-05, "loss": 0.3008, "step": 74760 }, { "epoch": 0.85, "learning_rate": 4.146337286955331e-05, "loss": 0.2091, "step": 74780 }, { "epoch": 0.85, "learning_rate": 4.146108973846734e-05, "loss": 0.1554, "step": 74800 }, { "epoch": 0.85, "learning_rate": 4.1458806607381367e-05, "loss": 0.3235, "step": 74820 }, { "epoch": 0.85, "learning_rate": 4.1456523476295396e-05, "loss": 0.278, "step": 74840 }, { "epoch": 0.85, "learning_rate": 4.145424034520942e-05, "loss": 0.2111, "step": 74860 }, { "epoch": 0.85, "learning_rate": 4.145195721412345e-05, "loss": 0.0817, "step": 74880 }, { "epoch": 0.86, "learning_rate": 4.144967408303748e-05, "loss": 0.3323, "step": 74900 }, { "epoch": 0.86, "learning_rate": 4.144739095195151e-05, "loss": 0.1254, "step": 74920 }, { "epoch": 0.86, "learning_rate": 4.144510782086554e-05, "loss": 0.1437, "step": 74940 }, { "epoch": 0.86, "learning_rate": 4.144282468977956e-05, "loss": 0.2006, "step": 74960 }, { "epoch": 0.86, "learning_rate": 4.14405415586936e-05, "loss": 0.2457, "step": 74980 }, { "epoch": 0.86, "learning_rate": 4.143825842760763e-05, "loss": 0.2212, "step": 75000 }, { "epoch": 0.86, "learning_rate": 4.143597529652165e-05, "loss": 0.1682, "step": 75020 }, { "epoch": 0.86, "learning_rate": 4.143369216543568e-05, "loss": 0.3171, "step": 75040 }, { "epoch": 0.86, "learning_rate": 4.143140903434971e-05, "loss": 0.2303, "step": 75060 }, { "epoch": 0.86, "learning_rate": 4.142912590326374e-05, "loss": 0.0435, "step": 75080 }, { "epoch": 0.86, "learning_rate": 4.142684277217777e-05, "loss": 0.3691, "step": 75100 }, { "epoch": 0.86, "learning_rate": 4.14245596410918e-05, "loss": 0.121, "step": 75120 }, { "epoch": 0.86, "learning_rate": 4.142227651000583e-05, "loss": 0.182, "step": 75140 }, { "epoch": 0.86, "learning_rate": 4.141999337891985e-05, "loss": 0.3146, "step": 75160 }, { "epoch": 0.86, "learning_rate": 4.141771024783388e-05, "loss": 0.0898, "step": 75180 }, { "epoch": 0.86, "learning_rate": 4.141542711674791e-05, "loss": 0.3065, "step": 75200 }, { "epoch": 0.86, "learning_rate": 4.141314398566194e-05, "loss": 0.2539, "step": 75220 }, { "epoch": 0.86, "learning_rate": 4.141086085457597e-05, "loss": 0.3641, "step": 75240 }, { "epoch": 0.86, "learning_rate": 4.140857772348999e-05, "loss": 0.1679, "step": 75260 }, { "epoch": 0.86, "learning_rate": 4.140629459240403e-05, "loss": 0.3509, "step": 75280 }, { "epoch": 0.86, "learning_rate": 4.140401146131805e-05, "loss": 0.2172, "step": 75300 }, { "epoch": 0.86, "learning_rate": 4.140172833023208e-05, "loss": 0.1657, "step": 75320 }, { "epoch": 0.86, "learning_rate": 4.139944519914611e-05, "loss": 0.2065, "step": 75340 }, { "epoch": 0.86, "learning_rate": 4.139716206806014e-05, "loss": 0.4141, "step": 75360 }, { "epoch": 0.86, "learning_rate": 4.139487893697417e-05, "loss": 0.6588, "step": 75380 }, { "epoch": 0.86, "learning_rate": 4.1392595805888193e-05, "loss": 0.1965, "step": 75400 }, { "epoch": 0.86, "learning_rate": 4.139031267480223e-05, "loss": 0.288, "step": 75420 }, { "epoch": 0.86, "learning_rate": 4.138802954371625e-05, "loss": 0.1468, "step": 75440 }, { "epoch": 0.86, "learning_rate": 4.138574641263028e-05, "loss": 0.3798, "step": 75460 }, { "epoch": 0.86, "learning_rate": 4.138346328154431e-05, "loss": 0.4294, "step": 75480 }, { "epoch": 0.86, "learning_rate": 4.138118015045834e-05, "loss": 0.299, "step": 75500 }, { "epoch": 0.86, "learning_rate": 4.137889701937237e-05, "loss": 0.1638, "step": 75520 }, { "epoch": 0.86, "learning_rate": 4.1376613888286394e-05, "loss": 0.1777, "step": 75540 }, { "epoch": 0.86, "learning_rate": 4.1374330757200424e-05, "loss": 0.3193, "step": 75560 }, { "epoch": 0.86, "learning_rate": 4.137204762611446e-05, "loss": 0.0968, "step": 75580 }, { "epoch": 0.86, "learning_rate": 4.1369764495028484e-05, "loss": 0.1151, "step": 75600 }, { "epoch": 0.86, "learning_rate": 4.136748136394251e-05, "loss": 0.2748, "step": 75620 }, { "epoch": 0.86, "learning_rate": 4.1365198232856536e-05, "loss": 0.2378, "step": 75640 }, { "epoch": 0.86, "learning_rate": 4.136291510177057e-05, "loss": 0.1515, "step": 75660 }, { "epoch": 0.86, "learning_rate": 4.13606319706846e-05, "loss": 0.0988, "step": 75680 }, { "epoch": 0.86, "learning_rate": 4.1358348839598625e-05, "loss": 0.2809, "step": 75700 }, { "epoch": 0.86, "learning_rate": 4.1356065708512655e-05, "loss": 0.4688, "step": 75720 }, { "epoch": 0.86, "learning_rate": 4.1353782577426685e-05, "loss": 0.3019, "step": 75740 }, { "epoch": 0.86, "learning_rate": 4.1351499446340714e-05, "loss": 0.3537, "step": 75760 }, { "epoch": 0.87, "learning_rate": 4.1349216315254744e-05, "loss": 0.2706, "step": 75780 }, { "epoch": 0.87, "learning_rate": 4.1346933184168774e-05, "loss": 0.2137, "step": 75800 }, { "epoch": 0.87, "learning_rate": 4.13446500530828e-05, "loss": 0.1683, "step": 75820 }, { "epoch": 0.87, "learning_rate": 4.1342366921996826e-05, "loss": 0.5623, "step": 75840 }, { "epoch": 0.87, "learning_rate": 4.1340083790910856e-05, "loss": 0.1886, "step": 75860 }, { "epoch": 0.87, "learning_rate": 4.1337800659824886e-05, "loss": 0.3056, "step": 75880 }, { "epoch": 0.87, "learning_rate": 4.1335517528738915e-05, "loss": 0.2742, "step": 75900 }, { "epoch": 0.87, "learning_rate": 4.1333234397652945e-05, "loss": 0.22, "step": 75920 }, { "epoch": 0.87, "learning_rate": 4.133095126656697e-05, "loss": 0.2001, "step": 75940 }, { "epoch": 0.87, "learning_rate": 4.1328668135481004e-05, "loss": 0.3609, "step": 75960 }, { "epoch": 0.87, "learning_rate": 4.132638500439503e-05, "loss": 0.2433, "step": 75980 }, { "epoch": 0.87, "learning_rate": 4.132410187330906e-05, "loss": 0.185, "step": 76000 }, { "epoch": 0.87, "learning_rate": 4.1321818742223087e-05, "loss": 0.122, "step": 76020 }, { "epoch": 0.87, "learning_rate": 4.1319535611137116e-05, "loss": 0.0389, "step": 76040 }, { "epoch": 0.87, "learning_rate": 4.1317252480051146e-05, "loss": 0.2973, "step": 76060 }, { "epoch": 0.87, "learning_rate": 4.131496934896517e-05, "loss": 0.4328, "step": 76080 }, { "epoch": 0.87, "learning_rate": 4.1312686217879205e-05, "loss": 0.4472, "step": 76100 }, { "epoch": 0.87, "learning_rate": 4.131040308679323e-05, "loss": 0.3061, "step": 76120 }, { "epoch": 0.87, "learning_rate": 4.130811995570726e-05, "loss": 0.3169, "step": 76140 }, { "epoch": 0.87, "learning_rate": 4.130583682462129e-05, "loss": 0.1582, "step": 76160 }, { "epoch": 0.87, "learning_rate": 4.130355369353532e-05, "loss": 0.1668, "step": 76180 }, { "epoch": 0.87, "learning_rate": 4.130127056244935e-05, "loss": 0.2583, "step": 76200 }, { "epoch": 0.87, "learning_rate": 4.129898743136337e-05, "loss": 0.228, "step": 76220 }, { "epoch": 0.87, "learning_rate": 4.12967043002774e-05, "loss": 0.2873, "step": 76240 }, { "epoch": 0.87, "learning_rate": 4.1294421169191436e-05, "loss": 0.2081, "step": 76260 }, { "epoch": 0.87, "learning_rate": 4.129213803810546e-05, "loss": 0.4539, "step": 76280 }, { "epoch": 0.87, "learning_rate": 4.128985490701949e-05, "loss": 0.2025, "step": 76300 }, { "epoch": 0.87, "learning_rate": 4.128757177593351e-05, "loss": 0.2981, "step": 76320 }, { "epoch": 0.87, "learning_rate": 4.128528864484755e-05, "loss": 0.0979, "step": 76340 }, { "epoch": 0.87, "learning_rate": 4.128300551376158e-05, "loss": 0.2505, "step": 76360 }, { "epoch": 0.87, "learning_rate": 4.12807223826756e-05, "loss": 0.0988, "step": 76380 }, { "epoch": 0.87, "learning_rate": 4.127843925158964e-05, "loss": 0.4123, "step": 76400 }, { "epoch": 0.87, "learning_rate": 4.127615612050366e-05, "loss": 0.7099, "step": 76420 }, { "epoch": 0.87, "learning_rate": 4.127387298941769e-05, "loss": 0.1434, "step": 76440 }, { "epoch": 0.87, "learning_rate": 4.127158985833172e-05, "loss": 0.2312, "step": 76460 }, { "epoch": 0.87, "learning_rate": 4.126930672724575e-05, "loss": 0.3346, "step": 76480 }, { "epoch": 0.87, "learning_rate": 4.126702359615978e-05, "loss": 0.1377, "step": 76500 }, { "epoch": 0.87, "learning_rate": 4.12647404650738e-05, "loss": 0.1388, "step": 76520 }, { "epoch": 0.87, "learning_rate": 4.126245733398783e-05, "loss": 0.1023, "step": 76540 }, { "epoch": 0.87, "learning_rate": 4.126017420290186e-05, "loss": 0.3312, "step": 76560 }, { "epoch": 0.87, "learning_rate": 4.125789107181589e-05, "loss": 0.1598, "step": 76580 }, { "epoch": 0.87, "learning_rate": 4.125560794072992e-05, "loss": 0.1927, "step": 76600 }, { "epoch": 0.87, "learning_rate": 4.125332480964394e-05, "loss": 0.3559, "step": 76620 }, { "epoch": 0.87, "learning_rate": 4.125104167855798e-05, "loss": 0.1069, "step": 76640 }, { "epoch": 0.88, "learning_rate": 4.1248758547472e-05, "loss": 0.0688, "step": 76660 }, { "epoch": 0.88, "learning_rate": 4.124647541638603e-05, "loss": 0.298, "step": 76680 }, { "epoch": 0.88, "learning_rate": 4.124419228530006e-05, "loss": 0.4177, "step": 76700 }, { "epoch": 0.88, "learning_rate": 4.124190915421409e-05, "loss": 0.1948, "step": 76720 }, { "epoch": 0.88, "learning_rate": 4.123962602312812e-05, "loss": 0.2373, "step": 76740 }, { "epoch": 0.88, "learning_rate": 4.1237342892042144e-05, "loss": 0.132, "step": 76760 }, { "epoch": 0.88, "learning_rate": 4.123505976095618e-05, "loss": 0.1369, "step": 76780 }, { "epoch": 0.88, "learning_rate": 4.1232776629870204e-05, "loss": 0.287, "step": 76800 }, { "epoch": 0.88, "learning_rate": 4.123049349878423e-05, "loss": 0.2554, "step": 76820 }, { "epoch": 0.88, "learning_rate": 4.122821036769826e-05, "loss": 0.3586, "step": 76840 }, { "epoch": 0.88, "learning_rate": 4.122592723661229e-05, "loss": 0.4717, "step": 76860 }, { "epoch": 0.88, "learning_rate": 4.122364410552632e-05, "loss": 0.4372, "step": 76880 }, { "epoch": 0.88, "learning_rate": 4.1221360974440345e-05, "loss": 0.1574, "step": 76900 }, { "epoch": 0.88, "learning_rate": 4.1219077843354375e-05, "loss": 0.1398, "step": 76920 }, { "epoch": 0.88, "learning_rate": 4.121679471226841e-05, "loss": 0.1522, "step": 76940 }, { "epoch": 0.88, "learning_rate": 4.1214511581182434e-05, "loss": 0.1009, "step": 76960 }, { "epoch": 0.88, "learning_rate": 4.1212228450096464e-05, "loss": 0.2098, "step": 76980 }, { "epoch": 0.88, "learning_rate": 4.1209945319010494e-05, "loss": 0.4325, "step": 77000 }, { "epoch": 0.88, "learning_rate": 4.120766218792452e-05, "loss": 0.3158, "step": 77020 }, { "epoch": 0.88, "learning_rate": 4.120537905683855e-05, "loss": 0.8798, "step": 77040 }, { "epoch": 0.88, "learning_rate": 4.1203095925752576e-05, "loss": 0.1091, "step": 77060 }, { "epoch": 0.88, "learning_rate": 4.120081279466661e-05, "loss": 0.0652, "step": 77080 }, { "epoch": 0.88, "learning_rate": 4.1198529663580635e-05, "loss": 0.288, "step": 77100 }, { "epoch": 0.88, "learning_rate": 4.1196246532494665e-05, "loss": 0.1724, "step": 77120 }, { "epoch": 0.88, "learning_rate": 4.1193963401408695e-05, "loss": 0.2559, "step": 77140 }, { "epoch": 0.88, "learning_rate": 4.1191680270322724e-05, "loss": 0.1253, "step": 77160 }, { "epoch": 0.88, "learning_rate": 4.1189397139236754e-05, "loss": 0.4757, "step": 77180 }, { "epoch": 0.88, "learning_rate": 4.118711400815078e-05, "loss": 0.2668, "step": 77200 }, { "epoch": 0.88, "learning_rate": 4.1184830877064807e-05, "loss": 0.3063, "step": 77220 }, { "epoch": 0.88, "learning_rate": 4.1182547745978836e-05, "loss": 0.2642, "step": 77240 }, { "epoch": 0.88, "learning_rate": 4.1180264614892866e-05, "loss": 0.1643, "step": 77260 }, { "epoch": 0.88, "learning_rate": 4.1177981483806896e-05, "loss": 0.1807, "step": 77280 }, { "epoch": 0.88, "learning_rate": 4.1175698352720925e-05, "loss": 0.025, "step": 77300 }, { "epoch": 0.88, "learning_rate": 4.1173415221634955e-05, "loss": 0.2563, "step": 77320 }, { "epoch": 0.88, "learning_rate": 4.117113209054898e-05, "loss": 0.2184, "step": 77340 }, { "epoch": 0.88, "learning_rate": 4.116884895946301e-05, "loss": 0.1133, "step": 77360 }, { "epoch": 0.88, "learning_rate": 4.116656582837704e-05, "loss": 0.2656, "step": 77380 }, { "epoch": 0.88, "learning_rate": 4.116428269729107e-05, "loss": 0.3189, "step": 77400 }, { "epoch": 0.88, "learning_rate": 4.1161999566205097e-05, "loss": 0.2278, "step": 77420 }, { "epoch": 0.88, "learning_rate": 4.115971643511912e-05, "loss": 0.1332, "step": 77440 }, { "epoch": 0.88, "learning_rate": 4.1157433304033156e-05, "loss": 0.1641, "step": 77460 }, { "epoch": 0.88, "learning_rate": 4.115515017294718e-05, "loss": 0.2528, "step": 77480 }, { "epoch": 0.88, "learning_rate": 4.115286704186121e-05, "loss": 0.1581, "step": 77500 }, { "epoch": 0.88, "learning_rate": 4.115058391077524e-05, "loss": 0.3321, "step": 77520 }, { "epoch": 0.89, "learning_rate": 4.114830077968927e-05, "loss": 0.1186, "step": 77540 }, { "epoch": 0.89, "learning_rate": 4.11460176486033e-05, "loss": 0.1505, "step": 77560 }, { "epoch": 0.89, "learning_rate": 4.114373451751732e-05, "loss": 0.2427, "step": 77580 }, { "epoch": 0.89, "learning_rate": 4.114145138643136e-05, "loss": 0.1916, "step": 77600 }, { "epoch": 0.89, "learning_rate": 4.113916825534539e-05, "loss": 0.217, "step": 77620 }, { "epoch": 0.89, "learning_rate": 4.113688512425941e-05, "loss": 0.1533, "step": 77640 }, { "epoch": 0.89, "learning_rate": 4.113460199317344e-05, "loss": 0.1977, "step": 77660 }, { "epoch": 0.89, "learning_rate": 4.113231886208747e-05, "loss": 0.1918, "step": 77680 }, { "epoch": 0.89, "learning_rate": 4.11300357310015e-05, "loss": 0.1945, "step": 77700 }, { "epoch": 0.89, "learning_rate": 4.112775259991553e-05, "loss": 0.2191, "step": 77720 }, { "epoch": 0.89, "learning_rate": 4.112546946882955e-05, "loss": 0.1507, "step": 77740 }, { "epoch": 0.89, "learning_rate": 4.112318633774359e-05, "loss": 0.1921, "step": 77760 }, { "epoch": 0.89, "learning_rate": 4.112090320665761e-05, "loss": 0.1106, "step": 77780 }, { "epoch": 0.89, "learning_rate": 4.111862007557164e-05, "loss": 0.1838, "step": 77800 }, { "epoch": 0.89, "learning_rate": 4.111633694448567e-05, "loss": 0.2184, "step": 77820 }, { "epoch": 0.89, "learning_rate": 4.11140538133997e-05, "loss": 0.0949, "step": 77840 }, { "epoch": 0.89, "learning_rate": 4.111177068231373e-05, "loss": 0.2063, "step": 77860 }, { "epoch": 0.89, "learning_rate": 4.110948755122775e-05, "loss": 0.0536, "step": 77880 }, { "epoch": 0.89, "learning_rate": 4.110720442014178e-05, "loss": 0.2594, "step": 77900 }, { "epoch": 0.89, "learning_rate": 4.110492128905581e-05, "loss": 0.0605, "step": 77920 }, { "epoch": 0.89, "learning_rate": 4.110263815796984e-05, "loss": 0.1506, "step": 77940 }, { "epoch": 0.89, "learning_rate": 4.110035502688387e-05, "loss": 0.1717, "step": 77960 }, { "epoch": 0.89, "learning_rate": 4.10980718957979e-05, "loss": 0.2772, "step": 77980 }, { "epoch": 0.89, "learning_rate": 4.109578876471193e-05, "loss": 0.2217, "step": 78000 }, { "epoch": 0.89, "learning_rate": 4.109350563362595e-05, "loss": 0.3997, "step": 78020 }, { "epoch": 0.89, "learning_rate": 4.109122250253998e-05, "loss": 0.0984, "step": 78040 }, { "epoch": 0.89, "learning_rate": 4.108893937145402e-05, "loss": 0.2188, "step": 78060 }, { "epoch": 0.89, "learning_rate": 4.108665624036804e-05, "loss": 0.2811, "step": 78080 }, { "epoch": 0.89, "learning_rate": 4.108437310928207e-05, "loss": 0.3436, "step": 78100 }, { "epoch": 0.89, "learning_rate": 4.1082089978196095e-05, "loss": 0.1424, "step": 78120 }, { "epoch": 0.89, "learning_rate": 4.107980684711013e-05, "loss": 0.1111, "step": 78140 }, { "epoch": 0.89, "learning_rate": 4.107752371602416e-05, "loss": 0.1775, "step": 78160 }, { "epoch": 0.89, "learning_rate": 4.1075240584938184e-05, "loss": 0.2657, "step": 78180 }, { "epoch": 0.89, "learning_rate": 4.1072957453852214e-05, "loss": 0.3297, "step": 78200 }, { "epoch": 0.89, "learning_rate": 4.107067432276624e-05, "loss": 0.3545, "step": 78220 }, { "epoch": 0.89, "learning_rate": 4.106839119168027e-05, "loss": 0.1383, "step": 78240 }, { "epoch": 0.89, "learning_rate": 4.10661080605943e-05, "loss": 0.464, "step": 78260 }, { "epoch": 0.89, "learning_rate": 4.106382492950833e-05, "loss": 0.0877, "step": 78280 }, { "epoch": 0.89, "learning_rate": 4.106154179842236e-05, "loss": 0.2048, "step": 78300 }, { "epoch": 0.89, "learning_rate": 4.1059258667336385e-05, "loss": 0.5301, "step": 78320 }, { "epoch": 0.89, "learning_rate": 4.1056975536250415e-05, "loss": 0.3248, "step": 78340 }, { "epoch": 0.89, "learning_rate": 4.1054692405164444e-05, "loss": 0.1638, "step": 78360 }, { "epoch": 0.89, "learning_rate": 4.1052409274078474e-05, "loss": 0.1771, "step": 78380 }, { "epoch": 0.89, "learning_rate": 4.1050126142992504e-05, "loss": 0.3089, "step": 78400 }, { "epoch": 0.9, "learning_rate": 4.1047843011906527e-05, "loss": 0.3275, "step": 78420 }, { "epoch": 0.9, "learning_rate": 4.104555988082056e-05, "loss": 0.1556, "step": 78440 }, { "epoch": 0.9, "learning_rate": 4.1043276749734586e-05, "loss": 0.5912, "step": 78460 }, { "epoch": 0.9, "learning_rate": 4.1040993618648616e-05, "loss": 0.1492, "step": 78480 }, { "epoch": 0.9, "learning_rate": 4.1038710487562645e-05, "loss": 0.4337, "step": 78500 }, { "epoch": 0.9, "learning_rate": 4.1036427356476675e-05, "loss": 0.1351, "step": 78520 }, { "epoch": 0.9, "learning_rate": 4.1034144225390705e-05, "loss": 0.3569, "step": 78540 }, { "epoch": 0.9, "learning_rate": 4.103186109430473e-05, "loss": 0.241, "step": 78560 }, { "epoch": 0.9, "learning_rate": 4.1029577963218764e-05, "loss": 0.1886, "step": 78580 }, { "epoch": 0.9, "learning_rate": 4.102729483213279e-05, "loss": 0.226, "step": 78600 }, { "epoch": 0.9, "learning_rate": 4.1025011701046817e-05, "loss": 0.2333, "step": 78620 }, { "epoch": 0.9, "learning_rate": 4.1022728569960846e-05, "loss": 0.2988, "step": 78640 }, { "epoch": 0.9, "learning_rate": 4.1020445438874876e-05, "loss": 0.1725, "step": 78660 }, { "epoch": 0.9, "learning_rate": 4.1018162307788906e-05, "loss": 0.4181, "step": 78680 }, { "epoch": 0.9, "learning_rate": 4.101587917670293e-05, "loss": 0.1274, "step": 78700 }, { "epoch": 0.9, "learning_rate": 4.101359604561696e-05, "loss": 0.1362, "step": 78720 }, { "epoch": 0.9, "learning_rate": 4.1011312914530995e-05, "loss": 0.3766, "step": 78740 }, { "epoch": 0.9, "learning_rate": 4.100902978344502e-05, "loss": 0.3366, "step": 78760 }, { "epoch": 0.9, "learning_rate": 4.100674665235905e-05, "loss": 0.0721, "step": 78780 }, { "epoch": 0.9, "learning_rate": 4.100446352127307e-05, "loss": 0.1742, "step": 78800 }, { "epoch": 0.9, "learning_rate": 4.100218039018711e-05, "loss": 0.1568, "step": 78820 }, { "epoch": 0.9, "learning_rate": 4.0999897259101136e-05, "loss": 0.2908, "step": 78840 }, { "epoch": 0.9, "learning_rate": 4.099761412801516e-05, "loss": 0.216, "step": 78860 }, { "epoch": 0.9, "learning_rate": 4.0995330996929196e-05, "loss": 0.1503, "step": 78880 }, { "epoch": 0.9, "learning_rate": 4.099304786584322e-05, "loss": 0.1044, "step": 78900 }, { "epoch": 0.9, "learning_rate": 4.099076473475725e-05, "loss": 0.1455, "step": 78920 }, { "epoch": 0.9, "learning_rate": 4.098848160367128e-05, "loss": 0.4213, "step": 78940 }, { "epoch": 0.9, "learning_rate": 4.098619847258531e-05, "loss": 0.0841, "step": 78960 }, { "epoch": 0.9, "learning_rate": 4.098391534149934e-05, "loss": 0.404, "step": 78980 }, { "epoch": 0.9, "learning_rate": 4.098163221041336e-05, "loss": 0.152, "step": 79000 }, { "epoch": 0.9, "learning_rate": 4.097934907932739e-05, "loss": 0.6016, "step": 79020 }, { "epoch": 0.9, "learning_rate": 4.097706594824142e-05, "loss": 0.0151, "step": 79040 }, { "epoch": 0.9, "learning_rate": 4.097478281715545e-05, "loss": 0.1603, "step": 79060 }, { "epoch": 0.9, "learning_rate": 4.097249968606948e-05, "loss": 0.1132, "step": 79080 }, { "epoch": 0.9, "learning_rate": 4.09702165549835e-05, "loss": 0.1228, "step": 79100 }, { "epoch": 0.9, "learning_rate": 4.096793342389754e-05, "loss": 0.3116, "step": 79120 }, { "epoch": 0.9, "learning_rate": 4.096565029281156e-05, "loss": 0.3063, "step": 79140 }, { "epoch": 0.9, "learning_rate": 4.096336716172559e-05, "loss": 0.1042, "step": 79160 }, { "epoch": 0.9, "learning_rate": 4.096108403063962e-05, "loss": 0.1618, "step": 79180 }, { "epoch": 0.9, "learning_rate": 4.095880089955365e-05, "loss": 0.5133, "step": 79200 }, { "epoch": 0.9, "learning_rate": 4.095651776846768e-05, "loss": 0.0927, "step": 79220 }, { "epoch": 0.9, "learning_rate": 4.09542346373817e-05, "loss": 0.109, "step": 79240 }, { "epoch": 0.9, "learning_rate": 4.095195150629574e-05, "loss": 0.125, "step": 79260 }, { "epoch": 0.91, "learning_rate": 4.094966837520976e-05, "loss": 0.3549, "step": 79280 }, { "epoch": 0.91, "learning_rate": 4.094738524412379e-05, "loss": 0.1826, "step": 79300 }, { "epoch": 0.91, "learning_rate": 4.094510211303782e-05, "loss": 0.2821, "step": 79320 }, { "epoch": 0.91, "learning_rate": 4.094281898195185e-05, "loss": 0.2366, "step": 79340 }, { "epoch": 0.91, "learning_rate": 4.094053585086588e-05, "loss": 0.1813, "step": 79360 }, { "epoch": 0.91, "learning_rate": 4.0938252719779904e-05, "loss": 0.3153, "step": 79380 }, { "epoch": 0.91, "learning_rate": 4.0935969588693934e-05, "loss": 0.2585, "step": 79400 }, { "epoch": 0.91, "learning_rate": 4.093368645760797e-05, "loss": 0.1479, "step": 79420 }, { "epoch": 0.91, "learning_rate": 4.093140332652199e-05, "loss": 0.2293, "step": 79440 }, { "epoch": 0.91, "learning_rate": 4.092912019543602e-05, "loss": 0.2843, "step": 79460 }, { "epoch": 0.91, "learning_rate": 4.092683706435005e-05, "loss": 0.2884, "step": 79480 }, { "epoch": 0.91, "learning_rate": 4.092455393326408e-05, "loss": 0.2025, "step": 79500 }, { "epoch": 0.91, "learning_rate": 4.092227080217811e-05, "loss": 0.0665, "step": 79520 }, { "epoch": 0.91, "learning_rate": 4.0919987671092135e-05, "loss": 0.3229, "step": 79540 }, { "epoch": 0.91, "learning_rate": 4.091770454000617e-05, "loss": 0.2705, "step": 79560 }, { "epoch": 0.91, "learning_rate": 4.0915421408920194e-05, "loss": 0.5607, "step": 79580 }, { "epoch": 0.91, "learning_rate": 4.0913138277834224e-05, "loss": 0.4711, "step": 79600 }, { "epoch": 0.91, "learning_rate": 4.091085514674825e-05, "loss": 0.0872, "step": 79620 }, { "epoch": 0.91, "learning_rate": 4.090857201566228e-05, "loss": 0.087, "step": 79640 }, { "epoch": 0.91, "learning_rate": 4.090628888457631e-05, "loss": 0.3834, "step": 79660 }, { "epoch": 0.91, "learning_rate": 4.0904005753490336e-05, "loss": 0.162, "step": 79680 }, { "epoch": 0.91, "learning_rate": 4.0901722622404365e-05, "loss": 0.242, "step": 79700 }, { "epoch": 0.91, "learning_rate": 4.0899439491318395e-05, "loss": 0.2433, "step": 79720 }, { "epoch": 0.91, "learning_rate": 4.0897156360232425e-05, "loss": 0.2541, "step": 79740 }, { "epoch": 0.91, "learning_rate": 4.0894873229146454e-05, "loss": 0.3393, "step": 79760 }, { "epoch": 0.91, "learning_rate": 4.0892590098060484e-05, "loss": 0.3552, "step": 79780 }, { "epoch": 0.91, "learning_rate": 4.0890306966974514e-05, "loss": 0.2059, "step": 79800 }, { "epoch": 0.91, "learning_rate": 4.0888023835888537e-05, "loss": 0.3088, "step": 79820 }, { "epoch": 0.91, "learning_rate": 4.0885740704802566e-05, "loss": 0.3433, "step": 79840 }, { "epoch": 0.91, "learning_rate": 4.0883457573716596e-05, "loss": 0.3447, "step": 79860 }, { "epoch": 0.91, "learning_rate": 4.0881174442630626e-05, "loss": 0.1279, "step": 79880 }, { "epoch": 0.91, "learning_rate": 4.0878891311544655e-05, "loss": 0.1864, "step": 79900 }, { "epoch": 0.91, "learning_rate": 4.087660818045868e-05, "loss": 0.3501, "step": 79920 }, { "epoch": 0.91, "learning_rate": 4.0874325049372715e-05, "loss": 0.1271, "step": 79940 }, { "epoch": 0.91, "learning_rate": 4.087204191828674e-05, "loss": 0.2952, "step": 79960 }, { "epoch": 0.91, "learning_rate": 4.086975878720077e-05, "loss": 0.1417, "step": 79980 }, { "epoch": 0.91, "learning_rate": 4.08674756561148e-05, "loss": 0.2658, "step": 80000 }, { "epoch": 0.91, "learning_rate": 4.086519252502883e-05, "loss": 0.1036, "step": 80020 }, { "epoch": 0.91, "learning_rate": 4.0862909393942856e-05, "loss": 0.1001, "step": 80040 }, { "epoch": 0.91, "learning_rate": 4.086062626285688e-05, "loss": 0.0518, "step": 80060 }, { "epoch": 0.91, "learning_rate": 4.0858343131770916e-05, "loss": 0.1893, "step": 80080 }, { "epoch": 0.91, "learning_rate": 4.0856060000684945e-05, "loss": 0.4506, "step": 80100 }, { "epoch": 0.91, "learning_rate": 4.085377686959897e-05, "loss": 0.1583, "step": 80120 }, { "epoch": 0.91, "learning_rate": 4.0851493738513e-05, "loss": 0.1489, "step": 80140 }, { "epoch": 0.92, "learning_rate": 4.084921060742703e-05, "loss": 0.1826, "step": 80160 }, { "epoch": 0.92, "learning_rate": 4.084692747634106e-05, "loss": 0.1154, "step": 80180 }, { "epoch": 0.92, "learning_rate": 4.084464434525509e-05, "loss": 0.1688, "step": 80200 }, { "epoch": 0.92, "learning_rate": 4.084236121416911e-05, "loss": 0.1984, "step": 80220 }, { "epoch": 0.92, "learning_rate": 4.0840078083083146e-05, "loss": 0.0937, "step": 80240 }, { "epoch": 0.92, "learning_rate": 4.083779495199717e-05, "loss": 0.3176, "step": 80260 }, { "epoch": 0.92, "learning_rate": 4.08355118209112e-05, "loss": 0.4357, "step": 80280 }, { "epoch": 0.92, "learning_rate": 4.083322868982523e-05, "loss": 0.1849, "step": 80300 }, { "epoch": 0.92, "learning_rate": 4.083094555873926e-05, "loss": 0.1872, "step": 80320 }, { "epoch": 0.92, "learning_rate": 4.082866242765329e-05, "loss": 0.2604, "step": 80340 }, { "epoch": 0.92, "learning_rate": 4.082637929656731e-05, "loss": 0.0486, "step": 80360 }, { "epoch": 0.92, "learning_rate": 4.082409616548134e-05, "loss": 0.2445, "step": 80380 }, { "epoch": 0.92, "learning_rate": 4.082181303439537e-05, "loss": 0.3879, "step": 80400 }, { "epoch": 0.92, "learning_rate": 4.08195299033094e-05, "loss": 0.2993, "step": 80420 }, { "epoch": 0.92, "learning_rate": 4.081724677222343e-05, "loss": 0.2451, "step": 80440 }, { "epoch": 0.92, "learning_rate": 4.081496364113746e-05, "loss": 0.03, "step": 80460 }, { "epoch": 0.92, "learning_rate": 4.081268051005149e-05, "loss": 0.208, "step": 80480 }, { "epoch": 0.92, "learning_rate": 4.081039737896551e-05, "loss": 0.137, "step": 80500 }, { "epoch": 0.92, "learning_rate": 4.080811424787954e-05, "loss": 0.1493, "step": 80520 }, { "epoch": 0.92, "learning_rate": 4.080583111679358e-05, "loss": 0.1474, "step": 80540 }, { "epoch": 0.92, "learning_rate": 4.08035479857076e-05, "loss": 0.1849, "step": 80560 }, { "epoch": 0.92, "learning_rate": 4.080126485462163e-05, "loss": 0.3229, "step": 80580 }, { "epoch": 0.92, "learning_rate": 4.0798981723535654e-05, "loss": 0.5316, "step": 80600 }, { "epoch": 0.92, "learning_rate": 4.079669859244969e-05, "loss": 0.3052, "step": 80620 }, { "epoch": 0.92, "learning_rate": 4.079441546136372e-05, "loss": 0.0852, "step": 80640 }, { "epoch": 0.92, "learning_rate": 4.079213233027774e-05, "loss": 0.1655, "step": 80660 }, { "epoch": 0.92, "learning_rate": 4.078984919919177e-05, "loss": 0.1113, "step": 80680 }, { "epoch": 0.92, "learning_rate": 4.07875660681058e-05, "loss": 0.2113, "step": 80700 }, { "epoch": 0.92, "learning_rate": 4.078528293701983e-05, "loss": 0.1348, "step": 80720 }, { "epoch": 0.92, "learning_rate": 4.078299980593386e-05, "loss": 0.069, "step": 80740 }, { "epoch": 0.92, "learning_rate": 4.078071667484789e-05, "loss": 0.397, "step": 80760 }, { "epoch": 0.92, "learning_rate": 4.077843354376192e-05, "loss": 0.2228, "step": 80780 }, { "epoch": 0.92, "learning_rate": 4.0776150412675944e-05, "loss": 0.2026, "step": 80800 }, { "epoch": 0.92, "learning_rate": 4.077386728158997e-05, "loss": 0.2125, "step": 80820 }, { "epoch": 0.92, "learning_rate": 4.0771584150504e-05, "loss": 0.4391, "step": 80840 }, { "epoch": 0.92, "learning_rate": 4.076930101941803e-05, "loss": 0.4121, "step": 80860 }, { "epoch": 0.92, "learning_rate": 4.076701788833206e-05, "loss": 0.1971, "step": 80880 }, { "epoch": 0.92, "learning_rate": 4.0764734757246085e-05, "loss": 0.1322, "step": 80900 }, { "epoch": 0.92, "learning_rate": 4.076245162616012e-05, "loss": 0.2325, "step": 80920 }, { "epoch": 0.92, "learning_rate": 4.0760168495074145e-05, "loss": 0.1484, "step": 80940 }, { "epoch": 0.92, "learning_rate": 4.0757885363988174e-05, "loss": 0.1118, "step": 80960 }, { "epoch": 0.92, "learning_rate": 4.0755602232902204e-05, "loss": 0.176, "step": 80980 }, { "epoch": 0.92, "learning_rate": 4.0753319101816234e-05, "loss": 0.2869, "step": 81000 }, { "epoch": 0.92, "learning_rate": 4.075103597073026e-05, "loss": 0.2264, "step": 81020 }, { "epoch": 0.93, "learning_rate": 4.0748752839644286e-05, "loss": 0.2995, "step": 81040 }, { "epoch": 0.93, "learning_rate": 4.074646970855832e-05, "loss": 0.1765, "step": 81060 }, { "epoch": 0.93, "learning_rate": 4.0744186577472346e-05, "loss": 0.306, "step": 81080 }, { "epoch": 0.93, "learning_rate": 4.0741903446386375e-05, "loss": 0.1682, "step": 81100 }, { "epoch": 0.93, "learning_rate": 4.0739620315300405e-05, "loss": 0.2382, "step": 81120 }, { "epoch": 0.93, "learning_rate": 4.0737337184214435e-05, "loss": 0.2109, "step": 81140 }, { "epoch": 0.93, "learning_rate": 4.0735054053128464e-05, "loss": 0.0569, "step": 81160 }, { "epoch": 0.93, "learning_rate": 4.073277092204249e-05, "loss": 0.2023, "step": 81180 }, { "epoch": 0.93, "learning_rate": 4.073048779095652e-05, "loss": 0.0953, "step": 81200 }, { "epoch": 0.93, "learning_rate": 4.072820465987055e-05, "loss": 0.6648, "step": 81220 }, { "epoch": 0.93, "learning_rate": 4.0725921528784576e-05, "loss": 0.3965, "step": 81240 }, { "epoch": 0.93, "learning_rate": 4.0723638397698606e-05, "loss": 0.5226, "step": 81260 }, { "epoch": 0.93, "learning_rate": 4.072135526661263e-05, "loss": 0.3292, "step": 81280 }, { "epoch": 0.93, "learning_rate": 4.0719072135526665e-05, "loss": 0.2229, "step": 81300 }, { "epoch": 0.93, "learning_rate": 4.0716789004440695e-05, "loss": 0.1514, "step": 81320 }, { "epoch": 0.93, "learning_rate": 4.071450587335472e-05, "loss": 0.2273, "step": 81340 }, { "epoch": 0.93, "learning_rate": 4.0712222742268754e-05, "loss": 0.0923, "step": 81360 }, { "epoch": 0.93, "learning_rate": 4.070993961118278e-05, "loss": 0.1833, "step": 81380 }, { "epoch": 0.93, "learning_rate": 4.070765648009681e-05, "loss": 0.1905, "step": 81400 }, { "epoch": 0.93, "learning_rate": 4.070537334901084e-05, "loss": 0.1835, "step": 81420 }, { "epoch": 0.93, "learning_rate": 4.0703090217924866e-05, "loss": 0.1733, "step": 81440 }, { "epoch": 0.93, "learning_rate": 4.0700807086838896e-05, "loss": 0.1627, "step": 81460 }, { "epoch": 0.93, "learning_rate": 4.069852395575292e-05, "loss": 0.3341, "step": 81480 }, { "epoch": 0.93, "learning_rate": 4.069624082466695e-05, "loss": 0.0845, "step": 81500 }, { "epoch": 0.93, "learning_rate": 4.069395769358098e-05, "loss": 0.1894, "step": 81520 }, { "epoch": 0.93, "learning_rate": 4.069167456249501e-05, "loss": 0.2863, "step": 81540 }, { "epoch": 0.93, "learning_rate": 4.068939143140904e-05, "loss": 0.1174, "step": 81560 }, { "epoch": 0.93, "learning_rate": 4.068710830032306e-05, "loss": 0.3412, "step": 81580 }, { "epoch": 0.93, "learning_rate": 4.06848251692371e-05, "loss": 0.2614, "step": 81600 }, { "epoch": 0.93, "learning_rate": 4.068254203815112e-05, "loss": 0.143, "step": 81620 }, { "epoch": 0.93, "learning_rate": 4.068025890706515e-05, "loss": 0.1167, "step": 81640 }, { "epoch": 0.93, "learning_rate": 4.067797577597918e-05, "loss": 0.3605, "step": 81660 }, { "epoch": 0.93, "learning_rate": 4.067569264489321e-05, "loss": 0.1281, "step": 81680 }, { "epoch": 0.93, "learning_rate": 4.067340951380724e-05, "loss": 0.1891, "step": 81700 }, { "epoch": 0.93, "learning_rate": 4.067112638272126e-05, "loss": 0.0935, "step": 81720 }, { "epoch": 0.93, "learning_rate": 4.06688432516353e-05, "loss": 0.1558, "step": 81740 }, { "epoch": 0.93, "learning_rate": 4.066656012054932e-05, "loss": 0.1705, "step": 81760 }, { "epoch": 0.93, "learning_rate": 4.066427698946335e-05, "loss": 0.2869, "step": 81780 }, { "epoch": 0.93, "learning_rate": 4.066199385837738e-05, "loss": 0.1929, "step": 81800 }, { "epoch": 0.93, "learning_rate": 4.065971072729141e-05, "loss": 0.2201, "step": 81820 }, { "epoch": 0.93, "learning_rate": 4.065742759620544e-05, "loss": 0.0613, "step": 81840 }, { "epoch": 0.93, "learning_rate": 4.065514446511946e-05, "loss": 0.1631, "step": 81860 }, { "epoch": 0.93, "learning_rate": 4.065286133403349e-05, "loss": 0.3422, "step": 81880 }, { "epoch": 0.93, "learning_rate": 4.065057820294753e-05, "loss": 0.1355, "step": 81900 }, { "epoch": 0.94, "learning_rate": 4.064829507186155e-05, "loss": 0.2454, "step": 81920 }, { "epoch": 0.94, "learning_rate": 4.064601194077558e-05, "loss": 0.3519, "step": 81940 }, { "epoch": 0.94, "learning_rate": 4.064372880968961e-05, "loss": 0.2943, "step": 81960 }, { "epoch": 0.94, "learning_rate": 4.064144567860364e-05, "loss": 0.4517, "step": 81980 }, { "epoch": 0.94, "learning_rate": 4.063916254751767e-05, "loss": 0.2611, "step": 82000 }, { "epoch": 0.94, "learning_rate": 4.063687941643169e-05, "loss": 0.1939, "step": 82020 }, { "epoch": 0.94, "learning_rate": 4.063459628534573e-05, "loss": 0.2668, "step": 82040 }, { "epoch": 0.94, "learning_rate": 4.063231315425975e-05, "loss": 0.1062, "step": 82060 }, { "epoch": 0.94, "learning_rate": 4.063003002317378e-05, "loss": 0.2281, "step": 82080 }, { "epoch": 0.94, "learning_rate": 4.062774689208781e-05, "loss": 0.119, "step": 82100 }, { "epoch": 0.94, "learning_rate": 4.062546376100184e-05, "loss": 0.3257, "step": 82120 }, { "epoch": 0.94, "learning_rate": 4.062318062991587e-05, "loss": 0.2642, "step": 82140 }, { "epoch": 0.94, "learning_rate": 4.0620897498829894e-05, "loss": 0.4408, "step": 82160 }, { "epoch": 0.94, "learning_rate": 4.0618614367743924e-05, "loss": 0.4043, "step": 82180 }, { "epoch": 0.94, "learning_rate": 4.0616331236657954e-05, "loss": 0.1674, "step": 82200 }, { "epoch": 0.94, "learning_rate": 4.061404810557198e-05, "loss": 0.2624, "step": 82220 }, { "epoch": 0.94, "learning_rate": 4.061176497448601e-05, "loss": 0.1302, "step": 82240 }, { "epoch": 0.94, "learning_rate": 4.060948184340004e-05, "loss": 0.2058, "step": 82260 }, { "epoch": 0.94, "learning_rate": 4.060719871231407e-05, "loss": 0.1129, "step": 82280 }, { "epoch": 0.94, "learning_rate": 4.0604915581228095e-05, "loss": 0.1785, "step": 82300 }, { "epoch": 0.94, "learning_rate": 4.0602632450142125e-05, "loss": 0.1328, "step": 82320 }, { "epoch": 0.94, "learning_rate": 4.0600349319056155e-05, "loss": 0.496, "step": 82340 }, { "epoch": 0.94, "learning_rate": 4.0598066187970184e-05, "loss": 0.1513, "step": 82360 }, { "epoch": 0.94, "learning_rate": 4.0595783056884214e-05, "loss": 0.241, "step": 82380 }, { "epoch": 0.94, "learning_rate": 4.059349992579824e-05, "loss": 0.1312, "step": 82400 }, { "epoch": 0.94, "learning_rate": 4.059121679471227e-05, "loss": 0.0595, "step": 82420 }, { "epoch": 0.94, "learning_rate": 4.0588933663626296e-05, "loss": 0.1991, "step": 82440 }, { "epoch": 0.94, "learning_rate": 4.0586650532540326e-05, "loss": 0.2414, "step": 82460 }, { "epoch": 0.94, "learning_rate": 4.0584367401454356e-05, "loss": 0.1521, "step": 82480 }, { "epoch": 0.94, "learning_rate": 4.0582084270368385e-05, "loss": 0.1128, "step": 82500 }, { "epoch": 0.94, "learning_rate": 4.0579801139282415e-05, "loss": 0.1685, "step": 82520 }, { "epoch": 0.94, "learning_rate": 4.057751800819644e-05, "loss": 0.122, "step": 82540 }, { "epoch": 0.94, "learning_rate": 4.0575234877110474e-05, "loss": 0.0359, "step": 82560 }, { "epoch": 0.94, "learning_rate": 4.0572951746024504e-05, "loss": 0.2865, "step": 82580 }, { "epoch": 0.94, "learning_rate": 4.057066861493853e-05, "loss": 0.2351, "step": 82600 }, { "epoch": 0.94, "learning_rate": 4.056838548385256e-05, "loss": 0.4371, "step": 82620 }, { "epoch": 0.94, "learning_rate": 4.0566102352766586e-05, "loss": 0.1134, "step": 82640 }, { "epoch": 0.94, "learning_rate": 4.0563819221680616e-05, "loss": 0.1523, "step": 82660 }, { "epoch": 0.94, "learning_rate": 4.0561536090594646e-05, "loss": 0.284, "step": 82680 }, { "epoch": 0.94, "learning_rate": 4.055925295950867e-05, "loss": 0.1234, "step": 82700 }, { "epoch": 0.94, "learning_rate": 4.0556969828422705e-05, "loss": 0.0702, "step": 82720 }, { "epoch": 0.94, "learning_rate": 4.055468669733673e-05, "loss": 0.24, "step": 82740 }, { "epoch": 0.94, "learning_rate": 4.055240356625076e-05, "loss": 0.1335, "step": 82760 }, { "epoch": 0.94, "learning_rate": 4.055012043516479e-05, "loss": 0.4584, "step": 82780 }, { "epoch": 0.95, "learning_rate": 4.054783730407882e-05, "loss": 0.3157, "step": 82800 }, { "epoch": 0.95, "learning_rate": 4.054555417299285e-05, "loss": 0.313, "step": 82820 }, { "epoch": 0.95, "learning_rate": 4.054327104190687e-05, "loss": 0.1856, "step": 82840 }, { "epoch": 0.95, "learning_rate": 4.05409879108209e-05, "loss": 0.3649, "step": 82860 }, { "epoch": 0.95, "learning_rate": 4.053870477973493e-05, "loss": 0.1596, "step": 82880 }, { "epoch": 0.95, "learning_rate": 4.053642164864896e-05, "loss": 0.3059, "step": 82900 }, { "epoch": 0.95, "learning_rate": 4.053413851756299e-05, "loss": 0.1328, "step": 82920 }, { "epoch": 0.95, "learning_rate": 4.053185538647702e-05, "loss": 0.2201, "step": 82940 }, { "epoch": 0.95, "learning_rate": 4.052957225539105e-05, "loss": 0.4139, "step": 82960 }, { "epoch": 0.95, "learning_rate": 4.052728912430507e-05, "loss": 0.0847, "step": 82980 }, { "epoch": 0.95, "learning_rate": 4.05250059932191e-05, "loss": 0.2374, "step": 83000 }, { "epoch": 0.95, "learning_rate": 4.052272286213314e-05, "loss": 0.2921, "step": 83020 }, { "epoch": 0.95, "learning_rate": 4.052043973104716e-05, "loss": 0.09, "step": 83040 }, { "epoch": 0.95, "learning_rate": 4.051815659996119e-05, "loss": 0.0507, "step": 83060 }, { "epoch": 0.95, "learning_rate": 4.051587346887521e-05, "loss": 0.1784, "step": 83080 }, { "epoch": 0.95, "learning_rate": 4.051359033778925e-05, "loss": 0.5294, "step": 83100 }, { "epoch": 0.95, "learning_rate": 4.051130720670328e-05, "loss": 0.2185, "step": 83120 }, { "epoch": 0.95, "learning_rate": 4.05090240756173e-05, "loss": 0.3108, "step": 83140 }, { "epoch": 0.95, "learning_rate": 4.050674094453133e-05, "loss": 0.1718, "step": 83160 }, { "epoch": 0.95, "learning_rate": 4.050445781344536e-05, "loss": 0.2061, "step": 83180 }, { "epoch": 0.95, "learning_rate": 4.050217468235939e-05, "loss": 0.1601, "step": 83200 }, { "epoch": 0.95, "learning_rate": 4.049989155127342e-05, "loss": 0.3125, "step": 83220 }, { "epoch": 0.95, "learning_rate": 4.049760842018745e-05, "loss": 0.3264, "step": 83240 }, { "epoch": 0.95, "learning_rate": 4.049532528910148e-05, "loss": 0.1834, "step": 83260 }, { "epoch": 0.95, "learning_rate": 4.04930421580155e-05, "loss": 0.153, "step": 83280 }, { "epoch": 0.95, "learning_rate": 4.049075902692953e-05, "loss": 0.2075, "step": 83300 }, { "epoch": 0.95, "learning_rate": 4.048847589584356e-05, "loss": 0.1766, "step": 83320 }, { "epoch": 0.95, "learning_rate": 4.048619276475759e-05, "loss": 0.1814, "step": 83340 }, { "epoch": 0.95, "learning_rate": 4.048390963367162e-05, "loss": 0.2249, "step": 83360 }, { "epoch": 0.95, "learning_rate": 4.0481626502585644e-05, "loss": 0.1507, "step": 83380 }, { "epoch": 0.95, "learning_rate": 4.047934337149968e-05, "loss": 0.1683, "step": 83400 }, { "epoch": 0.95, "learning_rate": 4.04770602404137e-05, "loss": 0.1281, "step": 83420 }, { "epoch": 0.95, "learning_rate": 4.047477710932773e-05, "loss": 0.1281, "step": 83440 }, { "epoch": 0.95, "learning_rate": 4.047249397824176e-05, "loss": 0.199, "step": 83460 }, { "epoch": 0.95, "learning_rate": 4.047021084715579e-05, "loss": 0.3118, "step": 83480 }, { "epoch": 0.95, "learning_rate": 4.046792771606982e-05, "loss": 0.1722, "step": 83500 }, { "epoch": 0.95, "learning_rate": 4.0465644584983845e-05, "loss": 0.3391, "step": 83520 }, { "epoch": 0.95, "learning_rate": 4.046336145389788e-05, "loss": 0.2174, "step": 83540 }, { "epoch": 0.95, "learning_rate": 4.0461078322811904e-05, "loss": 0.1519, "step": 83560 }, { "epoch": 0.95, "learning_rate": 4.0458795191725934e-05, "loss": 0.3122, "step": 83580 }, { "epoch": 0.95, "learning_rate": 4.0456512060639964e-05, "loss": 0.1788, "step": 83600 }, { "epoch": 0.95, "learning_rate": 4.045422892955399e-05, "loss": 0.2587, "step": 83620 }, { "epoch": 0.95, "learning_rate": 4.045194579846802e-05, "loss": 0.1847, "step": 83640 }, { "epoch": 0.96, "learning_rate": 4.0449662667382046e-05, "loss": 0.2185, "step": 83660 }, { "epoch": 0.96, "learning_rate": 4.0447379536296076e-05, "loss": 0.3046, "step": 83680 }, { "epoch": 0.96, "learning_rate": 4.044509640521011e-05, "loss": 0.1244, "step": 83700 }, { "epoch": 0.96, "learning_rate": 4.0442813274124135e-05, "loss": 0.1247, "step": 83720 }, { "epoch": 0.96, "learning_rate": 4.0440530143038165e-05, "loss": 0.2098, "step": 83740 }, { "epoch": 0.96, "learning_rate": 4.043824701195219e-05, "loss": 0.2144, "step": 83760 }, { "epoch": 0.96, "learning_rate": 4.0435963880866224e-05, "loss": 0.3615, "step": 83780 }, { "epoch": 0.96, "learning_rate": 4.0433680749780254e-05, "loss": 0.2011, "step": 83800 }, { "epoch": 0.96, "learning_rate": 4.043139761869428e-05, "loss": 0.1745, "step": 83820 }, { "epoch": 0.96, "learning_rate": 4.042911448760831e-05, "loss": 0.219, "step": 83840 }, { "epoch": 0.96, "learning_rate": 4.0426831356522336e-05, "loss": 0.5542, "step": 83860 }, { "epoch": 0.96, "learning_rate": 4.0424548225436366e-05, "loss": 0.1333, "step": 83880 }, { "epoch": 0.96, "learning_rate": 4.0422265094350395e-05, "loss": 0.5166, "step": 83900 }, { "epoch": 0.96, "learning_rate": 4.0419981963264425e-05, "loss": 0.1629, "step": 83920 }, { "epoch": 0.96, "learning_rate": 4.0417698832178455e-05, "loss": 0.2422, "step": 83940 }, { "epoch": 0.96, "learning_rate": 4.041541570109248e-05, "loss": 0.0544, "step": 83960 }, { "epoch": 0.96, "learning_rate": 4.041313257000651e-05, "loss": 0.1508, "step": 83980 }, { "epoch": 0.96, "learning_rate": 4.041084943892054e-05, "loss": 0.7907, "step": 84000 }, { "epoch": 0.96, "learning_rate": 4.040856630783457e-05, "loss": 0.0698, "step": 84020 }, { "epoch": 0.96, "learning_rate": 4.0406283176748596e-05, "loss": 0.3014, "step": 84040 }, { "epoch": 0.96, "learning_rate": 4.040400004566262e-05, "loss": 0.219, "step": 84060 }, { "epoch": 0.96, "learning_rate": 4.0401716914576656e-05, "loss": 0.1973, "step": 84080 }, { "epoch": 0.96, "learning_rate": 4.039943378349068e-05, "loss": 0.0994, "step": 84100 }, { "epoch": 0.96, "learning_rate": 4.039715065240471e-05, "loss": 0.1368, "step": 84120 }, { "epoch": 0.96, "learning_rate": 4.039486752131874e-05, "loss": 0.3595, "step": 84140 }, { "epoch": 0.96, "learning_rate": 4.039258439023277e-05, "loss": 0.085, "step": 84160 }, { "epoch": 0.96, "learning_rate": 4.03903012591468e-05, "loss": 0.2438, "step": 84180 }, { "epoch": 0.96, "learning_rate": 4.038801812806082e-05, "loss": 0.1556, "step": 84200 }, { "epoch": 0.96, "learning_rate": 4.038573499697486e-05, "loss": 0.2957, "step": 84220 }, { "epoch": 0.96, "learning_rate": 4.038345186588888e-05, "loss": 0.3353, "step": 84240 }, { "epoch": 0.96, "learning_rate": 4.038116873480291e-05, "loss": 0.0675, "step": 84260 }, { "epoch": 0.96, "learning_rate": 4.037888560371694e-05, "loss": 0.3048, "step": 84280 }, { "epoch": 0.96, "learning_rate": 4.037660247263097e-05, "loss": 0.2968, "step": 84300 }, { "epoch": 0.96, "learning_rate": 4.0374319341545e-05, "loss": 0.1955, "step": 84320 }, { "epoch": 0.96, "learning_rate": 4.037203621045902e-05, "loss": 0.4292, "step": 84340 }, { "epoch": 0.96, "learning_rate": 4.036975307937305e-05, "loss": 0.2978, "step": 84360 }, { "epoch": 0.96, "learning_rate": 4.036746994828709e-05, "loss": 0.1905, "step": 84380 }, { "epoch": 0.96, "learning_rate": 4.036518681720111e-05, "loss": 0.1923, "step": 84400 }, { "epoch": 0.96, "learning_rate": 4.036290368611514e-05, "loss": 0.2757, "step": 84420 }, { "epoch": 0.96, "learning_rate": 4.036062055502917e-05, "loss": 0.1018, "step": 84440 }, { "epoch": 0.96, "learning_rate": 4.03583374239432e-05, "loss": 0.3615, "step": 84460 }, { "epoch": 0.96, "learning_rate": 4.035605429285723e-05, "loss": 0.1999, "step": 84480 }, { "epoch": 0.96, "learning_rate": 4.035377116177125e-05, "loss": 0.5237, "step": 84500 }, { "epoch": 0.96, "learning_rate": 4.035148803068529e-05, "loss": 0.2679, "step": 84520 }, { "epoch": 0.97, "learning_rate": 4.034920489959931e-05, "loss": 0.2024, "step": 84540 }, { "epoch": 0.97, "learning_rate": 4.034692176851334e-05, "loss": 0.1631, "step": 84560 }, { "epoch": 0.97, "learning_rate": 4.034463863742737e-05, "loss": 0.1585, "step": 84580 }, { "epoch": 0.97, "learning_rate": 4.03423555063414e-05, "loss": 0.4349, "step": 84600 }, { "epoch": 0.97, "learning_rate": 4.034007237525543e-05, "loss": 0.1425, "step": 84620 }, { "epoch": 0.97, "learning_rate": 4.033778924416945e-05, "loss": 0.2677, "step": 84640 }, { "epoch": 0.97, "learning_rate": 4.033550611308348e-05, "loss": 0.2547, "step": 84660 }, { "epoch": 0.97, "learning_rate": 4.033322298199751e-05, "loss": 0.2848, "step": 84680 }, { "epoch": 0.97, "learning_rate": 4.033093985091154e-05, "loss": 0.5362, "step": 84700 }, { "epoch": 0.97, "learning_rate": 4.032865671982557e-05, "loss": 0.2293, "step": 84720 }, { "epoch": 0.97, "learning_rate": 4.03263735887396e-05, "loss": 0.3308, "step": 84740 }, { "epoch": 0.97, "learning_rate": 4.032409045765363e-05, "loss": 0.1053, "step": 84760 }, { "epoch": 0.97, "learning_rate": 4.0321807326567654e-05, "loss": 0.157, "step": 84780 }, { "epoch": 0.97, "learning_rate": 4.0319524195481684e-05, "loss": 0.0946, "step": 84800 }, { "epoch": 0.97, "learning_rate": 4.031724106439571e-05, "loss": 0.2431, "step": 84820 }, { "epoch": 0.97, "learning_rate": 4.031495793330974e-05, "loss": 0.1371, "step": 84840 }, { "epoch": 0.97, "learning_rate": 4.031267480222377e-05, "loss": 0.4019, "step": 84860 }, { "epoch": 0.97, "learning_rate": 4.0310391671137796e-05, "loss": 0.2206, "step": 84880 }, { "epoch": 0.97, "learning_rate": 4.030810854005183e-05, "loss": 0.1445, "step": 84900 }, { "epoch": 0.97, "learning_rate": 4.0305825408965855e-05, "loss": 0.4343, "step": 84920 }, { "epoch": 0.97, "learning_rate": 4.0303542277879885e-05, "loss": 0.1082, "step": 84940 }, { "epoch": 0.97, "learning_rate": 4.0301259146793914e-05, "loss": 0.1919, "step": 84960 }, { "epoch": 0.97, "learning_rate": 4.0298976015707944e-05, "loss": 0.3277, "step": 84980 }, { "epoch": 0.97, "learning_rate": 4.0296692884621974e-05, "loss": 0.4986, "step": 85000 }, { "epoch": 0.97, "learning_rate": 4.0294409753536e-05, "loss": 0.3923, "step": 85020 }, { "epoch": 0.97, "learning_rate": 4.029212662245003e-05, "loss": 0.3519, "step": 85040 }, { "epoch": 0.97, "learning_rate": 4.028984349136406e-05, "loss": 0.0936, "step": 85060 }, { "epoch": 0.97, "learning_rate": 4.0287560360278086e-05, "loss": 0.2763, "step": 85080 }, { "epoch": 0.97, "learning_rate": 4.0285277229192115e-05, "loss": 0.258, "step": 85100 }, { "epoch": 0.97, "learning_rate": 4.0282994098106145e-05, "loss": 0.1738, "step": 85120 }, { "epoch": 0.97, "learning_rate": 4.0280710967020175e-05, "loss": 0.2269, "step": 85140 }, { "epoch": 0.97, "learning_rate": 4.0278427835934204e-05, "loss": 0.248, "step": 85160 }, { "epoch": 0.97, "learning_rate": 4.027614470484823e-05, "loss": 0.2219, "step": 85180 }, { "epoch": 0.97, "learning_rate": 4.0273861573762264e-05, "loss": 0.1274, "step": 85200 }, { "epoch": 0.97, "learning_rate": 4.027157844267629e-05, "loss": 0.3086, "step": 85220 }, { "epoch": 0.97, "learning_rate": 4.0269295311590316e-05, "loss": 0.4479, "step": 85240 }, { "epoch": 0.97, "learning_rate": 4.0267012180504346e-05, "loss": 0.154, "step": 85260 }, { "epoch": 0.97, "learning_rate": 4.0264729049418376e-05, "loss": 0.6866, "step": 85280 }, { "epoch": 0.97, "learning_rate": 4.0262445918332405e-05, "loss": 0.4508, "step": 85300 }, { "epoch": 0.97, "learning_rate": 4.026016278724643e-05, "loss": 0.145, "step": 85320 }, { "epoch": 0.97, "learning_rate": 4.025787965616046e-05, "loss": 0.2464, "step": 85340 }, { "epoch": 0.97, "learning_rate": 4.025559652507449e-05, "loss": 0.1312, "step": 85360 }, { "epoch": 0.97, "learning_rate": 4.025331339398852e-05, "loss": 0.2682, "step": 85380 }, { "epoch": 0.97, "learning_rate": 4.025103026290255e-05, "loss": 0.399, "step": 85400 }, { "epoch": 0.98, "learning_rate": 4.024874713181658e-05, "loss": 0.2648, "step": 85420 }, { "epoch": 0.98, "learning_rate": 4.0246464000730606e-05, "loss": 0.0638, "step": 85440 }, { "epoch": 0.98, "learning_rate": 4.024418086964463e-05, "loss": 0.1956, "step": 85460 }, { "epoch": 0.98, "learning_rate": 4.024189773855866e-05, "loss": 0.1079, "step": 85480 }, { "epoch": 0.98, "learning_rate": 4.0239614607472695e-05, "loss": 0.1295, "step": 85500 }, { "epoch": 0.98, "learning_rate": 4.023733147638672e-05, "loss": 0.1331, "step": 85520 }, { "epoch": 0.98, "learning_rate": 4.023504834530075e-05, "loss": 0.3516, "step": 85540 }, { "epoch": 0.98, "learning_rate": 4.023276521421477e-05, "loss": 0.193, "step": 85560 }, { "epoch": 0.98, "learning_rate": 4.023048208312881e-05, "loss": 0.124, "step": 85580 }, { "epoch": 0.98, "learning_rate": 4.022819895204284e-05, "loss": 0.2511, "step": 85600 }, { "epoch": 0.98, "learning_rate": 4.022591582095686e-05, "loss": 0.2322, "step": 85620 }, { "epoch": 0.98, "learning_rate": 4.022363268987089e-05, "loss": 0.2651, "step": 85640 }, { "epoch": 0.98, "learning_rate": 4.022134955878492e-05, "loss": 0.234, "step": 85660 }, { "epoch": 0.98, "learning_rate": 4.021906642769895e-05, "loss": 0.2573, "step": 85680 }, { "epoch": 0.98, "learning_rate": 4.021678329661298e-05, "loss": 0.223, "step": 85700 }, { "epoch": 0.98, "learning_rate": 4.021450016552701e-05, "loss": 0.3105, "step": 85720 }, { "epoch": 0.98, "learning_rate": 4.021221703444104e-05, "loss": 0.166, "step": 85740 }, { "epoch": 0.98, "learning_rate": 4.020993390335506e-05, "loss": 0.1401, "step": 85760 }, { "epoch": 0.98, "learning_rate": 4.020765077226909e-05, "loss": 0.1139, "step": 85780 }, { "epoch": 0.98, "learning_rate": 4.020536764118312e-05, "loss": 0.1601, "step": 85800 }, { "epoch": 0.98, "learning_rate": 4.020308451009715e-05, "loss": 0.4539, "step": 85820 }, { "epoch": 0.98, "learning_rate": 4.020080137901118e-05, "loss": 0.3272, "step": 85840 }, { "epoch": 0.98, "learning_rate": 4.01985182479252e-05, "loss": 0.2521, "step": 85860 }, { "epoch": 0.98, "learning_rate": 4.019623511683924e-05, "loss": 0.2236, "step": 85880 }, { "epoch": 0.98, "learning_rate": 4.019395198575326e-05, "loss": 0.2818, "step": 85900 }, { "epoch": 0.98, "learning_rate": 4.019166885466729e-05, "loss": 0.1245, "step": 85920 }, { "epoch": 0.98, "learning_rate": 4.018938572358132e-05, "loss": 0.2191, "step": 85940 }, { "epoch": 0.98, "learning_rate": 4.018710259249535e-05, "loss": 0.3281, "step": 85960 }, { "epoch": 0.98, "learning_rate": 4.018481946140938e-05, "loss": 0.1972, "step": 85980 }, { "epoch": 0.98, "learning_rate": 4.0182536330323404e-05, "loss": 0.1483, "step": 86000 }, { "epoch": 0.98, "learning_rate": 4.018025319923744e-05, "loss": 0.2328, "step": 86020 }, { "epoch": 0.98, "learning_rate": 4.017797006815146e-05, "loss": 0.1842, "step": 86040 }, { "epoch": 0.98, "learning_rate": 4.017568693706549e-05, "loss": 0.3058, "step": 86060 }, { "epoch": 0.98, "learning_rate": 4.017340380597952e-05, "loss": 0.2092, "step": 86080 }, { "epoch": 0.98, "learning_rate": 4.017112067489355e-05, "loss": 0.1706, "step": 86100 }, { "epoch": 0.98, "learning_rate": 4.016883754380758e-05, "loss": 0.1343, "step": 86120 }, { "epoch": 0.98, "learning_rate": 4.0166554412721605e-05, "loss": 0.3229, "step": 86140 }, { "epoch": 0.98, "learning_rate": 4.0164271281635634e-05, "loss": 0.1123, "step": 86160 }, { "epoch": 0.98, "learning_rate": 4.016198815054967e-05, "loss": 0.4704, "step": 86180 }, { "epoch": 0.98, "learning_rate": 4.0159705019463694e-05, "loss": 0.1541, "step": 86200 }, { "epoch": 0.98, "learning_rate": 4.015742188837772e-05, "loss": 0.4133, "step": 86220 }, { "epoch": 0.98, "learning_rate": 4.0155138757291746e-05, "loss": 0.2155, "step": 86240 }, { "epoch": 0.98, "learning_rate": 4.015285562620578e-05, "loss": 0.1929, "step": 86260 }, { "epoch": 0.98, "learning_rate": 4.015057249511981e-05, "loss": 0.136, "step": 86280 }, { "epoch": 0.99, "learning_rate": 4.0148289364033835e-05, "loss": 0.2006, "step": 86300 }, { "epoch": 0.99, "learning_rate": 4.014600623294787e-05, "loss": 0.4127, "step": 86320 }, { "epoch": 0.99, "learning_rate": 4.0143723101861895e-05, "loss": 0.264, "step": 86340 }, { "epoch": 0.99, "learning_rate": 4.0141439970775924e-05, "loss": 0.8569, "step": 86360 }, { "epoch": 0.99, "learning_rate": 4.0139156839689954e-05, "loss": 0.2044, "step": 86380 }, { "epoch": 0.99, "learning_rate": 4.0136873708603984e-05, "loss": 0.4603, "step": 86400 }, { "epoch": 0.99, "learning_rate": 4.0134590577518013e-05, "loss": 0.1914, "step": 86420 }, { "epoch": 0.99, "learning_rate": 4.0132307446432036e-05, "loss": 0.0606, "step": 86440 }, { "epoch": 0.99, "learning_rate": 4.0130024315346066e-05, "loss": 0.096, "step": 86460 }, { "epoch": 0.99, "learning_rate": 4.0127741184260096e-05, "loss": 0.471, "step": 86480 }, { "epoch": 0.99, "learning_rate": 4.0125458053174125e-05, "loss": 0.1763, "step": 86500 }, { "epoch": 0.99, "learning_rate": 4.0123174922088155e-05, "loss": 0.3706, "step": 86520 }, { "epoch": 0.99, "learning_rate": 4.012089179100218e-05, "loss": 0.2038, "step": 86540 }, { "epoch": 0.99, "learning_rate": 4.0118608659916214e-05, "loss": 0.2379, "step": 86560 }, { "epoch": 0.99, "learning_rate": 4.011632552883024e-05, "loss": 0.3336, "step": 86580 }, { "epoch": 0.99, "learning_rate": 4.011404239774427e-05, "loss": 0.1818, "step": 86600 }, { "epoch": 0.99, "learning_rate": 4.01117592666583e-05, "loss": 0.2912, "step": 86620 }, { "epoch": 0.99, "learning_rate": 4.0109476135572326e-05, "loss": 0.2099, "step": 86640 }, { "epoch": 0.99, "learning_rate": 4.0107193004486356e-05, "loss": 0.2911, "step": 86660 }, { "epoch": 0.99, "learning_rate": 4.010490987340038e-05, "loss": 0.1503, "step": 86680 }, { "epoch": 0.99, "learning_rate": 4.0102626742314415e-05, "loss": 0.2194, "step": 86700 }, { "epoch": 0.99, "learning_rate": 4.010034361122844e-05, "loss": 0.0787, "step": 86720 }, { "epoch": 0.99, "learning_rate": 4.009806048014247e-05, "loss": 0.074, "step": 86740 }, { "epoch": 0.99, "learning_rate": 4.00957773490565e-05, "loss": 0.364, "step": 86760 }, { "epoch": 0.99, "learning_rate": 4.009349421797053e-05, "loss": 0.0834, "step": 86780 }, { "epoch": 0.99, "learning_rate": 4.009121108688456e-05, "loss": 0.1364, "step": 86800 }, { "epoch": 0.99, "learning_rate": 4.008892795579858e-05, "loss": 0.2281, "step": 86820 }, { "epoch": 0.99, "learning_rate": 4.008664482471261e-05, "loss": 0.2441, "step": 86840 }, { "epoch": 0.99, "learning_rate": 4.0084361693626646e-05, "loss": 0.1742, "step": 86860 }, { "epoch": 0.99, "learning_rate": 4.008207856254067e-05, "loss": 0.3951, "step": 86880 }, { "epoch": 0.99, "learning_rate": 4.00797954314547e-05, "loss": 0.0994, "step": 86900 }, { "epoch": 0.99, "learning_rate": 4.007751230036873e-05, "loss": 0.2723, "step": 86920 }, { "epoch": 0.99, "learning_rate": 4.007522916928276e-05, "loss": 0.5346, "step": 86940 }, { "epoch": 0.99, "learning_rate": 4.007294603819679e-05, "loss": 0.1468, "step": 86960 }, { "epoch": 0.99, "learning_rate": 4.007066290711081e-05, "loss": 0.0494, "step": 86980 }, { "epoch": 0.99, "learning_rate": 4.006837977602485e-05, "loss": 0.0865, "step": 87000 }, { "epoch": 0.99, "learning_rate": 4.006609664493887e-05, "loss": 0.0978, "step": 87020 }, { "epoch": 0.99, "learning_rate": 4.00638135138529e-05, "loss": 0.2307, "step": 87040 }, { "epoch": 0.99, "learning_rate": 4.006153038276693e-05, "loss": 0.3513, "step": 87060 }, { "epoch": 0.99, "learning_rate": 4.005924725168096e-05, "loss": 0.2101, "step": 87080 }, { "epoch": 0.99, "learning_rate": 4.005696412059499e-05, "loss": 0.2372, "step": 87100 }, { "epoch": 0.99, "learning_rate": 4.005468098950901e-05, "loss": 0.0832, "step": 87120 }, { "epoch": 0.99, "learning_rate": 4.005239785842304e-05, "loss": 0.2047, "step": 87140 }, { "epoch": 0.99, "learning_rate": 4.005011472733707e-05, "loss": 0.3675, "step": 87160 }, { "epoch": 1.0, "learning_rate": 4.00478315962511e-05, "loss": 0.2379, "step": 87180 }, { "epoch": 1.0, "learning_rate": 4.004554846516513e-05, "loss": 0.366, "step": 87200 }, { "epoch": 1.0, "learning_rate": 4.004326533407916e-05, "loss": 0.115, "step": 87220 }, { "epoch": 1.0, "learning_rate": 4.004098220299319e-05, "loss": 0.2273, "step": 87240 }, { "epoch": 1.0, "learning_rate": 4.003869907190721e-05, "loss": 0.2124, "step": 87260 }, { "epoch": 1.0, "learning_rate": 4.003641594082124e-05, "loss": 0.2842, "step": 87280 }, { "epoch": 1.0, "learning_rate": 4.003413280973527e-05, "loss": 0.5749, "step": 87300 }, { "epoch": 1.0, "learning_rate": 4.00318496786493e-05, "loss": 0.2096, "step": 87320 }, { "epoch": 1.0, "learning_rate": 4.002956654756333e-05, "loss": 0.5021, "step": 87340 }, { "epoch": 1.0, "learning_rate": 4.0027283416477354e-05, "loss": 0.119, "step": 87360 }, { "epoch": 1.0, "learning_rate": 4.002500028539139e-05, "loss": 0.1169, "step": 87380 }, { "epoch": 1.0, "learning_rate": 4.0022717154305414e-05, "loss": 0.1874, "step": 87400 }, { "epoch": 1.0, "learning_rate": 4.002043402321944e-05, "loss": 0.146, "step": 87420 }, { "epoch": 1.0, "learning_rate": 4.001815089213347e-05, "loss": 0.1223, "step": 87440 }, { "epoch": 1.0, "learning_rate": 4.00158677610475e-05, "loss": 0.1743, "step": 87460 }, { "epoch": 1.0, "learning_rate": 4.001358462996153e-05, "loss": 0.1212, "step": 87480 }, { "epoch": 1.0, "learning_rate": 4.0011301498875555e-05, "loss": 0.1047, "step": 87500 }, { "epoch": 1.0, "learning_rate": 4.0009018367789585e-05, "loss": 0.126, "step": 87520 }, { "epoch": 1.0, "learning_rate": 4.000673523670362e-05, "loss": 0.3195, "step": 87540 }, { "epoch": 1.0, "learning_rate": 4.0004452105617644e-05, "loss": 0.2356, "step": 87560 }, { "epoch": 1.0, "learning_rate": 4.0002168974531674e-05, "loss": 0.2808, "step": 87580 }, { "epoch": 1.0, "eval_gen_len": 5.20321665089877, "eval_loss": 0.2915886640548706, "eval_rouge1": 87.5277, "eval_rouge2": 55.4401, "eval_rougeL": 87.5013, "eval_rougeLsum": 87.494, "eval_runtime": 8622.7413, "eval_samples_per_second": 1.226, "eval_steps_per_second": 1.226, "step": 87599 } ], "max_steps": 437995, "num_train_epochs": 5, "total_flos": 7.517139038927585e+17, "trial_name": null, "trial_params": null }