{ "best_metric": 0.013290228322148323, "best_model_checkpoint": "./save/eng-zho_all_facebook/wav2vec2-large-xlsr-53/checkpoint-33648", "epoch": 16.0, "global_step": 89728, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.9999108416547794e-05, "loss": 0.0532, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.99983059914408e-05, "loss": 0.0519, "step": 20 }, { "epoch": 0.01, "learning_rate": 4.999741440798859e-05, "loss": 0.0503, "step": 30 }, { "epoch": 0.01, "learning_rate": 4.999652282453638e-05, "loss": 0.0487, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.999563124108417e-05, "loss": 0.0444, "step": 50 }, { "epoch": 0.01, "learning_rate": 4.999473965763195e-05, "loss": 0.0416, "step": 60 }, { "epoch": 0.01, "learning_rate": 4.9993848074179744e-05, "loss": 0.0399, "step": 70 }, { "epoch": 0.01, "learning_rate": 4.9992956490727535e-05, "loss": 0.0377, "step": 80 }, { "epoch": 0.02, "learning_rate": 4.999206490727532e-05, "loss": 0.0387, "step": 90 }, { "epoch": 0.02, "learning_rate": 4.999117332382311e-05, "loss": 0.0367, "step": 100 }, { "epoch": 0.02, "learning_rate": 4.99902817403709e-05, "loss": 0.0375, "step": 110 }, { "epoch": 0.02, "learning_rate": 4.9989390156918693e-05, "loss": 0.0368, "step": 120 }, { "epoch": 0.02, "learning_rate": 4.998849857346648e-05, "loss": 0.0357, "step": 130 }, { "epoch": 0.02, "learning_rate": 4.998760699001427e-05, "loss": 0.0354, "step": 140 }, { "epoch": 0.03, "learning_rate": 4.998671540656206e-05, "loss": 0.0364, "step": 150 }, { "epoch": 0.03, "learning_rate": 4.9985823823109845e-05, "loss": 0.0365, "step": 160 }, { "epoch": 0.03, "learning_rate": 4.9984932239657636e-05, "loss": 0.0357, "step": 170 }, { "epoch": 0.03, "learning_rate": 4.998404065620542e-05, "loss": 0.0356, "step": 180 }, { "epoch": 0.03, "learning_rate": 4.998314907275321e-05, "loss": 0.0353, "step": 190 }, { "epoch": 0.04, "learning_rate": 4.9982257489300996e-05, "loss": 0.0358, "step": 200 }, { "epoch": 0.04, "learning_rate": 4.998136590584879e-05, "loss": 0.0363, "step": 210 }, { "epoch": 0.04, "learning_rate": 4.998047432239658e-05, "loss": 0.0348, "step": 220 }, { "epoch": 0.04, "learning_rate": 4.997958273894437e-05, "loss": 0.0364, "step": 230 }, { "epoch": 0.04, "learning_rate": 4.997869115549216e-05, "loss": 0.0382, "step": 240 }, { "epoch": 0.04, "learning_rate": 4.9977799572039945e-05, "loss": 0.0352, "step": 250 }, { "epoch": 0.05, "learning_rate": 4.9976907988587736e-05, "loss": 0.0348, "step": 260 }, { "epoch": 0.05, "learning_rate": 4.997601640513552e-05, "loss": 0.0357, "step": 270 }, { "epoch": 0.05, "learning_rate": 4.997512482168331e-05, "loss": 0.0326, "step": 280 }, { "epoch": 0.05, "learning_rate": 4.9974233238231097e-05, "loss": 0.033, "step": 290 }, { "epoch": 0.05, "learning_rate": 4.997334165477889e-05, "loss": 0.0346, "step": 300 }, { "epoch": 0.06, "learning_rate": 4.997245007132668e-05, "loss": 0.0361, "step": 310 }, { "epoch": 0.06, "learning_rate": 4.9971558487874463e-05, "loss": 0.0346, "step": 320 }, { "epoch": 0.06, "learning_rate": 4.997066690442226e-05, "loss": 0.035, "step": 330 }, { "epoch": 0.06, "learning_rate": 4.9969775320970046e-05, "loss": 0.0337, "step": 340 }, { "epoch": 0.06, "learning_rate": 4.996888373751784e-05, "loss": 0.0336, "step": 350 }, { "epoch": 0.06, "learning_rate": 4.996799215406562e-05, "loss": 0.0334, "step": 360 }, { "epoch": 0.07, "learning_rate": 4.996710057061341e-05, "loss": 0.0306, "step": 370 }, { "epoch": 0.07, "learning_rate": 4.99662089871612e-05, "loss": 0.0319, "step": 380 }, { "epoch": 0.07, "learning_rate": 4.996531740370899e-05, "loss": 0.0348, "step": 390 }, { "epoch": 0.07, "learning_rate": 4.996442582025678e-05, "loss": 0.0325, "step": 400 }, { "epoch": 0.07, "learning_rate": 4.9963534236804564e-05, "loss": 0.034, "step": 410 }, { "epoch": 0.07, "learning_rate": 4.9962642653352355e-05, "loss": 0.031, "step": 420 }, { "epoch": 0.08, "learning_rate": 4.996175106990014e-05, "loss": 0.032, "step": 430 }, { "epoch": 0.08, "learning_rate": 4.996085948644794e-05, "loss": 0.0342, "step": 440 }, { "epoch": 0.08, "learning_rate": 4.995996790299572e-05, "loss": 0.0336, "step": 450 }, { "epoch": 0.08, "learning_rate": 4.995907631954351e-05, "loss": 0.0344, "step": 460 }, { "epoch": 0.08, "learning_rate": 4.9958184736091304e-05, "loss": 0.0319, "step": 470 }, { "epoch": 0.09, "learning_rate": 4.995729315263909e-05, "loss": 0.0336, "step": 480 }, { "epoch": 0.09, "learning_rate": 4.995640156918688e-05, "loss": 0.0318, "step": 490 }, { "epoch": 0.09, "learning_rate": 4.9955509985734665e-05, "loss": 0.0305, "step": 500 }, { "epoch": 0.09, "learning_rate": 4.9954618402282456e-05, "loss": 0.0311, "step": 510 }, { "epoch": 0.09, "learning_rate": 4.995372681883024e-05, "loss": 0.0298, "step": 520 }, { "epoch": 0.09, "learning_rate": 4.995283523537803e-05, "loss": 0.0319, "step": 530 }, { "epoch": 0.1, "learning_rate": 4.995194365192582e-05, "loss": 0.0303, "step": 540 }, { "epoch": 0.1, "learning_rate": 4.9951052068473614e-05, "loss": 0.029, "step": 550 }, { "epoch": 0.1, "learning_rate": 4.9950160485021405e-05, "loss": 0.0316, "step": 560 }, { "epoch": 0.1, "learning_rate": 4.994926890156919e-05, "loss": 0.0304, "step": 570 }, { "epoch": 0.1, "learning_rate": 4.99484664764622e-05, "loss": 0.0288, "step": 580 }, { "epoch": 0.11, "learning_rate": 4.994757489300999e-05, "loss": 0.0308, "step": 590 }, { "epoch": 0.11, "learning_rate": 4.994668330955778e-05, "loss": 0.0319, "step": 600 }, { "epoch": 0.11, "learning_rate": 4.9945791726105564e-05, "loss": 0.0304, "step": 610 }, { "epoch": 0.11, "learning_rate": 4.9944900142653356e-05, "loss": 0.0311, "step": 620 }, { "epoch": 0.11, "learning_rate": 4.994400855920115e-05, "loss": 0.0295, "step": 630 }, { "epoch": 0.11, "learning_rate": 4.994311697574893e-05, "loss": 0.0306, "step": 640 }, { "epoch": 0.12, "learning_rate": 4.994222539229672e-05, "loss": 0.0304, "step": 650 }, { "epoch": 0.12, "learning_rate": 4.994133380884451e-05, "loss": 0.029, "step": 660 }, { "epoch": 0.12, "learning_rate": 4.99404422253923e-05, "loss": 0.0296, "step": 670 }, { "epoch": 0.12, "learning_rate": 4.993955064194009e-05, "loss": 0.0297, "step": 680 }, { "epoch": 0.12, "learning_rate": 4.9938659058487874e-05, "loss": 0.0288, "step": 690 }, { "epoch": 0.12, "learning_rate": 4.993776747503567e-05, "loss": 0.0314, "step": 700 }, { "epoch": 0.13, "learning_rate": 4.9936875891583456e-05, "loss": 0.0295, "step": 710 }, { "epoch": 0.13, "learning_rate": 4.993598430813125e-05, "loss": 0.0305, "step": 720 }, { "epoch": 0.13, "learning_rate": 4.993509272467903e-05, "loss": 0.03, "step": 730 }, { "epoch": 0.13, "learning_rate": 4.993420114122682e-05, "loss": 0.0304, "step": 740 }, { "epoch": 0.13, "learning_rate": 4.993330955777461e-05, "loss": 0.0304, "step": 750 }, { "epoch": 0.14, "learning_rate": 4.99324179743224e-05, "loss": 0.0309, "step": 760 }, { "epoch": 0.14, "learning_rate": 4.993152639087019e-05, "loss": 0.0308, "step": 770 }, { "epoch": 0.14, "learning_rate": 4.9930634807417974e-05, "loss": 0.0286, "step": 780 }, { "epoch": 0.14, "learning_rate": 4.9929743223965765e-05, "loss": 0.0282, "step": 790 }, { "epoch": 0.14, "learning_rate": 4.992885164051355e-05, "loss": 0.0302, "step": 800 }, { "epoch": 0.14, "learning_rate": 4.992796005706135e-05, "loss": 0.028, "step": 810 }, { "epoch": 0.15, "learning_rate": 4.992706847360913e-05, "loss": 0.0278, "step": 820 }, { "epoch": 0.15, "learning_rate": 4.9926176890156923e-05, "loss": 0.0295, "step": 830 }, { "epoch": 0.15, "learning_rate": 4.992528530670471e-05, "loss": 0.0284, "step": 840 }, { "epoch": 0.15, "learning_rate": 4.99243937232525e-05, "loss": 0.0299, "step": 850 }, { "epoch": 0.15, "learning_rate": 4.992350213980029e-05, "loss": 0.0278, "step": 860 }, { "epoch": 0.16, "learning_rate": 4.9922610556348075e-05, "loss": 0.031, "step": 870 }, { "epoch": 0.16, "learning_rate": 4.9921718972895866e-05, "loss": 0.0291, "step": 880 }, { "epoch": 0.16, "learning_rate": 4.992082738944365e-05, "loss": 0.03, "step": 890 }, { "epoch": 0.16, "learning_rate": 4.991993580599144e-05, "loss": 0.0304, "step": 900 }, { "epoch": 0.16, "learning_rate": 4.991904422253923e-05, "loss": 0.0321, "step": 910 }, { "epoch": 0.16, "learning_rate": 4.9918152639087024e-05, "loss": 0.0261, "step": 920 }, { "epoch": 0.17, "learning_rate": 4.9917261055634815e-05, "loss": 0.0299, "step": 930 }, { "epoch": 0.17, "learning_rate": 4.99163694721826e-05, "loss": 0.0282, "step": 940 }, { "epoch": 0.17, "learning_rate": 4.991547788873039e-05, "loss": 0.0294, "step": 950 }, { "epoch": 0.17, "learning_rate": 4.9914586305278175e-05, "loss": 0.0284, "step": 960 }, { "epoch": 0.17, "learning_rate": 4.9913694721825967e-05, "loss": 0.027, "step": 970 }, { "epoch": 0.17, "learning_rate": 4.991280313837375e-05, "loss": 0.0277, "step": 980 }, { "epoch": 0.18, "learning_rate": 4.991191155492154e-05, "loss": 0.0279, "step": 990 }, { "epoch": 0.18, "learning_rate": 4.9911019971469333e-05, "loss": 0.0268, "step": 1000 }, { "epoch": 0.18, "learning_rate": 4.991012838801712e-05, "loss": 0.0286, "step": 1010 }, { "epoch": 0.18, "learning_rate": 4.990923680456491e-05, "loss": 0.0254, "step": 1020 }, { "epoch": 0.18, "learning_rate": 4.99083452211127e-05, "loss": 0.0276, "step": 1030 }, { "epoch": 0.19, "learning_rate": 4.990745363766049e-05, "loss": 0.0305, "step": 1040 }, { "epoch": 0.19, "learning_rate": 4.9906562054208276e-05, "loss": 0.0284, "step": 1050 }, { "epoch": 0.19, "learning_rate": 4.990567047075607e-05, "loss": 0.0271, "step": 1060 }, { "epoch": 0.19, "learning_rate": 4.990477888730385e-05, "loss": 0.0284, "step": 1070 }, { "epoch": 0.19, "learning_rate": 4.990388730385164e-05, "loss": 0.026, "step": 1080 }, { "epoch": 0.19, "learning_rate": 4.9902995720399434e-05, "loss": 0.0278, "step": 1090 }, { "epoch": 0.2, "learning_rate": 4.990210413694722e-05, "loss": 0.0264, "step": 1100 }, { "epoch": 0.2, "learning_rate": 4.990121255349501e-05, "loss": 0.0254, "step": 1110 }, { "epoch": 0.2, "learning_rate": 4.9900320970042794e-05, "loss": 0.0269, "step": 1120 }, { "epoch": 0.2, "learning_rate": 4.9899429386590585e-05, "loss": 0.0271, "step": 1130 }, { "epoch": 0.2, "learning_rate": 4.9898537803138376e-05, "loss": 0.028, "step": 1140 }, { "epoch": 0.21, "learning_rate": 4.989764621968617e-05, "loss": 0.0265, "step": 1150 }, { "epoch": 0.21, "learning_rate": 4.989675463623396e-05, "loss": 0.0294, "step": 1160 }, { "epoch": 0.21, "learning_rate": 4.989586305278174e-05, "loss": 0.0268, "step": 1170 }, { "epoch": 0.21, "learning_rate": 4.9894971469329535e-05, "loss": 0.0254, "step": 1180 }, { "epoch": 0.21, "learning_rate": 4.989407988587732e-05, "loss": 0.0262, "step": 1190 }, { "epoch": 0.21, "learning_rate": 4.989318830242511e-05, "loss": 0.026, "step": 1200 }, { "epoch": 0.22, "learning_rate": 4.9892296718972895e-05, "loss": 0.0258, "step": 1210 }, { "epoch": 0.22, "learning_rate": 4.9891405135520686e-05, "loss": 0.0255, "step": 1220 }, { "epoch": 0.22, "learning_rate": 4.989051355206848e-05, "loss": 0.0247, "step": 1230 }, { "epoch": 0.22, "learning_rate": 4.988962196861626e-05, "loss": 0.0253, "step": 1240 }, { "epoch": 0.22, "learning_rate": 4.988873038516406e-05, "loss": 0.0233, "step": 1250 }, { "epoch": 0.22, "learning_rate": 4.9887838801711844e-05, "loss": 0.0263, "step": 1260 }, { "epoch": 0.23, "learning_rate": 4.9886947218259635e-05, "loss": 0.0247, "step": 1270 }, { "epoch": 0.23, "learning_rate": 4.988605563480742e-05, "loss": 0.0251, "step": 1280 }, { "epoch": 0.23, "learning_rate": 4.988516405135521e-05, "loss": 0.0232, "step": 1290 }, { "epoch": 0.23, "learning_rate": 4.9884272467902995e-05, "loss": 0.0223, "step": 1300 }, { "epoch": 0.23, "learning_rate": 4.9883380884450786e-05, "loss": 0.0246, "step": 1310 }, { "epoch": 0.24, "learning_rate": 4.988248930099858e-05, "loss": 0.0236, "step": 1320 }, { "epoch": 0.24, "learning_rate": 4.988159771754636e-05, "loss": 0.0258, "step": 1330 }, { "epoch": 0.24, "learning_rate": 4.988070613409415e-05, "loss": 0.0274, "step": 1340 }, { "epoch": 0.24, "learning_rate": 4.987981455064194e-05, "loss": 0.0263, "step": 1350 }, { "epoch": 0.24, "learning_rate": 4.9878922967189736e-05, "loss": 0.0288, "step": 1360 }, { "epoch": 0.24, "learning_rate": 4.987803138373752e-05, "loss": 0.0252, "step": 1370 }, { "epoch": 0.25, "learning_rate": 4.987713980028531e-05, "loss": 0.0239, "step": 1380 }, { "epoch": 0.25, "learning_rate": 4.987633737517832e-05, "loss": 0.0237, "step": 1390 }, { "epoch": 0.25, "learning_rate": 4.9875445791726104e-05, "loss": 0.0237, "step": 1400 }, { "epoch": 0.25, "learning_rate": 4.98745542082739e-05, "loss": 0.0213, "step": 1410 }, { "epoch": 0.25, "learning_rate": 4.9873662624821686e-05, "loss": 0.0233, "step": 1420 }, { "epoch": 0.25, "learning_rate": 4.987277104136948e-05, "loss": 0.022, "step": 1430 }, { "epoch": 0.26, "learning_rate": 4.987187945791726e-05, "loss": 0.0267, "step": 1440 }, { "epoch": 0.26, "learning_rate": 4.987098787446505e-05, "loss": 0.0261, "step": 1450 }, { "epoch": 0.26, "learning_rate": 4.9870096291012844e-05, "loss": 0.0221, "step": 1460 }, { "epoch": 0.26, "learning_rate": 4.986920470756063e-05, "loss": 0.022, "step": 1470 }, { "epoch": 0.26, "learning_rate": 4.986831312410842e-05, "loss": 0.0238, "step": 1480 }, { "epoch": 0.27, "learning_rate": 4.9867421540656204e-05, "loss": 0.0249, "step": 1490 }, { "epoch": 0.27, "learning_rate": 4.9866529957203996e-05, "loss": 0.0212, "step": 1500 }, { "epoch": 0.27, "learning_rate": 4.986563837375178e-05, "loss": 0.0247, "step": 1510 }, { "epoch": 0.27, "learning_rate": 4.986474679029958e-05, "loss": 0.0238, "step": 1520 }, { "epoch": 0.27, "learning_rate": 4.986385520684736e-05, "loss": 0.0246, "step": 1530 }, { "epoch": 0.27, "learning_rate": 4.9862963623395154e-05, "loss": 0.0218, "step": 1540 }, { "epoch": 0.28, "learning_rate": 4.9862072039942945e-05, "loss": 0.0245, "step": 1550 }, { "epoch": 0.28, "learning_rate": 4.986118045649073e-05, "loss": 0.0206, "step": 1560 }, { "epoch": 0.28, "learning_rate": 4.986028887303852e-05, "loss": 0.0203, "step": 1570 }, { "epoch": 0.28, "learning_rate": 4.9859397289586305e-05, "loss": 0.0244, "step": 1580 }, { "epoch": 0.28, "learning_rate": 4.9858505706134096e-05, "loss": 0.0266, "step": 1590 }, { "epoch": 0.29, "learning_rate": 4.985761412268188e-05, "loss": 0.0223, "step": 1600 }, { "epoch": 0.29, "learning_rate": 4.985672253922967e-05, "loss": 0.0224, "step": 1610 }, { "epoch": 0.29, "learning_rate": 4.985583095577746e-05, "loss": 0.0221, "step": 1620 }, { "epoch": 0.29, "learning_rate": 4.9854939372325254e-05, "loss": 0.0222, "step": 1630 }, { "epoch": 0.29, "learning_rate": 4.9854047788873045e-05, "loss": 0.0224, "step": 1640 }, { "epoch": 0.29, "learning_rate": 4.985315620542083e-05, "loss": 0.0231, "step": 1650 }, { "epoch": 0.3, "learning_rate": 4.985226462196862e-05, "loss": 0.0207, "step": 1660 }, { "epoch": 0.3, "learning_rate": 4.9851373038516405e-05, "loss": 0.0251, "step": 1670 }, { "epoch": 0.3, "learning_rate": 4.98504814550642e-05, "loss": 0.0187, "step": 1680 }, { "epoch": 0.3, "learning_rate": 4.984958987161199e-05, "loss": 0.021, "step": 1690 }, { "epoch": 0.3, "learning_rate": 4.984869828815977e-05, "loss": 0.0174, "step": 1700 }, { "epoch": 0.3, "learning_rate": 4.9847806704707564e-05, "loss": 0.0176, "step": 1710 }, { "epoch": 0.31, "learning_rate": 4.984691512125535e-05, "loss": 0.018, "step": 1720 }, { "epoch": 0.31, "learning_rate": 4.984602353780314e-05, "loss": 0.0188, "step": 1730 }, { "epoch": 0.31, "learning_rate": 4.984513195435093e-05, "loss": 0.0219, "step": 1740 }, { "epoch": 0.31, "learning_rate": 4.984424037089872e-05, "loss": 0.0235, "step": 1750 }, { "epoch": 0.31, "learning_rate": 4.9843348787446506e-05, "loss": 0.0196, "step": 1760 }, { "epoch": 0.32, "learning_rate": 4.98424572039943e-05, "loss": 0.0198, "step": 1770 }, { "epoch": 0.32, "learning_rate": 4.984156562054209e-05, "loss": 0.0198, "step": 1780 }, { "epoch": 0.32, "learning_rate": 4.984067403708987e-05, "loss": 0.0238, "step": 1790 }, { "epoch": 0.32, "learning_rate": 4.9839782453637664e-05, "loss": 0.0186, "step": 1800 }, { "epoch": 0.32, "learning_rate": 4.983889087018545e-05, "loss": 0.0227, "step": 1810 }, { "epoch": 0.32, "learning_rate": 4.983799928673324e-05, "loss": 0.0214, "step": 1820 }, { "epoch": 0.33, "learning_rate": 4.9837107703281024e-05, "loss": 0.0211, "step": 1830 }, { "epoch": 0.33, "learning_rate": 4.9836216119828815e-05, "loss": 0.0204, "step": 1840 }, { "epoch": 0.33, "learning_rate": 4.9835324536376607e-05, "loss": 0.0187, "step": 1850 }, { "epoch": 0.33, "learning_rate": 4.98344329529244e-05, "loss": 0.0191, "step": 1860 }, { "epoch": 0.33, "learning_rate": 4.983354136947219e-05, "loss": 0.023, "step": 1870 }, { "epoch": 0.34, "learning_rate": 4.9832649786019973e-05, "loss": 0.0211, "step": 1880 }, { "epoch": 0.34, "learning_rate": 4.9831758202567765e-05, "loss": 0.0203, "step": 1890 }, { "epoch": 0.34, "learning_rate": 4.983086661911555e-05, "loss": 0.0208, "step": 1900 }, { "epoch": 0.34, "learning_rate": 4.982997503566334e-05, "loss": 0.0239, "step": 1910 }, { "epoch": 0.34, "learning_rate": 4.982908345221113e-05, "loss": 0.019, "step": 1920 }, { "epoch": 0.34, "learning_rate": 4.9828191868758916e-05, "loss": 0.0172, "step": 1930 }, { "epoch": 0.35, "learning_rate": 4.982730028530671e-05, "loss": 0.0186, "step": 1940 }, { "epoch": 0.35, "learning_rate": 4.982640870185449e-05, "loss": 0.02, "step": 1950 }, { "epoch": 0.35, "learning_rate": 4.982551711840229e-05, "loss": 0.0175, "step": 1960 }, { "epoch": 0.35, "learning_rate": 4.9824625534950074e-05, "loss": 0.0225, "step": 1970 }, { "epoch": 0.35, "learning_rate": 4.9823733951497865e-05, "loss": 0.0192, "step": 1980 }, { "epoch": 0.35, "learning_rate": 4.982284236804565e-05, "loss": 0.0201, "step": 1990 }, { "epoch": 0.36, "learning_rate": 4.982195078459344e-05, "loss": 0.0191, "step": 2000 }, { "epoch": 0.36, "learning_rate": 4.982105920114123e-05, "loss": 0.0214, "step": 2010 }, { "epoch": 0.36, "learning_rate": 4.9820167617689016e-05, "loss": 0.0173, "step": 2020 }, { "epoch": 0.36, "learning_rate": 4.981927603423681e-05, "loss": 0.0206, "step": 2030 }, { "epoch": 0.36, "learning_rate": 4.981838445078459e-05, "loss": 0.0163, "step": 2040 }, { "epoch": 0.37, "learning_rate": 4.981749286733238e-05, "loss": 0.0187, "step": 2050 }, { "epoch": 0.37, "learning_rate": 4.981660128388017e-05, "loss": 0.0213, "step": 2060 }, { "epoch": 0.37, "learning_rate": 4.9815709700427966e-05, "loss": 0.0226, "step": 2070 }, { "epoch": 0.37, "learning_rate": 4.981481811697575e-05, "loss": 0.0177, "step": 2080 }, { "epoch": 0.37, "learning_rate": 4.981392653352354e-05, "loss": 0.0194, "step": 2090 }, { "epoch": 0.37, "learning_rate": 4.981303495007133e-05, "loss": 0.0164, "step": 2100 }, { "epoch": 0.38, "learning_rate": 4.981214336661912e-05, "loss": 0.0146, "step": 2110 }, { "epoch": 0.38, "learning_rate": 4.981125178316691e-05, "loss": 0.0201, "step": 2120 }, { "epoch": 0.38, "learning_rate": 4.981036019971469e-05, "loss": 0.0179, "step": 2130 }, { "epoch": 0.38, "learning_rate": 4.9809468616262484e-05, "loss": 0.0188, "step": 2140 }, { "epoch": 0.38, "learning_rate": 4.9808577032810275e-05, "loss": 0.0202, "step": 2150 }, { "epoch": 0.39, "learning_rate": 4.980768544935806e-05, "loss": 0.02, "step": 2160 }, { "epoch": 0.39, "learning_rate": 4.980679386590585e-05, "loss": 0.0163, "step": 2170 }, { "epoch": 0.39, "learning_rate": 4.980590228245364e-05, "loss": 0.0235, "step": 2180 }, { "epoch": 0.39, "learning_rate": 4.980501069900143e-05, "loss": 0.0242, "step": 2190 }, { "epoch": 0.39, "learning_rate": 4.980411911554922e-05, "loss": 0.0209, "step": 2200 }, { "epoch": 0.39, "learning_rate": 4.980322753209701e-05, "loss": 0.0191, "step": 2210 }, { "epoch": 0.4, "learning_rate": 4.980233594864479e-05, "loss": 0.0166, "step": 2220 }, { "epoch": 0.4, "learning_rate": 4.9801444365192584e-05, "loss": 0.0177, "step": 2230 }, { "epoch": 0.4, "learning_rate": 4.9800552781740376e-05, "loss": 0.0183, "step": 2240 }, { "epoch": 0.4, "learning_rate": 4.979966119828816e-05, "loss": 0.0189, "step": 2250 }, { "epoch": 0.4, "learning_rate": 4.979876961483595e-05, "loss": 0.0189, "step": 2260 }, { "epoch": 0.4, "learning_rate": 4.9797878031383736e-05, "loss": 0.0162, "step": 2270 }, { "epoch": 0.41, "learning_rate": 4.979698644793153e-05, "loss": 0.0158, "step": 2280 }, { "epoch": 0.41, "learning_rate": 4.979609486447932e-05, "loss": 0.0193, "step": 2290 }, { "epoch": 0.41, "learning_rate": 4.979520328102711e-05, "loss": 0.0189, "step": 2300 }, { "epoch": 0.41, "learning_rate": 4.9794311697574894e-05, "loss": 0.0204, "step": 2310 }, { "epoch": 0.41, "learning_rate": 4.9793420114122685e-05, "loss": 0.0235, "step": 2320 }, { "epoch": 0.42, "learning_rate": 4.9792528530670476e-05, "loss": 0.0163, "step": 2330 }, { "epoch": 0.42, "learning_rate": 4.979163694721826e-05, "loss": 0.0226, "step": 2340 }, { "epoch": 0.42, "learning_rate": 4.979074536376605e-05, "loss": 0.0173, "step": 2350 }, { "epoch": 0.42, "learning_rate": 4.9789853780313836e-05, "loss": 0.017, "step": 2360 }, { "epoch": 0.42, "learning_rate": 4.978896219686163e-05, "loss": 0.0152, "step": 2370 }, { "epoch": 0.42, "learning_rate": 4.978807061340942e-05, "loss": 0.0174, "step": 2380 }, { "epoch": 0.43, "learning_rate": 4.97871790299572e-05, "loss": 0.0205, "step": 2390 }, { "epoch": 0.43, "learning_rate": 4.9786287446505e-05, "loss": 0.0198, "step": 2400 }, { "epoch": 0.43, "learning_rate": 4.9785395863052786e-05, "loss": 0.0215, "step": 2410 }, { "epoch": 0.43, "learning_rate": 4.978450427960058e-05, "loss": 0.0188, "step": 2420 }, { "epoch": 0.43, "learning_rate": 4.978361269614836e-05, "loss": 0.0178, "step": 2430 }, { "epoch": 0.44, "learning_rate": 4.978272111269615e-05, "loss": 0.014, "step": 2440 }, { "epoch": 0.44, "learning_rate": 4.978182952924394e-05, "loss": 0.0164, "step": 2450 }, { "epoch": 0.44, "learning_rate": 4.978093794579173e-05, "loss": 0.0179, "step": 2460 }, { "epoch": 0.44, "learning_rate": 4.978004636233952e-05, "loss": 0.0161, "step": 2470 }, { "epoch": 0.44, "learning_rate": 4.9779154778887304e-05, "loss": 0.0149, "step": 2480 }, { "epoch": 0.44, "learning_rate": 4.9778263195435095e-05, "loss": 0.0161, "step": 2490 }, { "epoch": 0.45, "learning_rate": 4.977737161198288e-05, "loss": 0.018, "step": 2500 }, { "epoch": 0.45, "learning_rate": 4.977648002853068e-05, "loss": 0.017, "step": 2510 }, { "epoch": 0.45, "learning_rate": 4.977558844507846e-05, "loss": 0.0163, "step": 2520 }, { "epoch": 0.45, "learning_rate": 4.977469686162625e-05, "loss": 0.0166, "step": 2530 }, { "epoch": 0.45, "learning_rate": 4.977380527817404e-05, "loss": 0.0172, "step": 2540 }, { "epoch": 0.45, "learning_rate": 4.977291369472183e-05, "loss": 0.0186, "step": 2550 }, { "epoch": 0.46, "learning_rate": 4.977202211126962e-05, "loss": 0.0154, "step": 2560 }, { "epoch": 0.46, "learning_rate": 4.9771130527817404e-05, "loss": 0.017, "step": 2570 }, { "epoch": 0.46, "learning_rate": 4.9770238944365195e-05, "loss": 0.0179, "step": 2580 }, { "epoch": 0.46, "learning_rate": 4.976934736091298e-05, "loss": 0.017, "step": 2590 }, { "epoch": 0.46, "learning_rate": 4.976845577746077e-05, "loss": 0.0162, "step": 2600 }, { "epoch": 0.47, "learning_rate": 4.976756419400856e-05, "loss": 0.0186, "step": 2610 }, { "epoch": 0.47, "learning_rate": 4.9766672610556354e-05, "loss": 0.018, "step": 2620 }, { "epoch": 0.47, "learning_rate": 4.976578102710414e-05, "loss": 0.0189, "step": 2630 }, { "epoch": 0.47, "learning_rate": 4.976488944365193e-05, "loss": 0.018, "step": 2640 }, { "epoch": 0.47, "learning_rate": 4.976399786019972e-05, "loss": 0.0173, "step": 2650 }, { "epoch": 0.47, "learning_rate": 4.9763106276747505e-05, "loss": 0.02, "step": 2660 }, { "epoch": 0.48, "learning_rate": 4.9762214693295296e-05, "loss": 0.0182, "step": 2670 }, { "epoch": 0.48, "learning_rate": 4.976132310984308e-05, "loss": 0.0173, "step": 2680 }, { "epoch": 0.48, "learning_rate": 4.976043152639087e-05, "loss": 0.0142, "step": 2690 }, { "epoch": 0.48, "learning_rate": 4.975953994293866e-05, "loss": 0.0185, "step": 2700 }, { "epoch": 0.48, "learning_rate": 4.975864835948645e-05, "loss": 0.0185, "step": 2710 }, { "epoch": 0.49, "learning_rate": 4.975775677603424e-05, "loss": 0.0169, "step": 2720 }, { "epoch": 0.49, "learning_rate": 4.975686519258203e-05, "loss": 0.0177, "step": 2730 }, { "epoch": 0.49, "learning_rate": 4.975597360912982e-05, "loss": 0.0176, "step": 2740 }, { "epoch": 0.49, "learning_rate": 4.9755082025677605e-05, "loss": 0.0137, "step": 2750 }, { "epoch": 0.49, "learning_rate": 4.9754190442225397e-05, "loss": 0.0132, "step": 2760 }, { "epoch": 0.49, "learning_rate": 4.975329885877318e-05, "loss": 0.0173, "step": 2770 }, { "epoch": 0.5, "learning_rate": 4.975240727532097e-05, "loss": 0.0158, "step": 2780 }, { "epoch": 0.5, "learning_rate": 4.9751515691868763e-05, "loss": 0.0133, "step": 2790 }, { "epoch": 0.5, "learning_rate": 4.975062410841655e-05, "loss": 0.0187, "step": 2800 }, { "epoch": 0.5, "learning_rate": 4.974973252496434e-05, "loss": 0.0148, "step": 2810 }, { "epoch": 0.5, "learning_rate": 4.9748840941512124e-05, "loss": 0.0156, "step": 2820 }, { "epoch": 0.5, "learning_rate": 4.9747949358059915e-05, "loss": 0.0163, "step": 2830 }, { "epoch": 0.51, "learning_rate": 4.9747057774607706e-05, "loss": 0.0168, "step": 2840 }, { "epoch": 0.51, "learning_rate": 4.97461661911555e-05, "loss": 0.0172, "step": 2850 }, { "epoch": 0.51, "learning_rate": 4.974527460770328e-05, "loss": 0.0149, "step": 2860 }, { "epoch": 0.51, "learning_rate": 4.974438302425107e-05, "loss": 0.0156, "step": 2870 }, { "epoch": 0.51, "learning_rate": 4.9743491440798864e-05, "loss": 0.0164, "step": 2880 }, { "epoch": 0.52, "learning_rate": 4.974259985734665e-05, "loss": 0.0162, "step": 2890 }, { "epoch": 0.52, "learning_rate": 4.974170827389444e-05, "loss": 0.0158, "step": 2900 }, { "epoch": 0.52, "learning_rate": 4.9740816690442224e-05, "loss": 0.0137, "step": 2910 }, { "epoch": 0.52, "learning_rate": 4.9739925106990015e-05, "loss": 0.0155, "step": 2920 }, { "epoch": 0.52, "learning_rate": 4.9739033523537807e-05, "loss": 0.0193, "step": 2930 }, { "epoch": 0.52, "learning_rate": 4.973814194008559e-05, "loss": 0.0168, "step": 2940 }, { "epoch": 0.53, "learning_rate": 4.973725035663339e-05, "loss": 0.0152, "step": 2950 }, { "epoch": 0.53, "learning_rate": 4.973635877318117e-05, "loss": 0.0156, "step": 2960 }, { "epoch": 0.53, "learning_rate": 4.9735467189728965e-05, "loss": 0.0135, "step": 2970 }, { "epoch": 0.53, "learning_rate": 4.973457560627675e-05, "loss": 0.0167, "step": 2980 }, { "epoch": 0.53, "learning_rate": 4.973368402282454e-05, "loss": 0.0126, "step": 2990 }, { "epoch": 0.53, "learning_rate": 4.9732792439372325e-05, "loss": 0.017, "step": 3000 }, { "epoch": 0.54, "learning_rate": 4.9731900855920116e-05, "loss": 0.0185, "step": 3010 }, { "epoch": 0.54, "learning_rate": 4.973100927246791e-05, "loss": 0.0164, "step": 3020 }, { "epoch": 0.54, "learning_rate": 4.973011768901569e-05, "loss": 0.0176, "step": 3030 }, { "epoch": 0.54, "learning_rate": 4.972922610556348e-05, "loss": 0.0198, "step": 3040 }, { "epoch": 0.54, "learning_rate": 4.972833452211127e-05, "loss": 0.0172, "step": 3050 }, { "epoch": 0.55, "learning_rate": 4.9727442938659065e-05, "loss": 0.0188, "step": 3060 }, { "epoch": 0.55, "learning_rate": 4.972655135520685e-05, "loss": 0.0162, "step": 3070 }, { "epoch": 0.55, "learning_rate": 4.972565977175464e-05, "loss": 0.0147, "step": 3080 }, { "epoch": 0.55, "learning_rate": 4.9724768188302425e-05, "loss": 0.0125, "step": 3090 }, { "epoch": 0.55, "learning_rate": 4.9723876604850216e-05, "loss": 0.0138, "step": 3100 }, { "epoch": 0.55, "learning_rate": 4.972298502139801e-05, "loss": 0.0108, "step": 3110 }, { "epoch": 0.56, "learning_rate": 4.972209343794579e-05, "loss": 0.0173, "step": 3120 }, { "epoch": 0.56, "learning_rate": 4.972120185449358e-05, "loss": 0.0148, "step": 3130 }, { "epoch": 0.56, "learning_rate": 4.972031027104137e-05, "loss": 0.0176, "step": 3140 }, { "epoch": 0.56, "learning_rate": 4.971941868758916e-05, "loss": 0.0202, "step": 3150 }, { "epoch": 0.56, "learning_rate": 4.971852710413695e-05, "loss": 0.017, "step": 3160 }, { "epoch": 0.57, "learning_rate": 4.971763552068474e-05, "loss": 0.015, "step": 3170 }, { "epoch": 0.57, "learning_rate": 4.971674393723253e-05, "loss": 0.0164, "step": 3180 }, { "epoch": 0.57, "learning_rate": 4.971585235378032e-05, "loss": 0.012, "step": 3190 }, { "epoch": 0.57, "learning_rate": 4.971496077032811e-05, "loss": 0.0194, "step": 3200 }, { "epoch": 0.57, "learning_rate": 4.971406918687589e-05, "loss": 0.0147, "step": 3210 }, { "epoch": 0.57, "learning_rate": 4.9713177603423684e-05, "loss": 0.0129, "step": 3220 }, { "epoch": 0.58, "learning_rate": 4.971228601997147e-05, "loss": 0.0132, "step": 3230 }, { "epoch": 0.58, "learning_rate": 4.971139443651926e-05, "loss": 0.0155, "step": 3240 }, { "epoch": 0.58, "learning_rate": 4.971050285306705e-05, "loss": 0.0168, "step": 3250 }, { "epoch": 0.58, "learning_rate": 4.9709611269614835e-05, "loss": 0.0155, "step": 3260 }, { "epoch": 0.58, "learning_rate": 4.9708719686162626e-05, "loss": 0.0163, "step": 3270 }, { "epoch": 0.58, "learning_rate": 4.970782810271042e-05, "loss": 0.017, "step": 3280 }, { "epoch": 0.59, "learning_rate": 4.970693651925821e-05, "loss": 0.0155, "step": 3290 }, { "epoch": 0.59, "learning_rate": 4.970604493580599e-05, "loss": 0.02, "step": 3300 }, { "epoch": 0.59, "learning_rate": 4.9705153352353784e-05, "loss": 0.0147, "step": 3310 }, { "epoch": 0.59, "learning_rate": 4.970426176890157e-05, "loss": 0.0147, "step": 3320 }, { "epoch": 0.59, "learning_rate": 4.970337018544936e-05, "loss": 0.0175, "step": 3330 }, { "epoch": 0.6, "learning_rate": 4.970247860199715e-05, "loss": 0.0136, "step": 3340 }, { "epoch": 0.6, "learning_rate": 4.9701587018544936e-05, "loss": 0.0162, "step": 3350 }, { "epoch": 0.6, "learning_rate": 4.970069543509273e-05, "loss": 0.0145, "step": 3360 }, { "epoch": 0.6, "learning_rate": 4.969980385164051e-05, "loss": 0.015, "step": 3370 }, { "epoch": 0.6, "learning_rate": 4.96989122681883e-05, "loss": 0.0136, "step": 3380 }, { "epoch": 0.6, "learning_rate": 4.9698020684736094e-05, "loss": 0.014, "step": 3390 }, { "epoch": 0.61, "learning_rate": 4.9697129101283885e-05, "loss": 0.014, "step": 3400 }, { "epoch": 0.61, "learning_rate": 4.9696237517831676e-05, "loss": 0.0173, "step": 3410 }, { "epoch": 0.61, "learning_rate": 4.969534593437946e-05, "loss": 0.0164, "step": 3420 }, { "epoch": 0.61, "learning_rate": 4.969445435092725e-05, "loss": 0.0129, "step": 3430 }, { "epoch": 0.61, "learning_rate": 4.9693562767475036e-05, "loss": 0.0119, "step": 3440 }, { "epoch": 0.62, "learning_rate": 4.969267118402283e-05, "loss": 0.0133, "step": 3450 }, { "epoch": 0.62, "learning_rate": 4.969177960057061e-05, "loss": 0.0199, "step": 3460 }, { "epoch": 0.62, "learning_rate": 4.96908880171184e-05, "loss": 0.0143, "step": 3470 }, { "epoch": 0.62, "learning_rate": 4.9689996433666194e-05, "loss": 0.0177, "step": 3480 }, { "epoch": 0.62, "learning_rate": 4.968910485021398e-05, "loss": 0.0149, "step": 3490 }, { "epoch": 0.62, "learning_rate": 4.968821326676178e-05, "loss": 0.0181, "step": 3500 }, { "epoch": 0.63, "learning_rate": 4.968732168330956e-05, "loss": 0.019, "step": 3510 }, { "epoch": 0.63, "learning_rate": 4.968643009985735e-05, "loss": 0.0146, "step": 3520 }, { "epoch": 0.63, "learning_rate": 4.968553851640514e-05, "loss": 0.0128, "step": 3530 }, { "epoch": 0.63, "learning_rate": 4.968464693295293e-05, "loss": 0.0142, "step": 3540 }, { "epoch": 0.63, "learning_rate": 4.968375534950071e-05, "loss": 0.0168, "step": 3550 }, { "epoch": 0.63, "learning_rate": 4.9682863766048504e-05, "loss": 0.0168, "step": 3560 }, { "epoch": 0.64, "learning_rate": 4.9681972182596295e-05, "loss": 0.0149, "step": 3570 }, { "epoch": 0.64, "learning_rate": 4.968108059914408e-05, "loss": 0.0137, "step": 3580 }, { "epoch": 0.64, "learning_rate": 4.968018901569187e-05, "loss": 0.0126, "step": 3590 }, { "epoch": 0.64, "learning_rate": 4.9679297432239655e-05, "loss": 0.0104, "step": 3600 }, { "epoch": 0.64, "learning_rate": 4.967840584878745e-05, "loss": 0.0139, "step": 3610 }, { "epoch": 0.65, "learning_rate": 4.967751426533524e-05, "loss": 0.014, "step": 3620 }, { "epoch": 0.65, "learning_rate": 4.967662268188303e-05, "loss": 0.0155, "step": 3630 }, { "epoch": 0.65, "learning_rate": 4.967573109843082e-05, "loss": 0.0197, "step": 3640 }, { "epoch": 0.65, "learning_rate": 4.9674839514978604e-05, "loss": 0.0181, "step": 3650 }, { "epoch": 0.65, "learning_rate": 4.9673947931526395e-05, "loss": 0.0152, "step": 3660 }, { "epoch": 0.65, "learning_rate": 4.967305634807418e-05, "loss": 0.0153, "step": 3670 }, { "epoch": 0.66, "learning_rate": 4.967216476462197e-05, "loss": 0.0132, "step": 3680 }, { "epoch": 0.66, "learning_rate": 4.9671273181169756e-05, "loss": 0.0151, "step": 3690 }, { "epoch": 0.66, "learning_rate": 4.967038159771755e-05, "loss": 0.0119, "step": 3700 }, { "epoch": 0.66, "learning_rate": 4.966949001426534e-05, "loss": 0.0149, "step": 3710 }, { "epoch": 0.66, "learning_rate": 4.966859843081313e-05, "loss": 0.0108, "step": 3720 }, { "epoch": 0.67, "learning_rate": 4.966770684736092e-05, "loss": 0.0129, "step": 3730 }, { "epoch": 0.67, "learning_rate": 4.9666815263908705e-05, "loss": 0.0111, "step": 3740 }, { "epoch": 0.67, "learning_rate": 4.9665923680456496e-05, "loss": 0.0166, "step": 3750 }, { "epoch": 0.67, "learning_rate": 4.966503209700428e-05, "loss": 0.0128, "step": 3760 }, { "epoch": 0.67, "learning_rate": 4.966414051355207e-05, "loss": 0.0114, "step": 3770 }, { "epoch": 0.67, "learning_rate": 4.9663248930099856e-05, "loss": 0.0125, "step": 3780 }, { "epoch": 0.68, "learning_rate": 4.966235734664765e-05, "loss": 0.0134, "step": 3790 }, { "epoch": 0.68, "learning_rate": 4.966155492154066e-05, "loss": 0.0161, "step": 3800 }, { "epoch": 0.68, "learning_rate": 4.9660663338088447e-05, "loss": 0.0116, "step": 3810 }, { "epoch": 0.68, "learning_rate": 4.965977175463624e-05, "loss": 0.013, "step": 3820 }, { "epoch": 0.68, "learning_rate": 4.965888017118402e-05, "loss": 0.0107, "step": 3830 }, { "epoch": 0.68, "learning_rate": 4.965798858773181e-05, "loss": 0.0159, "step": 3840 }, { "epoch": 0.69, "learning_rate": 4.9657097004279605e-05, "loss": 0.0151, "step": 3850 }, { "epoch": 0.69, "learning_rate": 4.965620542082739e-05, "loss": 0.0154, "step": 3860 }, { "epoch": 0.69, "learning_rate": 4.965531383737518e-05, "loss": 0.0103, "step": 3870 }, { "epoch": 0.69, "learning_rate": 4.965442225392297e-05, "loss": 0.0118, "step": 3880 }, { "epoch": 0.69, "learning_rate": 4.965353067047076e-05, "loss": 0.0165, "step": 3890 }, { "epoch": 0.7, "learning_rate": 4.965263908701855e-05, "loss": 0.01, "step": 3900 }, { "epoch": 0.7, "learning_rate": 4.965174750356634e-05, "loss": 0.0162, "step": 3910 }, { "epoch": 0.7, "learning_rate": 4.965085592011412e-05, "loss": 0.0133, "step": 3920 }, { "epoch": 0.7, "learning_rate": 4.9649964336661914e-05, "loss": 0.0137, "step": 3930 }, { "epoch": 0.7, "learning_rate": 4.9649072753209705e-05, "loss": 0.013, "step": 3940 }, { "epoch": 0.7, "learning_rate": 4.964818116975749e-05, "loss": 0.0159, "step": 3950 }, { "epoch": 0.71, "learning_rate": 4.964728958630528e-05, "loss": 0.0092, "step": 3960 }, { "epoch": 0.71, "learning_rate": 4.9646398002853065e-05, "loss": 0.0128, "step": 3970 }, { "epoch": 0.71, "learning_rate": 4.9645506419400856e-05, "loss": 0.0158, "step": 3980 }, { "epoch": 0.71, "learning_rate": 4.964461483594865e-05, "loss": 0.0132, "step": 3990 }, { "epoch": 0.71, "learning_rate": 4.964372325249644e-05, "loss": 0.0159, "step": 4000 }, { "epoch": 0.72, "learning_rate": 4.964283166904422e-05, "loss": 0.0132, "step": 4010 }, { "epoch": 0.72, "learning_rate": 4.9641940085592015e-05, "loss": 0.0131, "step": 4020 }, { "epoch": 0.72, "learning_rate": 4.9641048502139806e-05, "loss": 0.0154, "step": 4030 }, { "epoch": 0.72, "learning_rate": 4.964015691868759e-05, "loss": 0.0161, "step": 4040 }, { "epoch": 0.72, "learning_rate": 4.963926533523538e-05, "loss": 0.0129, "step": 4050 }, { "epoch": 0.72, "learning_rate": 4.9638373751783166e-05, "loss": 0.0134, "step": 4060 }, { "epoch": 0.73, "learning_rate": 4.963748216833096e-05, "loss": 0.0115, "step": 4070 }, { "epoch": 0.73, "learning_rate": 4.963659058487875e-05, "loss": 0.013, "step": 4080 }, { "epoch": 0.73, "learning_rate": 4.963569900142653e-05, "loss": 0.0127, "step": 4090 }, { "epoch": 0.73, "learning_rate": 4.9634807417974324e-05, "loss": 0.0147, "step": 4100 }, { "epoch": 0.73, "learning_rate": 4.9633915834522115e-05, "loss": 0.0156, "step": 4110 }, { "epoch": 0.73, "learning_rate": 4.9633024251069906e-05, "loss": 0.0135, "step": 4120 }, { "epoch": 0.74, "learning_rate": 4.963213266761769e-05, "loss": 0.0149, "step": 4130 }, { "epoch": 0.74, "learning_rate": 4.963124108416548e-05, "loss": 0.0174, "step": 4140 }, { "epoch": 0.74, "learning_rate": 4.9630349500713266e-05, "loss": 0.0127, "step": 4150 }, { "epoch": 0.74, "learning_rate": 4.962945791726106e-05, "loss": 0.0143, "step": 4160 }, { "epoch": 0.74, "learning_rate": 4.962856633380885e-05, "loss": 0.0163, "step": 4170 }, { "epoch": 0.75, "learning_rate": 4.962767475035663e-05, "loss": 0.0112, "step": 4180 }, { "epoch": 0.75, "learning_rate": 4.9626783166904424e-05, "loss": 0.0098, "step": 4190 }, { "epoch": 0.75, "learning_rate": 4.962589158345221e-05, "loss": 0.0122, "step": 4200 }, { "epoch": 0.75, "learning_rate": 4.962500000000001e-05, "loss": 0.0153, "step": 4210 }, { "epoch": 0.75, "learning_rate": 4.962410841654779e-05, "loss": 0.0112, "step": 4220 }, { "epoch": 0.75, "learning_rate": 4.962321683309558e-05, "loss": 0.0112, "step": 4230 }, { "epoch": 0.76, "learning_rate": 4.962232524964337e-05, "loss": 0.0133, "step": 4240 }, { "epoch": 0.76, "learning_rate": 4.962143366619116e-05, "loss": 0.0146, "step": 4250 }, { "epoch": 0.76, "learning_rate": 4.962054208273895e-05, "loss": 0.0122, "step": 4260 }, { "epoch": 0.76, "learning_rate": 4.9619650499286734e-05, "loss": 0.0116, "step": 4270 }, { "epoch": 0.76, "learning_rate": 4.9618758915834525e-05, "loss": 0.0125, "step": 4280 }, { "epoch": 0.76, "learning_rate": 4.961786733238231e-05, "loss": 0.0163, "step": 4290 }, { "epoch": 0.77, "learning_rate": 4.96169757489301e-05, "loss": 0.0106, "step": 4300 }, { "epoch": 0.77, "learning_rate": 4.961608416547789e-05, "loss": 0.0089, "step": 4310 }, { "epoch": 0.77, "learning_rate": 4.961519258202568e-05, "loss": 0.0162, "step": 4320 }, { "epoch": 0.77, "learning_rate": 4.961430099857347e-05, "loss": 0.0128, "step": 4330 }, { "epoch": 0.77, "learning_rate": 4.961340941512126e-05, "loss": 0.0159, "step": 4340 }, { "epoch": 0.78, "learning_rate": 4.961251783166905e-05, "loss": 0.0135, "step": 4350 }, { "epoch": 0.78, "learning_rate": 4.9611626248216834e-05, "loss": 0.016, "step": 4360 }, { "epoch": 0.78, "learning_rate": 4.9610734664764626e-05, "loss": 0.0173, "step": 4370 }, { "epoch": 0.78, "learning_rate": 4.960984308131241e-05, "loss": 0.0157, "step": 4380 }, { "epoch": 0.78, "learning_rate": 4.96089514978602e-05, "loss": 0.0147, "step": 4390 }, { "epoch": 0.78, "learning_rate": 4.960805991440799e-05, "loss": 0.0121, "step": 4400 }, { "epoch": 0.79, "learning_rate": 4.960716833095578e-05, "loss": 0.0138, "step": 4410 }, { "epoch": 0.79, "learning_rate": 4.960627674750357e-05, "loss": 0.011, "step": 4420 }, { "epoch": 0.79, "learning_rate": 4.960538516405136e-05, "loss": 0.0156, "step": 4430 }, { "epoch": 0.79, "learning_rate": 4.960449358059915e-05, "loss": 0.0114, "step": 4440 }, { "epoch": 0.79, "learning_rate": 4.9603601997146935e-05, "loss": 0.0149, "step": 4450 }, { "epoch": 0.8, "learning_rate": 4.9602710413694726e-05, "loss": 0.0093, "step": 4460 }, { "epoch": 0.8, "learning_rate": 4.960181883024251e-05, "loss": 0.012, "step": 4470 }, { "epoch": 0.8, "learning_rate": 4.96009272467903e-05, "loss": 0.0139, "step": 4480 }, { "epoch": 0.8, "learning_rate": 4.960003566333809e-05, "loss": 0.0105, "step": 4490 }, { "epoch": 0.8, "learning_rate": 4.959914407988588e-05, "loss": 0.0157, "step": 4500 }, { "epoch": 0.8, "learning_rate": 4.959825249643367e-05, "loss": 0.0126, "step": 4510 }, { "epoch": 0.81, "learning_rate": 4.959736091298145e-05, "loss": 0.0132, "step": 4520 }, { "epoch": 0.81, "learning_rate": 4.9596469329529244e-05, "loss": 0.0158, "step": 4530 }, { "epoch": 0.81, "learning_rate": 4.9595577746077035e-05, "loss": 0.014, "step": 4540 }, { "epoch": 0.81, "learning_rate": 4.959468616262483e-05, "loss": 0.0116, "step": 4550 }, { "epoch": 0.81, "learning_rate": 4.959379457917261e-05, "loss": 0.017, "step": 4560 }, { "epoch": 0.81, "learning_rate": 4.95929029957204e-05, "loss": 0.0124, "step": 4570 }, { "epoch": 0.82, "learning_rate": 4.9592011412268194e-05, "loss": 0.0126, "step": 4580 }, { "epoch": 0.82, "learning_rate": 4.959111982881598e-05, "loss": 0.012, "step": 4590 }, { "epoch": 0.82, "learning_rate": 4.959022824536377e-05, "loss": 0.0142, "step": 4600 }, { "epoch": 0.82, "learning_rate": 4.9589336661911554e-05, "loss": 0.0146, "step": 4610 }, { "epoch": 0.82, "learning_rate": 4.9588445078459345e-05, "loss": 0.0111, "step": 4620 }, { "epoch": 0.83, "learning_rate": 4.9587553495007136e-05, "loss": 0.0106, "step": 4630 }, { "epoch": 0.83, "learning_rate": 4.958666191155492e-05, "loss": 0.016, "step": 4640 }, { "epoch": 0.83, "learning_rate": 4.958577032810272e-05, "loss": 0.0111, "step": 4650 }, { "epoch": 0.83, "learning_rate": 4.95848787446505e-05, "loss": 0.0122, "step": 4660 }, { "epoch": 0.83, "learning_rate": 4.9583987161198294e-05, "loss": 0.0134, "step": 4670 }, { "epoch": 0.83, "learning_rate": 4.958309557774608e-05, "loss": 0.0165, "step": 4680 }, { "epoch": 0.84, "learning_rate": 4.958220399429387e-05, "loss": 0.0117, "step": 4690 }, { "epoch": 0.84, "learning_rate": 4.9581312410841654e-05, "loss": 0.0102, "step": 4700 }, { "epoch": 0.84, "learning_rate": 4.9580420827389445e-05, "loss": 0.0151, "step": 4710 }, { "epoch": 0.84, "learning_rate": 4.9579529243937237e-05, "loss": 0.0134, "step": 4720 }, { "epoch": 0.84, "learning_rate": 4.957863766048502e-05, "loss": 0.0107, "step": 4730 }, { "epoch": 0.85, "learning_rate": 4.957774607703281e-05, "loss": 0.0121, "step": 4740 }, { "epoch": 0.85, "learning_rate": 4.95768544935806e-05, "loss": 0.0114, "step": 4750 }, { "epoch": 0.85, "learning_rate": 4.9575962910128395e-05, "loss": 0.0135, "step": 4760 }, { "epoch": 0.85, "learning_rate": 4.957507132667618e-05, "loss": 0.0139, "step": 4770 }, { "epoch": 0.85, "learning_rate": 4.957417974322397e-05, "loss": 0.01, "step": 4780 }, { "epoch": 0.85, "learning_rate": 4.9573288159771755e-05, "loss": 0.0134, "step": 4790 }, { "epoch": 0.86, "learning_rate": 4.9572396576319546e-05, "loss": 0.0149, "step": 4800 }, { "epoch": 0.86, "learning_rate": 4.957150499286734e-05, "loss": 0.0129, "step": 4810 }, { "epoch": 0.86, "learning_rate": 4.957061340941512e-05, "loss": 0.0182, "step": 4820 }, { "epoch": 0.86, "learning_rate": 4.956972182596291e-05, "loss": 0.0088, "step": 4830 }, { "epoch": 0.86, "learning_rate": 4.95688302425107e-05, "loss": 0.0137, "step": 4840 }, { "epoch": 0.86, "learning_rate": 4.956793865905849e-05, "loss": 0.0103, "step": 4850 }, { "epoch": 0.87, "learning_rate": 4.956704707560628e-05, "loss": 0.0111, "step": 4860 }, { "epoch": 0.87, "learning_rate": 4.956615549215407e-05, "loss": 0.0162, "step": 4870 }, { "epoch": 0.87, "learning_rate": 4.956526390870186e-05, "loss": 0.0147, "step": 4880 }, { "epoch": 0.87, "learning_rate": 4.9564372325249646e-05, "loss": 0.0131, "step": 4890 }, { "epoch": 0.87, "learning_rate": 4.956348074179744e-05, "loss": 0.0108, "step": 4900 }, { "epoch": 0.88, "learning_rate": 4.956258915834522e-05, "loss": 0.0138, "step": 4910 }, { "epoch": 0.88, "learning_rate": 4.956169757489301e-05, "loss": 0.0113, "step": 4920 }, { "epoch": 0.88, "learning_rate": 4.95608059914408e-05, "loss": 0.0151, "step": 4930 }, { "epoch": 0.88, "learning_rate": 4.955991440798859e-05, "loss": 0.0178, "step": 4940 }, { "epoch": 0.88, "learning_rate": 4.955902282453638e-05, "loss": 0.0132, "step": 4950 }, { "epoch": 0.88, "learning_rate": 4.9558131241084165e-05, "loss": 0.0127, "step": 4960 }, { "epoch": 0.89, "learning_rate": 4.9557239657631956e-05, "loss": 0.0135, "step": 4970 }, { "epoch": 0.89, "learning_rate": 4.955634807417975e-05, "loss": 0.0123, "step": 4980 }, { "epoch": 0.89, "learning_rate": 4.955545649072754e-05, "loss": 0.0081, "step": 4990 }, { "epoch": 0.89, "learning_rate": 4.955456490727532e-05, "loss": 0.0135, "step": 5000 }, { "epoch": 0.89, "learning_rate": 4.9553673323823114e-05, "loss": 0.0187, "step": 5010 }, { "epoch": 0.9, "learning_rate": 4.95527817403709e-05, "loss": 0.0134, "step": 5020 }, { "epoch": 0.9, "learning_rate": 4.955189015691869e-05, "loss": 0.0095, "step": 5030 }, { "epoch": 0.9, "learning_rate": 4.9551087731811704e-05, "loss": 0.0145, "step": 5040 }, { "epoch": 0.9, "learning_rate": 4.955019614835949e-05, "loss": 0.0179, "step": 5050 }, { "epoch": 0.9, "learning_rate": 4.954930456490728e-05, "loss": 0.014, "step": 5060 }, { "epoch": 0.9, "learning_rate": 4.9548412981455064e-05, "loss": 0.0148, "step": 5070 }, { "epoch": 0.91, "learning_rate": 4.9547521398002856e-05, "loss": 0.0114, "step": 5080 }, { "epoch": 0.91, "learning_rate": 4.954662981455065e-05, "loss": 0.0093, "step": 5090 }, { "epoch": 0.91, "learning_rate": 4.954573823109843e-05, "loss": 0.011, "step": 5100 }, { "epoch": 0.91, "learning_rate": 4.954484664764622e-05, "loss": 0.0129, "step": 5110 }, { "epoch": 0.91, "learning_rate": 4.954395506419401e-05, "loss": 0.0157, "step": 5120 }, { "epoch": 0.91, "learning_rate": 4.95430634807418e-05, "loss": 0.0128, "step": 5130 }, { "epoch": 0.92, "learning_rate": 4.954217189728959e-05, "loss": 0.0153, "step": 5140 }, { "epoch": 0.92, "learning_rate": 4.954128031383738e-05, "loss": 0.0108, "step": 5150 }, { "epoch": 0.92, "learning_rate": 4.9540388730385165e-05, "loss": 0.0179, "step": 5160 }, { "epoch": 0.92, "learning_rate": 4.9539497146932956e-05, "loss": 0.011, "step": 5170 }, { "epoch": 0.92, "learning_rate": 4.953860556348075e-05, "loss": 0.0127, "step": 5180 }, { "epoch": 0.93, "learning_rate": 4.953771398002853e-05, "loss": 0.0121, "step": 5190 }, { "epoch": 0.93, "learning_rate": 4.953682239657632e-05, "loss": 0.0131, "step": 5200 }, { "epoch": 0.93, "learning_rate": 4.953593081312411e-05, "loss": 0.0107, "step": 5210 }, { "epoch": 0.93, "learning_rate": 4.95350392296719e-05, "loss": 0.0147, "step": 5220 }, { "epoch": 0.93, "learning_rate": 4.953414764621968e-05, "loss": 0.0116, "step": 5230 }, { "epoch": 0.93, "learning_rate": 4.9533256062767474e-05, "loss": 0.0081, "step": 5240 }, { "epoch": 0.94, "learning_rate": 4.9532364479315266e-05, "loss": 0.0126, "step": 5250 }, { "epoch": 0.94, "learning_rate": 4.953147289586306e-05, "loss": 0.0114, "step": 5260 }, { "epoch": 0.94, "learning_rate": 4.953058131241085e-05, "loss": 0.0112, "step": 5270 }, { "epoch": 0.94, "learning_rate": 4.952968972895863e-05, "loss": 0.0131, "step": 5280 }, { "epoch": 0.94, "learning_rate": 4.9528798145506424e-05, "loss": 0.0137, "step": 5290 }, { "epoch": 0.95, "learning_rate": 4.952790656205421e-05, "loss": 0.0152, "step": 5300 }, { "epoch": 0.95, "learning_rate": 4.9527014978602e-05, "loss": 0.0137, "step": 5310 }, { "epoch": 0.95, "learning_rate": 4.952612339514979e-05, "loss": 0.0136, "step": 5320 }, { "epoch": 0.95, "learning_rate": 4.9525231811697575e-05, "loss": 0.0115, "step": 5330 }, { "epoch": 0.95, "learning_rate": 4.9524340228245366e-05, "loss": 0.0176, "step": 5340 }, { "epoch": 0.95, "learning_rate": 4.952344864479315e-05, "loss": 0.0124, "step": 5350 }, { "epoch": 0.96, "learning_rate": 4.952255706134095e-05, "loss": 0.0142, "step": 5360 }, { "epoch": 0.96, "learning_rate": 4.952166547788873e-05, "loss": 0.0111, "step": 5370 }, { "epoch": 0.96, "learning_rate": 4.9520773894436524e-05, "loss": 0.0135, "step": 5380 }, { "epoch": 0.96, "learning_rate": 4.951988231098431e-05, "loss": 0.0135, "step": 5390 }, { "epoch": 0.96, "learning_rate": 4.95189907275321e-05, "loss": 0.0116, "step": 5400 }, { "epoch": 0.96, "learning_rate": 4.951809914407989e-05, "loss": 0.0094, "step": 5410 }, { "epoch": 0.97, "learning_rate": 4.9517207560627675e-05, "loss": 0.0122, "step": 5420 }, { "epoch": 0.97, "learning_rate": 4.951631597717547e-05, "loss": 0.0113, "step": 5430 }, { "epoch": 0.97, "learning_rate": 4.951542439372325e-05, "loss": 0.0142, "step": 5440 }, { "epoch": 0.97, "learning_rate": 4.951453281027104e-05, "loss": 0.0141, "step": 5450 }, { "epoch": 0.97, "learning_rate": 4.951364122681883e-05, "loss": 0.0087, "step": 5460 }, { "epoch": 0.98, "learning_rate": 4.9512749643366625e-05, "loss": 0.0125, "step": 5470 }, { "epoch": 0.98, "learning_rate": 4.951185805991441e-05, "loss": 0.0141, "step": 5480 }, { "epoch": 0.98, "learning_rate": 4.95109664764622e-05, "loss": 0.0108, "step": 5490 }, { "epoch": 0.98, "learning_rate": 4.951007489300999e-05, "loss": 0.0135, "step": 5500 }, { "epoch": 0.98, "learning_rate": 4.9509183309557776e-05, "loss": 0.0107, "step": 5510 }, { "epoch": 0.98, "learning_rate": 4.950829172610557e-05, "loss": 0.0123, "step": 5520 }, { "epoch": 0.99, "learning_rate": 4.950740014265335e-05, "loss": 0.0122, "step": 5530 }, { "epoch": 0.99, "learning_rate": 4.950650855920114e-05, "loss": 0.0131, "step": 5540 }, { "epoch": 0.99, "learning_rate": 4.9505616975748934e-05, "loss": 0.0103, "step": 5550 }, { "epoch": 0.99, "learning_rate": 4.950472539229672e-05, "loss": 0.0099, "step": 5560 }, { "epoch": 0.99, "learning_rate": 4.950383380884451e-05, "loss": 0.0097, "step": 5570 }, { "epoch": 1.0, "learning_rate": 4.95029422253923e-05, "loss": 0.0118, "step": 5580 }, { "epoch": 1.0, "learning_rate": 4.950205064194009e-05, "loss": 0.0181, "step": 5590 }, { "epoch": 1.0, "learning_rate": 4.9501159058487877e-05, "loss": 0.0128, "step": 5600 }, { "epoch": 1.0, "eval_loss": 0.01809591054916382, "eval_runtime": 196.4064, "eval_samples_per_second": 23.619, "eval_steps_per_second": 2.953, "step": 5608 }, { "epoch": 1.0, "learning_rate": 4.950026747503567e-05, "loss": 0.0096, "step": 5610 }, { "epoch": 1.0, "learning_rate": 4.949937589158345e-05, "loss": 0.0119, "step": 5620 }, { "epoch": 1.0, "learning_rate": 4.9498484308131243e-05, "loss": 0.0152, "step": 5630 }, { "epoch": 1.01, "learning_rate": 4.9497592724679035e-05, "loss": 0.009, "step": 5640 }, { "epoch": 1.01, "learning_rate": 4.949670114122682e-05, "loss": 0.0102, "step": 5650 }, { "epoch": 1.01, "learning_rate": 4.949580955777461e-05, "loss": 0.008, "step": 5660 }, { "epoch": 1.01, "learning_rate": 4.9494917974322395e-05, "loss": 0.0095, "step": 5670 }, { "epoch": 1.01, "learning_rate": 4.9494026390870186e-05, "loss": 0.0111, "step": 5680 }, { "epoch": 1.01, "learning_rate": 4.949313480741798e-05, "loss": 0.0133, "step": 5690 }, { "epoch": 1.02, "learning_rate": 4.949224322396577e-05, "loss": 0.012, "step": 5700 }, { "epoch": 1.02, "learning_rate": 4.949135164051355e-05, "loss": 0.0077, "step": 5710 }, { "epoch": 1.02, "learning_rate": 4.9490460057061344e-05, "loss": 0.0108, "step": 5720 }, { "epoch": 1.02, "learning_rate": 4.9489568473609135e-05, "loss": 0.0156, "step": 5730 }, { "epoch": 1.02, "learning_rate": 4.948867689015692e-05, "loss": 0.0113, "step": 5740 }, { "epoch": 1.03, "learning_rate": 4.948778530670471e-05, "loss": 0.0079, "step": 5750 }, { "epoch": 1.03, "learning_rate": 4.9486893723252495e-05, "loss": 0.0094, "step": 5760 }, { "epoch": 1.03, "learning_rate": 4.9486002139800286e-05, "loss": 0.0124, "step": 5770 }, { "epoch": 1.03, "learning_rate": 4.948511055634808e-05, "loss": 0.0081, "step": 5780 }, { "epoch": 1.03, "learning_rate": 4.948421897289586e-05, "loss": 0.0116, "step": 5790 }, { "epoch": 1.03, "learning_rate": 4.948332738944365e-05, "loss": 0.0099, "step": 5800 }, { "epoch": 1.04, "learning_rate": 4.9482435805991445e-05, "loss": 0.0076, "step": 5810 }, { "epoch": 1.04, "learning_rate": 4.9481544222539236e-05, "loss": 0.014, "step": 5820 }, { "epoch": 1.04, "learning_rate": 4.948065263908702e-05, "loss": 0.0104, "step": 5830 }, { "epoch": 1.04, "learning_rate": 4.947976105563481e-05, "loss": 0.0069, "step": 5840 }, { "epoch": 1.04, "learning_rate": 4.9478869472182596e-05, "loss": 0.0121, "step": 5850 }, { "epoch": 1.04, "learning_rate": 4.947797788873039e-05, "loss": 0.0104, "step": 5860 }, { "epoch": 1.05, "learning_rate": 4.947708630527818e-05, "loss": 0.0133, "step": 5870 }, { "epoch": 1.05, "learning_rate": 4.947619472182596e-05, "loss": 0.0115, "step": 5880 }, { "epoch": 1.05, "learning_rate": 4.9475303138373754e-05, "loss": 0.0111, "step": 5890 }, { "epoch": 1.05, "learning_rate": 4.947441155492154e-05, "loss": 0.0108, "step": 5900 }, { "epoch": 1.05, "learning_rate": 4.9473519971469336e-05, "loss": 0.0131, "step": 5910 }, { "epoch": 1.06, "learning_rate": 4.947262838801712e-05, "loss": 0.0105, "step": 5920 }, { "epoch": 1.06, "learning_rate": 4.947173680456491e-05, "loss": 0.0118, "step": 5930 }, { "epoch": 1.06, "learning_rate": 4.9470845221112696e-05, "loss": 0.0087, "step": 5940 }, { "epoch": 1.06, "learning_rate": 4.946995363766049e-05, "loss": 0.007, "step": 5950 }, { "epoch": 1.06, "learning_rate": 4.946906205420828e-05, "loss": 0.0146, "step": 5960 }, { "epoch": 1.06, "learning_rate": 4.946817047075606e-05, "loss": 0.0133, "step": 5970 }, { "epoch": 1.07, "learning_rate": 4.9467278887303854e-05, "loss": 0.0152, "step": 5980 }, { "epoch": 1.07, "learning_rate": 4.946638730385164e-05, "loss": 0.0108, "step": 5990 }, { "epoch": 1.07, "learning_rate": 4.946549572039943e-05, "loss": 0.0116, "step": 6000 }, { "epoch": 1.07, "learning_rate": 4.946460413694722e-05, "loss": 0.0155, "step": 6010 }, { "epoch": 1.07, "learning_rate": 4.946371255349501e-05, "loss": 0.0113, "step": 6020 }, { "epoch": 1.08, "learning_rate": 4.94628209700428e-05, "loss": 0.0112, "step": 6030 }, { "epoch": 1.08, "learning_rate": 4.946192938659059e-05, "loss": 0.0111, "step": 6040 }, { "epoch": 1.08, "learning_rate": 4.946103780313838e-05, "loss": 0.0117, "step": 6050 }, { "epoch": 1.08, "learning_rate": 4.9460146219686164e-05, "loss": 0.0112, "step": 6060 }, { "epoch": 1.08, "learning_rate": 4.9459254636233955e-05, "loss": 0.011, "step": 6070 }, { "epoch": 1.08, "learning_rate": 4.945836305278174e-05, "loss": 0.0095, "step": 6080 }, { "epoch": 1.09, "learning_rate": 4.945747146932953e-05, "loss": 0.0131, "step": 6090 }, { "epoch": 1.09, "learning_rate": 4.945657988587732e-05, "loss": 0.0091, "step": 6100 }, { "epoch": 1.09, "learning_rate": 4.9455688302425106e-05, "loss": 0.0111, "step": 6110 }, { "epoch": 1.09, "learning_rate": 4.94547967189729e-05, "loss": 0.0062, "step": 6120 }, { "epoch": 1.09, "learning_rate": 4.945390513552069e-05, "loss": 0.015, "step": 6130 }, { "epoch": 1.09, "learning_rate": 4.945301355206848e-05, "loss": 0.0091, "step": 6140 }, { "epoch": 1.1, "learning_rate": 4.9452121968616264e-05, "loss": 0.0118, "step": 6150 }, { "epoch": 1.1, "learning_rate": 4.9451230385164056e-05, "loss": 0.0148, "step": 6160 }, { "epoch": 1.1, "learning_rate": 4.945033880171184e-05, "loss": 0.0103, "step": 6170 }, { "epoch": 1.1, "learning_rate": 4.944944721825963e-05, "loss": 0.0121, "step": 6180 }, { "epoch": 1.1, "learning_rate": 4.944855563480742e-05, "loss": 0.011, "step": 6190 }, { "epoch": 1.11, "learning_rate": 4.944766405135521e-05, "loss": 0.0103, "step": 6200 }, { "epoch": 1.11, "learning_rate": 4.9446772467903e-05, "loss": 0.0092, "step": 6210 }, { "epoch": 1.11, "learning_rate": 4.944588088445078e-05, "loss": 0.006, "step": 6220 }, { "epoch": 1.11, "learning_rate": 4.9444989300998574e-05, "loss": 0.0101, "step": 6230 }, { "epoch": 1.11, "learning_rate": 4.9444097717546365e-05, "loss": 0.0094, "step": 6240 }, { "epoch": 1.11, "learning_rate": 4.9443206134094156e-05, "loss": 0.0092, "step": 6250 }, { "epoch": 1.12, "learning_rate": 4.944231455064194e-05, "loss": 0.0125, "step": 6260 }, { "epoch": 1.12, "learning_rate": 4.944142296718973e-05, "loss": 0.0119, "step": 6270 }, { "epoch": 1.12, "learning_rate": 4.944053138373752e-05, "loss": 0.0087, "step": 6280 }, { "epoch": 1.12, "learning_rate": 4.943963980028531e-05, "loss": 0.0107, "step": 6290 }, { "epoch": 1.12, "learning_rate": 4.94387482168331e-05, "loss": 0.0118, "step": 6300 }, { "epoch": 1.13, "learning_rate": 4.943785663338088e-05, "loss": 0.0152, "step": 6310 }, { "epoch": 1.13, "learning_rate": 4.9436965049928674e-05, "loss": 0.0136, "step": 6320 }, { "epoch": 1.13, "learning_rate": 4.9436073466476466e-05, "loss": 0.0103, "step": 6330 }, { "epoch": 1.13, "learning_rate": 4.943518188302425e-05, "loss": 0.0092, "step": 6340 }, { "epoch": 1.13, "learning_rate": 4.943429029957205e-05, "loss": 0.0114, "step": 6350 }, { "epoch": 1.13, "learning_rate": 4.943339871611983e-05, "loss": 0.0123, "step": 6360 }, { "epoch": 1.14, "learning_rate": 4.9432507132667624e-05, "loss": 0.0116, "step": 6370 }, { "epoch": 1.14, "learning_rate": 4.943161554921541e-05, "loss": 0.0125, "step": 6380 }, { "epoch": 1.14, "learning_rate": 4.94307239657632e-05, "loss": 0.0098, "step": 6390 }, { "epoch": 1.14, "learning_rate": 4.9429832382310984e-05, "loss": 0.008, "step": 6400 }, { "epoch": 1.14, "learning_rate": 4.9428940798858775e-05, "loss": 0.0092, "step": 6410 }, { "epoch": 1.14, "learning_rate": 4.9428049215406566e-05, "loss": 0.0077, "step": 6420 }, { "epoch": 1.15, "learning_rate": 4.942715763195435e-05, "loss": 0.0129, "step": 6430 }, { "epoch": 1.15, "learning_rate": 4.942626604850214e-05, "loss": 0.0108, "step": 6440 }, { "epoch": 1.15, "learning_rate": 4.9425374465049926e-05, "loss": 0.0136, "step": 6450 }, { "epoch": 1.15, "learning_rate": 4.9424482881597724e-05, "loss": 0.013, "step": 6460 }, { "epoch": 1.15, "learning_rate": 4.942359129814551e-05, "loss": 0.0109, "step": 6470 }, { "epoch": 1.16, "learning_rate": 4.94226997146933e-05, "loss": 0.0123, "step": 6480 }, { "epoch": 1.16, "learning_rate": 4.9421808131241084e-05, "loss": 0.0114, "step": 6490 }, { "epoch": 1.16, "learning_rate": 4.9420916547788875e-05, "loss": 0.0145, "step": 6500 }, { "epoch": 1.16, "learning_rate": 4.942002496433667e-05, "loss": 0.0094, "step": 6510 }, { "epoch": 1.16, "learning_rate": 4.941913338088445e-05, "loss": 0.0102, "step": 6520 }, { "epoch": 1.16, "learning_rate": 4.941824179743224e-05, "loss": 0.0102, "step": 6530 }, { "epoch": 1.17, "learning_rate": 4.941735021398003e-05, "loss": 0.0115, "step": 6540 }, { "epoch": 1.17, "learning_rate": 4.941645863052782e-05, "loss": 0.0107, "step": 6550 }, { "epoch": 1.17, "learning_rate": 4.941556704707561e-05, "loss": 0.0084, "step": 6560 }, { "epoch": 1.17, "learning_rate": 4.94146754636234e-05, "loss": 0.0119, "step": 6570 }, { "epoch": 1.17, "learning_rate": 4.941378388017119e-05, "loss": 0.0115, "step": 6580 }, { "epoch": 1.18, "learning_rate": 4.9412892296718976e-05, "loss": 0.0111, "step": 6590 }, { "epoch": 1.18, "learning_rate": 4.941200071326677e-05, "loss": 0.0128, "step": 6600 }, { "epoch": 1.18, "learning_rate": 4.941110912981455e-05, "loss": 0.0106, "step": 6610 }, { "epoch": 1.18, "learning_rate": 4.941021754636234e-05, "loss": 0.0146, "step": 6620 }, { "epoch": 1.18, "learning_rate": 4.940932596291013e-05, "loss": 0.0144, "step": 6630 }, { "epoch": 1.18, "learning_rate": 4.940843437945792e-05, "loss": 0.0094, "step": 6640 }, { "epoch": 1.19, "learning_rate": 4.940754279600571e-05, "loss": 0.0117, "step": 6650 }, { "epoch": 1.19, "learning_rate": 4.9406651212553494e-05, "loss": 0.0121, "step": 6660 }, { "epoch": 1.19, "learning_rate": 4.9405759629101285e-05, "loss": 0.014, "step": 6670 }, { "epoch": 1.19, "learning_rate": 4.9404868045649077e-05, "loss": 0.0121, "step": 6680 }, { "epoch": 1.19, "learning_rate": 4.940397646219687e-05, "loss": 0.0106, "step": 6690 }, { "epoch": 1.19, "learning_rate": 4.940308487874465e-05, "loss": 0.0114, "step": 6700 }, { "epoch": 1.2, "learning_rate": 4.9402193295292443e-05, "loss": 0.0131, "step": 6710 }, { "epoch": 1.2, "learning_rate": 4.940130171184023e-05, "loss": 0.0083, "step": 6720 }, { "epoch": 1.2, "learning_rate": 4.940041012838802e-05, "loss": 0.0117, "step": 6730 }, { "epoch": 1.2, "learning_rate": 4.939951854493581e-05, "loss": 0.0127, "step": 6740 }, { "epoch": 1.2, "learning_rate": 4.9398626961483595e-05, "loss": 0.0085, "step": 6750 }, { "epoch": 1.21, "learning_rate": 4.9397735378031386e-05, "loss": 0.0076, "step": 6760 }, { "epoch": 1.21, "learning_rate": 4.939684379457917e-05, "loss": 0.0119, "step": 6770 }, { "epoch": 1.21, "learning_rate": 4.939595221112696e-05, "loss": 0.0123, "step": 6780 }, { "epoch": 1.21, "learning_rate": 4.939506062767475e-05, "loss": 0.0106, "step": 6790 }, { "epoch": 1.21, "learning_rate": 4.9394169044222544e-05, "loss": 0.0081, "step": 6800 }, { "epoch": 1.21, "learning_rate": 4.9393277460770335e-05, "loss": 0.0091, "step": 6810 }, { "epoch": 1.22, "learning_rate": 4.939238587731812e-05, "loss": 0.0083, "step": 6820 }, { "epoch": 1.22, "learning_rate": 4.939149429386591e-05, "loss": 0.0127, "step": 6830 }, { "epoch": 1.22, "learning_rate": 4.9390602710413695e-05, "loss": 0.01, "step": 6840 }, { "epoch": 1.22, "learning_rate": 4.9389711126961486e-05, "loss": 0.013, "step": 6850 }, { "epoch": 1.22, "learning_rate": 4.938881954350927e-05, "loss": 0.0077, "step": 6860 }, { "epoch": 1.23, "learning_rate": 4.938792796005706e-05, "loss": 0.0115, "step": 6870 }, { "epoch": 1.23, "learning_rate": 4.938703637660485e-05, "loss": 0.0093, "step": 6880 }, { "epoch": 1.23, "learning_rate": 4.938614479315264e-05, "loss": 0.0108, "step": 6890 }, { "epoch": 1.23, "learning_rate": 4.9385253209700436e-05, "loss": 0.0083, "step": 6900 }, { "epoch": 1.23, "learning_rate": 4.938436162624822e-05, "loss": 0.0143, "step": 6910 }, { "epoch": 1.23, "learning_rate": 4.938347004279601e-05, "loss": 0.0101, "step": 6920 }, { "epoch": 1.24, "learning_rate": 4.9382578459343796e-05, "loss": 0.0115, "step": 6930 }, { "epoch": 1.24, "learning_rate": 4.938168687589159e-05, "loss": 0.011, "step": 6940 }, { "epoch": 1.24, "learning_rate": 4.938079529243937e-05, "loss": 0.013, "step": 6950 }, { "epoch": 1.24, "learning_rate": 4.937990370898716e-05, "loss": 0.0074, "step": 6960 }, { "epoch": 1.24, "learning_rate": 4.9379012125534954e-05, "loss": 0.0111, "step": 6970 }, { "epoch": 1.24, "learning_rate": 4.937812054208274e-05, "loss": 0.0107, "step": 6980 }, { "epoch": 1.25, "learning_rate": 4.937722895863053e-05, "loss": 0.0136, "step": 6990 }, { "epoch": 1.25, "learning_rate": 4.9376337375178314e-05, "loss": 0.012, "step": 7000 }, { "epoch": 1.25, "learning_rate": 4.937544579172611e-05, "loss": 0.0076, "step": 7010 }, { "epoch": 1.25, "learning_rate": 4.9374554208273896e-05, "loss": 0.0081, "step": 7020 }, { "epoch": 1.25, "learning_rate": 4.937366262482169e-05, "loss": 0.0106, "step": 7030 }, { "epoch": 1.26, "learning_rate": 4.937277104136948e-05, "loss": 0.0115, "step": 7040 }, { "epoch": 1.26, "learning_rate": 4.937187945791726e-05, "loss": 0.011, "step": 7050 }, { "epoch": 1.26, "learning_rate": 4.9370987874465054e-05, "loss": 0.0108, "step": 7060 }, { "epoch": 1.26, "learning_rate": 4.937009629101284e-05, "loss": 0.0074, "step": 7070 }, { "epoch": 1.26, "learning_rate": 4.936920470756063e-05, "loss": 0.0131, "step": 7080 }, { "epoch": 1.26, "learning_rate": 4.9368313124108415e-05, "loss": 0.0145, "step": 7090 }, { "epoch": 1.27, "learning_rate": 4.9367421540656206e-05, "loss": 0.0106, "step": 7100 }, { "epoch": 1.27, "learning_rate": 4.9366529957204e-05, "loss": 0.0112, "step": 7110 }, { "epoch": 1.27, "learning_rate": 4.936563837375179e-05, "loss": 0.0081, "step": 7120 }, { "epoch": 1.27, "learning_rate": 4.936474679029958e-05, "loss": 0.0118, "step": 7130 }, { "epoch": 1.27, "learning_rate": 4.9363855206847364e-05, "loss": 0.0081, "step": 7140 }, { "epoch": 1.27, "learning_rate": 4.9362963623395155e-05, "loss": 0.0118, "step": 7150 }, { "epoch": 1.28, "learning_rate": 4.936207203994294e-05, "loss": 0.0117, "step": 7160 }, { "epoch": 1.28, "learning_rate": 4.936118045649073e-05, "loss": 0.0108, "step": 7170 }, { "epoch": 1.28, "learning_rate": 4.9360288873038515e-05, "loss": 0.0133, "step": 7180 }, { "epoch": 1.28, "learning_rate": 4.9359397289586306e-05, "loss": 0.0124, "step": 7190 }, { "epoch": 1.28, "learning_rate": 4.93585057061341e-05, "loss": 0.0092, "step": 7200 }, { "epoch": 1.29, "learning_rate": 4.935761412268188e-05, "loss": 0.0103, "step": 7210 }, { "epoch": 1.29, "learning_rate": 4.935672253922967e-05, "loss": 0.0104, "step": 7220 }, { "epoch": 1.29, "learning_rate": 4.9355830955777464e-05, "loss": 0.01, "step": 7230 }, { "epoch": 1.29, "learning_rate": 4.9354939372325256e-05, "loss": 0.0123, "step": 7240 }, { "epoch": 1.29, "learning_rate": 4.935404778887304e-05, "loss": 0.0132, "step": 7250 }, { "epoch": 1.29, "learning_rate": 4.935315620542083e-05, "loss": 0.0116, "step": 7260 }, { "epoch": 1.3, "learning_rate": 4.935226462196862e-05, "loss": 0.0113, "step": 7270 }, { "epoch": 1.3, "learning_rate": 4.935137303851641e-05, "loss": 0.0099, "step": 7280 }, { "epoch": 1.3, "learning_rate": 4.93504814550642e-05, "loss": 0.0108, "step": 7290 }, { "epoch": 1.3, "learning_rate": 4.934958987161198e-05, "loss": 0.0129, "step": 7300 }, { "epoch": 1.3, "learning_rate": 4.9348698288159774e-05, "loss": 0.0119, "step": 7310 }, { "epoch": 1.31, "learning_rate": 4.934780670470756e-05, "loss": 0.0096, "step": 7320 }, { "epoch": 1.31, "learning_rate": 4.934691512125535e-05, "loss": 0.0106, "step": 7330 }, { "epoch": 1.31, "learning_rate": 4.934602353780314e-05, "loss": 0.0106, "step": 7340 }, { "epoch": 1.31, "learning_rate": 4.934513195435093e-05, "loss": 0.0101, "step": 7350 }, { "epoch": 1.31, "learning_rate": 4.934424037089872e-05, "loss": 0.0086, "step": 7360 }, { "epoch": 1.31, "learning_rate": 4.934334878744651e-05, "loss": 0.0072, "step": 7370 }, { "epoch": 1.32, "learning_rate": 4.93424572039943e-05, "loss": 0.0085, "step": 7380 }, { "epoch": 1.32, "learning_rate": 4.934156562054208e-05, "loss": 0.01, "step": 7390 }, { "epoch": 1.32, "learning_rate": 4.9340674037089874e-05, "loss": 0.0158, "step": 7400 }, { "epoch": 1.32, "learning_rate": 4.933978245363766e-05, "loss": 0.0076, "step": 7410 }, { "epoch": 1.32, "learning_rate": 4.933889087018545e-05, "loss": 0.0148, "step": 7420 }, { "epoch": 1.32, "learning_rate": 4.933799928673324e-05, "loss": 0.0115, "step": 7430 }, { "epoch": 1.33, "learning_rate": 4.9337107703281026e-05, "loss": 0.0124, "step": 7440 }, { "epoch": 1.33, "learning_rate": 4.9336216119828824e-05, "loss": 0.0104, "step": 7450 }, { "epoch": 1.33, "learning_rate": 4.933532453637661e-05, "loss": 0.0075, "step": 7460 }, { "epoch": 1.33, "learning_rate": 4.93344329529244e-05, "loss": 0.0112, "step": 7470 }, { "epoch": 1.33, "learning_rate": 4.9333541369472184e-05, "loss": 0.0131, "step": 7480 }, { "epoch": 1.34, "learning_rate": 4.9332649786019975e-05, "loss": 0.0098, "step": 7490 }, { "epoch": 1.34, "learning_rate": 4.9331758202567766e-05, "loss": 0.011, "step": 7500 }, { "epoch": 1.34, "learning_rate": 4.933086661911555e-05, "loss": 0.0072, "step": 7510 }, { "epoch": 1.34, "learning_rate": 4.932997503566334e-05, "loss": 0.0116, "step": 7520 }, { "epoch": 1.34, "learning_rate": 4.9329083452211126e-05, "loss": 0.0117, "step": 7530 }, { "epoch": 1.34, "learning_rate": 4.932819186875892e-05, "loss": 0.0083, "step": 7540 }, { "epoch": 1.35, "learning_rate": 4.93273002853067e-05, "loss": 0.0143, "step": 7550 }, { "epoch": 1.35, "learning_rate": 4.93264087018545e-05, "loss": 0.01, "step": 7560 }, { "epoch": 1.35, "learning_rate": 4.9325517118402284e-05, "loss": 0.0103, "step": 7570 }, { "epoch": 1.35, "learning_rate": 4.9324625534950075e-05, "loss": 0.013, "step": 7580 }, { "epoch": 1.35, "learning_rate": 4.9323733951497867e-05, "loss": 0.0123, "step": 7590 }, { "epoch": 1.36, "learning_rate": 4.932284236804565e-05, "loss": 0.0102, "step": 7600 }, { "epoch": 1.36, "learning_rate": 4.932195078459344e-05, "loss": 0.0103, "step": 7610 }, { "epoch": 1.36, "learning_rate": 4.932105920114123e-05, "loss": 0.0137, "step": 7620 }, { "epoch": 1.36, "learning_rate": 4.932016761768902e-05, "loss": 0.0105, "step": 7630 }, { "epoch": 1.36, "learning_rate": 4.93192760342368e-05, "loss": 0.0118, "step": 7640 }, { "epoch": 1.36, "learning_rate": 4.9318384450784594e-05, "loss": 0.0109, "step": 7650 }, { "epoch": 1.37, "learning_rate": 4.9317492867332385e-05, "loss": 0.0121, "step": 7660 }, { "epoch": 1.37, "learning_rate": 4.9316601283880176e-05, "loss": 0.0105, "step": 7670 }, { "epoch": 1.37, "learning_rate": 4.931570970042797e-05, "loss": 0.007, "step": 7680 }, { "epoch": 1.37, "learning_rate": 4.931481811697575e-05, "loss": 0.0117, "step": 7690 }, { "epoch": 1.37, "learning_rate": 4.931392653352354e-05, "loss": 0.0121, "step": 7700 }, { "epoch": 1.37, "learning_rate": 4.931303495007133e-05, "loss": 0.0096, "step": 7710 }, { "epoch": 1.38, "learning_rate": 4.931214336661912e-05, "loss": 0.009, "step": 7720 }, { "epoch": 1.38, "learning_rate": 4.931125178316691e-05, "loss": 0.0125, "step": 7730 }, { "epoch": 1.38, "learning_rate": 4.9310360199714694e-05, "loss": 0.0082, "step": 7740 }, { "epoch": 1.38, "learning_rate": 4.9309468616262485e-05, "loss": 0.0098, "step": 7750 }, { "epoch": 1.38, "learning_rate": 4.930857703281027e-05, "loss": 0.01, "step": 7760 }, { "epoch": 1.39, "learning_rate": 4.930768544935806e-05, "loss": 0.015, "step": 7770 }, { "epoch": 1.39, "learning_rate": 4.930679386590585e-05, "loss": 0.0152, "step": 7780 }, { "epoch": 1.39, "learning_rate": 4.930590228245364e-05, "loss": 0.0132, "step": 7790 }, { "epoch": 1.39, "learning_rate": 4.930501069900143e-05, "loss": 0.0084, "step": 7800 }, { "epoch": 1.39, "learning_rate": 4.930411911554922e-05, "loss": 0.0109, "step": 7810 }, { "epoch": 1.39, "learning_rate": 4.930322753209701e-05, "loss": 0.0082, "step": 7820 }, { "epoch": 1.4, "learning_rate": 4.9302335948644795e-05, "loss": 0.0094, "step": 7830 }, { "epoch": 1.4, "learning_rate": 4.9301444365192586e-05, "loss": 0.0112, "step": 7840 }, { "epoch": 1.4, "learning_rate": 4.930055278174037e-05, "loss": 0.0073, "step": 7850 }, { "epoch": 1.4, "learning_rate": 4.929966119828816e-05, "loss": 0.0141, "step": 7860 }, { "epoch": 1.4, "learning_rate": 4.9298769614835946e-05, "loss": 0.0076, "step": 7870 }, { "epoch": 1.41, "learning_rate": 4.929787803138374e-05, "loss": 0.0072, "step": 7880 }, { "epoch": 1.41, "learning_rate": 4.929698644793153e-05, "loss": 0.0098, "step": 7890 }, { "epoch": 1.41, "learning_rate": 4.929609486447932e-05, "loss": 0.0128, "step": 7900 }, { "epoch": 1.41, "learning_rate": 4.929520328102711e-05, "loss": 0.0105, "step": 7910 }, { "epoch": 1.41, "learning_rate": 4.9294311697574895e-05, "loss": 0.0087, "step": 7920 }, { "epoch": 1.41, "learning_rate": 4.9293420114122686e-05, "loss": 0.0094, "step": 7930 }, { "epoch": 1.42, "learning_rate": 4.929252853067047e-05, "loss": 0.0168, "step": 7940 }, { "epoch": 1.42, "learning_rate": 4.929163694721826e-05, "loss": 0.0096, "step": 7950 }, { "epoch": 1.42, "learning_rate": 4.929074536376605e-05, "loss": 0.009, "step": 7960 }, { "epoch": 1.42, "learning_rate": 4.928985378031384e-05, "loss": 0.013, "step": 7970 }, { "epoch": 1.42, "learning_rate": 4.928896219686163e-05, "loss": 0.0141, "step": 7980 }, { "epoch": 1.42, "learning_rate": 4.928807061340941e-05, "loss": 0.012, "step": 7990 }, { "epoch": 1.43, "learning_rate": 4.928717902995721e-05, "loss": 0.0103, "step": 8000 }, { "epoch": 1.43, "learning_rate": 4.9286287446504996e-05, "loss": 0.0088, "step": 8010 }, { "epoch": 1.43, "learning_rate": 4.928539586305279e-05, "loss": 0.0078, "step": 8020 }, { "epoch": 1.43, "learning_rate": 4.928450427960057e-05, "loss": 0.0127, "step": 8030 }, { "epoch": 1.43, "learning_rate": 4.928361269614836e-05, "loss": 0.0078, "step": 8040 }, { "epoch": 1.44, "learning_rate": 4.9282721112696154e-05, "loss": 0.0115, "step": 8050 }, { "epoch": 1.44, "learning_rate": 4.928182952924394e-05, "loss": 0.0113, "step": 8060 }, { "epoch": 1.44, "learning_rate": 4.928093794579173e-05, "loss": 0.012, "step": 8070 }, { "epoch": 1.44, "learning_rate": 4.9280046362339514e-05, "loss": 0.0141, "step": 8080 }, { "epoch": 1.44, "learning_rate": 4.9279154778887305e-05, "loss": 0.0079, "step": 8090 }, { "epoch": 1.44, "learning_rate": 4.927826319543509e-05, "loss": 0.0093, "step": 8100 }, { "epoch": 1.45, "learning_rate": 4.927737161198289e-05, "loss": 0.0085, "step": 8110 }, { "epoch": 1.45, "learning_rate": 4.927648002853067e-05, "loss": 0.0106, "step": 8120 }, { "epoch": 1.45, "learning_rate": 4.927558844507846e-05, "loss": 0.0121, "step": 8130 }, { "epoch": 1.45, "learning_rate": 4.9274696861626254e-05, "loss": 0.0137, "step": 8140 }, { "epoch": 1.45, "learning_rate": 4.927380527817404e-05, "loss": 0.0095, "step": 8150 }, { "epoch": 1.46, "learning_rate": 4.927291369472183e-05, "loss": 0.0149, "step": 8160 }, { "epoch": 1.46, "learning_rate": 4.9272022111269614e-05, "loss": 0.0091, "step": 8170 }, { "epoch": 1.46, "learning_rate": 4.9271130527817406e-05, "loss": 0.0081, "step": 8180 }, { "epoch": 1.46, "learning_rate": 4.92702389443652e-05, "loss": 0.0095, "step": 8190 }, { "epoch": 1.46, "learning_rate": 4.926934736091298e-05, "loss": 0.0078, "step": 8200 }, { "epoch": 1.46, "learning_rate": 4.926845577746077e-05, "loss": 0.0084, "step": 8210 }, { "epoch": 1.47, "learning_rate": 4.9267564194008564e-05, "loss": 0.0125, "step": 8220 }, { "epoch": 1.47, "learning_rate": 4.9266672610556355e-05, "loss": 0.0116, "step": 8230 }, { "epoch": 1.47, "learning_rate": 4.926578102710414e-05, "loss": 0.0081, "step": 8240 }, { "epoch": 1.47, "learning_rate": 4.926488944365193e-05, "loss": 0.0116, "step": 8250 }, { "epoch": 1.47, "learning_rate": 4.9263997860199715e-05, "loss": 0.0097, "step": 8260 }, { "epoch": 1.47, "learning_rate": 4.9263106276747506e-05, "loss": 0.0091, "step": 8270 }, { "epoch": 1.48, "learning_rate": 4.92622146932953e-05, "loss": 0.0086, "step": 8280 }, { "epoch": 1.48, "learning_rate": 4.926132310984308e-05, "loss": 0.0087, "step": 8290 }, { "epoch": 1.48, "learning_rate": 4.926043152639087e-05, "loss": 0.013, "step": 8300 }, { "epoch": 1.48, "learning_rate": 4.925953994293866e-05, "loss": 0.0082, "step": 8310 }, { "epoch": 1.48, "learning_rate": 4.925864835948645e-05, "loss": 0.0087, "step": 8320 }, { "epoch": 1.49, "learning_rate": 4.925775677603424e-05, "loss": 0.0112, "step": 8330 }, { "epoch": 1.49, "learning_rate": 4.925686519258203e-05, "loss": 0.0126, "step": 8340 }, { "epoch": 1.49, "learning_rate": 4.9255973609129816e-05, "loss": 0.0095, "step": 8350 }, { "epoch": 1.49, "learning_rate": 4.925508202567761e-05, "loss": 0.0088, "step": 8360 }, { "epoch": 1.49, "learning_rate": 4.92541904422254e-05, "loss": 0.0088, "step": 8370 }, { "epoch": 1.49, "learning_rate": 4.925329885877318e-05, "loss": 0.0096, "step": 8380 }, { "epoch": 1.5, "learning_rate": 4.9252407275320974e-05, "loss": 0.0114, "step": 8390 }, { "epoch": 1.5, "learning_rate": 4.925151569186876e-05, "loss": 0.0101, "step": 8400 }, { "epoch": 1.5, "learning_rate": 4.925062410841655e-05, "loss": 0.0083, "step": 8410 }, { "epoch": 1.5, "learning_rate": 4.9249732524964334e-05, "loss": 0.0088, "step": 8420 }, { "epoch": 1.5, "learning_rate": 4.9248840941512125e-05, "loss": 0.0115, "step": 8430 }, { "epoch": 1.5, "learning_rate": 4.9247949358059916e-05, "loss": 0.0084, "step": 8440 }, { "epoch": 1.51, "learning_rate": 4.924705777460771e-05, "loss": 0.0121, "step": 8450 }, { "epoch": 1.51, "learning_rate": 4.92461661911555e-05, "loss": 0.0137, "step": 8460 }, { "epoch": 1.51, "learning_rate": 4.924527460770328e-05, "loss": 0.0102, "step": 8470 }, { "epoch": 1.51, "learning_rate": 4.9244383024251074e-05, "loss": 0.0107, "step": 8480 }, { "epoch": 1.51, "learning_rate": 4.924349144079886e-05, "loss": 0.0149, "step": 8490 }, { "epoch": 1.52, "learning_rate": 4.924259985734665e-05, "loss": 0.0101, "step": 8500 }, { "epoch": 1.52, "learning_rate": 4.924170827389444e-05, "loss": 0.0079, "step": 8510 }, { "epoch": 1.52, "learning_rate": 4.9240816690442225e-05, "loss": 0.0086, "step": 8520 }, { "epoch": 1.52, "learning_rate": 4.923992510699002e-05, "loss": 0.0085, "step": 8530 }, { "epoch": 1.52, "learning_rate": 4.92390335235378e-05, "loss": 0.0077, "step": 8540 }, { "epoch": 1.52, "learning_rate": 4.92381419400856e-05, "loss": 0.0097, "step": 8550 }, { "epoch": 1.53, "learning_rate": 4.9237250356633384e-05, "loss": 0.0101, "step": 8560 }, { "epoch": 1.53, "learning_rate": 4.9236358773181175e-05, "loss": 0.0097, "step": 8570 }, { "epoch": 1.53, "learning_rate": 4.923546718972896e-05, "loss": 0.0091, "step": 8580 }, { "epoch": 1.53, "learning_rate": 4.923457560627675e-05, "loss": 0.0107, "step": 8590 }, { "epoch": 1.53, "learning_rate": 4.923368402282454e-05, "loss": 0.0081, "step": 8600 }, { "epoch": 1.54, "learning_rate": 4.9232792439372326e-05, "loss": 0.0061, "step": 8610 }, { "epoch": 1.54, "learning_rate": 4.923190085592012e-05, "loss": 0.0074, "step": 8620 }, { "epoch": 1.54, "learning_rate": 4.92310092724679e-05, "loss": 0.013, "step": 8630 }, { "epoch": 1.54, "learning_rate": 4.923011768901569e-05, "loss": 0.015, "step": 8640 }, { "epoch": 1.54, "learning_rate": 4.922922610556348e-05, "loss": 0.0097, "step": 8650 }, { "epoch": 1.54, "learning_rate": 4.9228334522111275e-05, "loss": 0.0116, "step": 8660 }, { "epoch": 1.55, "learning_rate": 4.922744293865906e-05, "loss": 0.0105, "step": 8670 }, { "epoch": 1.55, "learning_rate": 4.922655135520685e-05, "loss": 0.0101, "step": 8680 }, { "epoch": 1.55, "learning_rate": 4.922565977175464e-05, "loss": 0.007, "step": 8690 }, { "epoch": 1.55, "learning_rate": 4.922476818830243e-05, "loss": 0.0101, "step": 8700 }, { "epoch": 1.55, "learning_rate": 4.922387660485022e-05, "loss": 0.0065, "step": 8710 }, { "epoch": 1.55, "learning_rate": 4.9222985021398e-05, "loss": 0.0112, "step": 8720 }, { "epoch": 1.56, "learning_rate": 4.9222093437945793e-05, "loss": 0.0124, "step": 8730 }, { "epoch": 1.56, "learning_rate": 4.9221201854493585e-05, "loss": 0.0128, "step": 8740 }, { "epoch": 1.56, "learning_rate": 4.922031027104137e-05, "loss": 0.0103, "step": 8750 }, { "epoch": 1.56, "learning_rate": 4.921941868758916e-05, "loss": 0.0103, "step": 8760 }, { "epoch": 1.56, "learning_rate": 4.921852710413695e-05, "loss": 0.0072, "step": 8770 }, { "epoch": 1.57, "learning_rate": 4.921763552068474e-05, "loss": 0.0075, "step": 8780 }, { "epoch": 1.57, "learning_rate": 4.921674393723253e-05, "loss": 0.0086, "step": 8790 }, { "epoch": 1.57, "learning_rate": 4.921585235378032e-05, "loss": 0.0126, "step": 8800 }, { "epoch": 1.57, "learning_rate": 4.92149607703281e-05, "loss": 0.0089, "step": 8810 }, { "epoch": 1.57, "learning_rate": 4.9214069186875894e-05, "loss": 0.0133, "step": 8820 }, { "epoch": 1.57, "learning_rate": 4.9213177603423685e-05, "loss": 0.0094, "step": 8830 }, { "epoch": 1.58, "learning_rate": 4.921228601997147e-05, "loss": 0.0082, "step": 8840 }, { "epoch": 1.58, "learning_rate": 4.921139443651926e-05, "loss": 0.0072, "step": 8850 }, { "epoch": 1.58, "learning_rate": 4.9210502853067045e-05, "loss": 0.0083, "step": 8860 }, { "epoch": 1.58, "learning_rate": 4.9209611269614837e-05, "loss": 0.0082, "step": 8870 }, { "epoch": 1.58, "learning_rate": 4.920871968616263e-05, "loss": 0.0087, "step": 8880 }, { "epoch": 1.59, "learning_rate": 4.920782810271042e-05, "loss": 0.0084, "step": 8890 }, { "epoch": 1.59, "learning_rate": 4.92069365192582e-05, "loss": 0.0106, "step": 8900 }, { "epoch": 1.59, "learning_rate": 4.9206044935805995e-05, "loss": 0.0103, "step": 8910 }, { "epoch": 1.59, "learning_rate": 4.9205153352353786e-05, "loss": 0.0111, "step": 8920 }, { "epoch": 1.59, "learning_rate": 4.920426176890157e-05, "loss": 0.0072, "step": 8930 }, { "epoch": 1.59, "learning_rate": 4.920337018544936e-05, "loss": 0.0107, "step": 8940 }, { "epoch": 1.6, "learning_rate": 4.9202478601997146e-05, "loss": 0.0093, "step": 8950 }, { "epoch": 1.6, "learning_rate": 4.920158701854494e-05, "loss": 0.0069, "step": 8960 }, { "epoch": 1.6, "learning_rate": 4.920069543509273e-05, "loss": 0.0105, "step": 8970 }, { "epoch": 1.6, "learning_rate": 4.919980385164051e-05, "loss": 0.0102, "step": 8980 }, { "epoch": 1.6, "learning_rate": 4.919891226818831e-05, "loss": 0.0131, "step": 8990 }, { "epoch": 1.6, "learning_rate": 4.9198020684736095e-05, "loss": 0.0088, "step": 9000 }, { "epoch": 1.61, "learning_rate": 4.9197129101283886e-05, "loss": 0.01, "step": 9010 }, { "epoch": 1.61, "learning_rate": 4.919623751783167e-05, "loss": 0.0085, "step": 9020 }, { "epoch": 1.61, "learning_rate": 4.919534593437946e-05, "loss": 0.0078, "step": 9030 }, { "epoch": 1.61, "learning_rate": 4.9194454350927246e-05, "loss": 0.0074, "step": 9040 }, { "epoch": 1.61, "learning_rate": 4.919356276747504e-05, "loss": 0.011, "step": 9050 }, { "epoch": 1.62, "learning_rate": 4.919267118402283e-05, "loss": 0.0133, "step": 9060 }, { "epoch": 1.62, "learning_rate": 4.919177960057061e-05, "loss": 0.0088, "step": 9070 }, { "epoch": 1.62, "learning_rate": 4.9190888017118405e-05, "loss": 0.0119, "step": 9080 }, { "epoch": 1.62, "learning_rate": 4.918999643366619e-05, "loss": 0.0097, "step": 9090 }, { "epoch": 1.62, "learning_rate": 4.918910485021399e-05, "loss": 0.0143, "step": 9100 }, { "epoch": 1.62, "learning_rate": 4.918821326676177e-05, "loss": 0.009, "step": 9110 }, { "epoch": 1.63, "learning_rate": 4.918732168330956e-05, "loss": 0.0116, "step": 9120 }, { "epoch": 1.63, "learning_rate": 4.918643009985735e-05, "loss": 0.0096, "step": 9130 }, { "epoch": 1.63, "learning_rate": 4.918553851640514e-05, "loss": 0.0085, "step": 9140 }, { "epoch": 1.63, "learning_rate": 4.918464693295293e-05, "loss": 0.009, "step": 9150 }, { "epoch": 1.63, "learning_rate": 4.9183755349500714e-05, "loss": 0.0116, "step": 9160 }, { "epoch": 1.64, "learning_rate": 4.9182863766048505e-05, "loss": 0.0107, "step": 9170 }, { "epoch": 1.64, "learning_rate": 4.918197218259629e-05, "loss": 0.009, "step": 9180 }, { "epoch": 1.64, "learning_rate": 4.918108059914408e-05, "loss": 0.0073, "step": 9190 }, { "epoch": 1.64, "learning_rate": 4.918018901569187e-05, "loss": 0.011, "step": 9200 }, { "epoch": 1.64, "learning_rate": 4.917929743223966e-05, "loss": 0.0082, "step": 9210 }, { "epoch": 1.64, "learning_rate": 4.9178405848787454e-05, "loss": 0.0114, "step": 9220 }, { "epoch": 1.65, "learning_rate": 4.917751426533524e-05, "loss": 0.0141, "step": 9230 }, { "epoch": 1.65, "learning_rate": 4.917662268188303e-05, "loss": 0.0142, "step": 9240 }, { "epoch": 1.65, "learning_rate": 4.9175731098430814e-05, "loss": 0.0091, "step": 9250 }, { "epoch": 1.65, "learning_rate": 4.9174839514978606e-05, "loss": 0.0086, "step": 9260 }, { "epoch": 1.65, "learning_rate": 4.917394793152639e-05, "loss": 0.0106, "step": 9270 }, { "epoch": 1.65, "learning_rate": 4.917305634807418e-05, "loss": 0.0084, "step": 9280 }, { "epoch": 1.66, "learning_rate": 4.917216476462197e-05, "loss": 0.012, "step": 9290 }, { "epoch": 1.66, "learning_rate": 4.917127318116976e-05, "loss": 0.0067, "step": 9300 }, { "epoch": 1.66, "learning_rate": 4.917038159771755e-05, "loss": 0.0073, "step": 9310 }, { "epoch": 1.66, "learning_rate": 4.916949001426534e-05, "loss": 0.0123, "step": 9320 }, { "epoch": 1.66, "learning_rate": 4.916868758915835e-05, "loss": 0.0071, "step": 9330 }, { "epoch": 1.67, "learning_rate": 4.916779600570613e-05, "loss": 0.013, "step": 9340 }, { "epoch": 1.67, "learning_rate": 4.916690442225392e-05, "loss": 0.0126, "step": 9350 }, { "epoch": 1.67, "learning_rate": 4.9166012838801714e-05, "loss": 0.0137, "step": 9360 }, { "epoch": 1.67, "learning_rate": 4.9165121255349505e-05, "loss": 0.0104, "step": 9370 }, { "epoch": 1.67, "learning_rate": 4.91642296718973e-05, "loss": 0.0092, "step": 9380 }, { "epoch": 1.67, "learning_rate": 4.916333808844508e-05, "loss": 0.0103, "step": 9390 }, { "epoch": 1.68, "learning_rate": 4.916244650499287e-05, "loss": 0.0102, "step": 9400 }, { "epoch": 1.68, "learning_rate": 4.916155492154066e-05, "loss": 0.0103, "step": 9410 }, { "epoch": 1.68, "learning_rate": 4.916066333808845e-05, "loss": 0.0122, "step": 9420 }, { "epoch": 1.68, "learning_rate": 4.915977175463623e-05, "loss": 0.0104, "step": 9430 }, { "epoch": 1.68, "learning_rate": 4.9158880171184024e-05, "loss": 0.012, "step": 9440 }, { "epoch": 1.69, "learning_rate": 4.9157988587731815e-05, "loss": 0.0095, "step": 9450 }, { "epoch": 1.69, "learning_rate": 4.91570970042796e-05, "loss": 0.0088, "step": 9460 }, { "epoch": 1.69, "learning_rate": 4.915620542082739e-05, "loss": 0.0139, "step": 9470 }, { "epoch": 1.69, "learning_rate": 4.915531383737518e-05, "loss": 0.011, "step": 9480 }, { "epoch": 1.69, "learning_rate": 4.915442225392297e-05, "loss": 0.0138, "step": 9490 }, { "epoch": 1.69, "learning_rate": 4.915353067047076e-05, "loss": 0.0078, "step": 9500 }, { "epoch": 1.7, "learning_rate": 4.915263908701855e-05, "loss": 0.0082, "step": 9510 }, { "epoch": 1.7, "learning_rate": 4.915174750356634e-05, "loss": 0.0087, "step": 9520 }, { "epoch": 1.7, "learning_rate": 4.9150855920114124e-05, "loss": 0.0088, "step": 9530 }, { "epoch": 1.7, "learning_rate": 4.9149964336661915e-05, "loss": 0.009, "step": 9540 }, { "epoch": 1.7, "learning_rate": 4.91490727532097e-05, "loss": 0.0103, "step": 9550 }, { "epoch": 1.7, "learning_rate": 4.914818116975749e-05, "loss": 0.0099, "step": 9560 }, { "epoch": 1.71, "learning_rate": 4.9147289586305275e-05, "loss": 0.0076, "step": 9570 }, { "epoch": 1.71, "learning_rate": 4.914639800285307e-05, "loss": 0.0098, "step": 9580 }, { "epoch": 1.71, "learning_rate": 4.914550641940086e-05, "loss": 0.0119, "step": 9590 }, { "epoch": 1.71, "learning_rate": 4.914461483594865e-05, "loss": 0.0125, "step": 9600 }, { "epoch": 1.71, "learning_rate": 4.914372325249644e-05, "loss": 0.008, "step": 9610 }, { "epoch": 1.72, "learning_rate": 4.9142831669044225e-05, "loss": 0.0118, "step": 9620 }, { "epoch": 1.72, "learning_rate": 4.9141940085592016e-05, "loss": 0.0135, "step": 9630 }, { "epoch": 1.72, "learning_rate": 4.91410485021398e-05, "loss": 0.012, "step": 9640 }, { "epoch": 1.72, "learning_rate": 4.914015691868759e-05, "loss": 0.0061, "step": 9650 }, { "epoch": 1.72, "learning_rate": 4.9139265335235376e-05, "loss": 0.0063, "step": 9660 }, { "epoch": 1.72, "learning_rate": 4.913837375178317e-05, "loss": 0.0106, "step": 9670 }, { "epoch": 1.73, "learning_rate": 4.913748216833096e-05, "loss": 0.0091, "step": 9680 }, { "epoch": 1.73, "learning_rate": 4.913659058487874e-05, "loss": 0.013, "step": 9690 }, { "epoch": 1.73, "learning_rate": 4.913569900142654e-05, "loss": 0.0106, "step": 9700 }, { "epoch": 1.73, "learning_rate": 4.9134807417974325e-05, "loss": 0.0122, "step": 9710 }, { "epoch": 1.73, "learning_rate": 4.9133915834522116e-05, "loss": 0.0098, "step": 9720 }, { "epoch": 1.74, "learning_rate": 4.91330242510699e-05, "loss": 0.0094, "step": 9730 }, { "epoch": 1.74, "learning_rate": 4.913213266761769e-05, "loss": 0.009, "step": 9740 }, { "epoch": 1.74, "learning_rate": 4.913124108416548e-05, "loss": 0.0079, "step": 9750 }, { "epoch": 1.74, "learning_rate": 4.913034950071327e-05, "loss": 0.0075, "step": 9760 }, { "epoch": 1.74, "learning_rate": 4.912945791726106e-05, "loss": 0.0076, "step": 9770 }, { "epoch": 1.74, "learning_rate": 4.9128566333808843e-05, "loss": 0.0084, "step": 9780 }, { "epoch": 1.75, "learning_rate": 4.9127674750356635e-05, "loss": 0.008, "step": 9790 }, { "epoch": 1.75, "learning_rate": 4.912678316690442e-05, "loss": 0.0078, "step": 9800 }, { "epoch": 1.75, "learning_rate": 4.912589158345222e-05, "loss": 0.0078, "step": 9810 }, { "epoch": 1.75, "learning_rate": 4.9125e-05, "loss": 0.0112, "step": 9820 }, { "epoch": 1.75, "learning_rate": 4.912410841654779e-05, "loss": 0.0134, "step": 9830 }, { "epoch": 1.75, "learning_rate": 4.9123216833095584e-05, "loss": 0.0089, "step": 9840 }, { "epoch": 1.76, "learning_rate": 4.912232524964337e-05, "loss": 0.0112, "step": 9850 }, { "epoch": 1.76, "learning_rate": 4.912143366619116e-05, "loss": 0.0107, "step": 9860 }, { "epoch": 1.76, "learning_rate": 4.9120542082738944e-05, "loss": 0.0112, "step": 9870 }, { "epoch": 1.76, "learning_rate": 4.9119650499286735e-05, "loss": 0.0124, "step": 9880 }, { "epoch": 1.76, "learning_rate": 4.911875891583452e-05, "loss": 0.0105, "step": 9890 }, { "epoch": 1.77, "learning_rate": 4.911786733238231e-05, "loss": 0.0117, "step": 9900 }, { "epoch": 1.77, "learning_rate": 4.91169757489301e-05, "loss": 0.0115, "step": 9910 }, { "epoch": 1.77, "learning_rate": 4.911608416547789e-05, "loss": 0.0075, "step": 9920 }, { "epoch": 1.77, "learning_rate": 4.9115192582025684e-05, "loss": 0.0082, "step": 9930 }, { "epoch": 1.77, "learning_rate": 4.911430099857347e-05, "loss": 0.0093, "step": 9940 }, { "epoch": 1.77, "learning_rate": 4.911340941512126e-05, "loss": 0.0132, "step": 9950 }, { "epoch": 1.78, "learning_rate": 4.9112517831669045e-05, "loss": 0.0095, "step": 9960 }, { "epoch": 1.78, "learning_rate": 4.9111626248216836e-05, "loss": 0.0093, "step": 9970 }, { "epoch": 1.78, "learning_rate": 4.911073466476463e-05, "loss": 0.0082, "step": 9980 }, { "epoch": 1.78, "learning_rate": 4.910984308131241e-05, "loss": 0.0073, "step": 9990 }, { "epoch": 1.78, "learning_rate": 4.91089514978602e-05, "loss": 0.0093, "step": 10000 }, { "epoch": 1.78, "learning_rate": 4.910805991440799e-05, "loss": 0.0066, "step": 10010 }, { "epoch": 1.79, "learning_rate": 4.910716833095578e-05, "loss": 0.0098, "step": 10020 }, { "epoch": 1.79, "learning_rate": 4.910627674750357e-05, "loss": 0.0093, "step": 10030 }, { "epoch": 1.79, "learning_rate": 4.910538516405136e-05, "loss": 0.0075, "step": 10040 }, { "epoch": 1.79, "learning_rate": 4.9104493580599145e-05, "loss": 0.007, "step": 10050 }, { "epoch": 1.79, "learning_rate": 4.9103601997146936e-05, "loss": 0.012, "step": 10060 }, { "epoch": 1.8, "learning_rate": 4.910271041369473e-05, "loss": 0.0096, "step": 10070 }, { "epoch": 1.8, "learning_rate": 4.910181883024251e-05, "loss": 0.01, "step": 10080 }, { "epoch": 1.8, "learning_rate": 4.91009272467903e-05, "loss": 0.0074, "step": 10090 }, { "epoch": 1.8, "learning_rate": 4.910003566333809e-05, "loss": 0.0084, "step": 10100 }, { "epoch": 1.8, "learning_rate": 4.909914407988588e-05, "loss": 0.0104, "step": 10110 }, { "epoch": 1.8, "learning_rate": 4.909825249643366e-05, "loss": 0.0091, "step": 10120 }, { "epoch": 1.81, "learning_rate": 4.9097360912981454e-05, "loss": 0.0096, "step": 10130 }, { "epoch": 1.81, "learning_rate": 4.9096469329529246e-05, "loss": 0.0102, "step": 10140 }, { "epoch": 1.81, "learning_rate": 4.909557774607704e-05, "loss": 0.0094, "step": 10150 }, { "epoch": 1.81, "learning_rate": 4.909468616262483e-05, "loss": 0.0098, "step": 10160 }, { "epoch": 1.81, "learning_rate": 4.909379457917261e-05, "loss": 0.0116, "step": 10170 }, { "epoch": 1.82, "learning_rate": 4.9092902995720404e-05, "loss": 0.0132, "step": 10180 }, { "epoch": 1.82, "learning_rate": 4.909201141226819e-05, "loss": 0.0069, "step": 10190 }, { "epoch": 1.82, "learning_rate": 4.909111982881598e-05, "loss": 0.0091, "step": 10200 }, { "epoch": 1.82, "learning_rate": 4.909022824536377e-05, "loss": 0.0105, "step": 10210 }, { "epoch": 1.82, "learning_rate": 4.9089336661911555e-05, "loss": 0.009, "step": 10220 }, { "epoch": 1.82, "learning_rate": 4.9088445078459346e-05, "loss": 0.0125, "step": 10230 }, { "epoch": 1.83, "learning_rate": 4.908755349500713e-05, "loss": 0.0122, "step": 10240 }, { "epoch": 1.83, "learning_rate": 4.908666191155493e-05, "loss": 0.0081, "step": 10250 }, { "epoch": 1.83, "learning_rate": 4.908577032810271e-05, "loss": 0.0109, "step": 10260 }, { "epoch": 1.83, "learning_rate": 4.9084878744650504e-05, "loss": 0.0095, "step": 10270 }, { "epoch": 1.83, "learning_rate": 4.908398716119829e-05, "loss": 0.0097, "step": 10280 }, { "epoch": 1.83, "learning_rate": 4.908309557774608e-05, "loss": 0.0103, "step": 10290 }, { "epoch": 1.84, "learning_rate": 4.908220399429387e-05, "loss": 0.0112, "step": 10300 }, { "epoch": 1.84, "learning_rate": 4.9081312410841656e-05, "loss": 0.0086, "step": 10310 }, { "epoch": 1.84, "learning_rate": 4.908042082738945e-05, "loss": 0.0098, "step": 10320 }, { "epoch": 1.84, "learning_rate": 4.907952924393723e-05, "loss": 0.0098, "step": 10330 }, { "epoch": 1.84, "learning_rate": 4.907863766048502e-05, "loss": 0.0088, "step": 10340 }, { "epoch": 1.85, "learning_rate": 4.907774607703281e-05, "loss": 0.0091, "step": 10350 }, { "epoch": 1.85, "learning_rate": 4.9076854493580605e-05, "loss": 0.0075, "step": 10360 }, { "epoch": 1.85, "learning_rate": 4.907596291012839e-05, "loss": 0.01, "step": 10370 }, { "epoch": 1.85, "learning_rate": 4.907507132667618e-05, "loss": 0.0094, "step": 10380 }, { "epoch": 1.85, "learning_rate": 4.907417974322397e-05, "loss": 0.0117, "step": 10390 }, { "epoch": 1.85, "learning_rate": 4.9073288159771756e-05, "loss": 0.0064, "step": 10400 }, { "epoch": 1.86, "learning_rate": 4.907239657631955e-05, "loss": 0.0084, "step": 10410 }, { "epoch": 1.86, "learning_rate": 4.907150499286733e-05, "loss": 0.0098, "step": 10420 }, { "epoch": 1.86, "learning_rate": 4.907061340941512e-05, "loss": 0.0065, "step": 10430 }, { "epoch": 1.86, "learning_rate": 4.9069721825962914e-05, "loss": 0.0081, "step": 10440 }, { "epoch": 1.86, "learning_rate": 4.90688302425107e-05, "loss": 0.0098, "step": 10450 }, { "epoch": 1.87, "learning_rate": 4.906793865905849e-05, "loss": 0.008, "step": 10460 }, { "epoch": 1.87, "learning_rate": 4.906704707560628e-05, "loss": 0.0081, "step": 10470 }, { "epoch": 1.87, "learning_rate": 4.906615549215407e-05, "loss": 0.0079, "step": 10480 }, { "epoch": 1.87, "learning_rate": 4.906526390870186e-05, "loss": 0.0087, "step": 10490 }, { "epoch": 1.87, "learning_rate": 4.906437232524965e-05, "loss": 0.0103, "step": 10500 }, { "epoch": 1.87, "learning_rate": 4.906348074179743e-05, "loss": 0.0106, "step": 10510 }, { "epoch": 1.88, "learning_rate": 4.9062589158345224e-05, "loss": 0.0105, "step": 10520 }, { "epoch": 1.88, "learning_rate": 4.9061697574893015e-05, "loss": 0.008, "step": 10530 }, { "epoch": 1.88, "learning_rate": 4.90608059914408e-05, "loss": 0.0076, "step": 10540 }, { "epoch": 1.88, "learning_rate": 4.905991440798859e-05, "loss": 0.0073, "step": 10550 }, { "epoch": 1.88, "learning_rate": 4.9059022824536375e-05, "loss": 0.0103, "step": 10560 }, { "epoch": 1.88, "learning_rate": 4.9058131241084166e-05, "loss": 0.0093, "step": 10570 }, { "epoch": 1.89, "learning_rate": 4.905723965763196e-05, "loss": 0.0068, "step": 10580 }, { "epoch": 1.89, "learning_rate": 4.905634807417975e-05, "loss": 0.0117, "step": 10590 }, { "epoch": 1.89, "learning_rate": 4.905545649072753e-05, "loss": 0.0121, "step": 10600 }, { "epoch": 1.89, "learning_rate": 4.9054564907275324e-05, "loss": 0.0086, "step": 10610 }, { "epoch": 1.89, "learning_rate": 4.9053673323823115e-05, "loss": 0.0066, "step": 10620 }, { "epoch": 1.9, "learning_rate": 4.90527817403709e-05, "loss": 0.0082, "step": 10630 }, { "epoch": 1.9, "learning_rate": 4.905189015691869e-05, "loss": 0.008, "step": 10640 }, { "epoch": 1.9, "learning_rate": 4.9050998573466475e-05, "loss": 0.0113, "step": 10650 }, { "epoch": 1.9, "learning_rate": 4.9050106990014267e-05, "loss": 0.0104, "step": 10660 }, { "epoch": 1.9, "learning_rate": 4.904921540656206e-05, "loss": 0.009, "step": 10670 }, { "epoch": 1.9, "learning_rate": 4.904832382310984e-05, "loss": 0.012, "step": 10680 }, { "epoch": 1.91, "learning_rate": 4.904743223965764e-05, "loss": 0.0065, "step": 10690 }, { "epoch": 1.91, "learning_rate": 4.9046540656205425e-05, "loss": 0.0099, "step": 10700 }, { "epoch": 1.91, "learning_rate": 4.9045649072753216e-05, "loss": 0.0096, "step": 10710 }, { "epoch": 1.91, "learning_rate": 4.9044757489301e-05, "loss": 0.0096, "step": 10720 }, { "epoch": 1.91, "learning_rate": 4.904386590584879e-05, "loss": 0.016, "step": 10730 }, { "epoch": 1.92, "learning_rate": 4.9042974322396576e-05, "loss": 0.0149, "step": 10740 }, { "epoch": 1.92, "learning_rate": 4.904208273894437e-05, "loss": 0.0145, "step": 10750 }, { "epoch": 1.92, "learning_rate": 4.904119115549216e-05, "loss": 0.0101, "step": 10760 }, { "epoch": 1.92, "learning_rate": 4.904029957203994e-05, "loss": 0.0109, "step": 10770 }, { "epoch": 1.92, "learning_rate": 4.9039407988587734e-05, "loss": 0.0103, "step": 10780 }, { "epoch": 1.92, "learning_rate": 4.903851640513552e-05, "loss": 0.0125, "step": 10790 }, { "epoch": 1.93, "learning_rate": 4.9037624821683316e-05, "loss": 0.0092, "step": 10800 }, { "epoch": 1.93, "learning_rate": 4.90367332382311e-05, "loss": 0.0093, "step": 10810 }, { "epoch": 1.93, "learning_rate": 4.903584165477889e-05, "loss": 0.0097, "step": 10820 }, { "epoch": 1.93, "learning_rate": 4.9034950071326676e-05, "loss": 0.0135, "step": 10830 }, { "epoch": 1.93, "learning_rate": 4.903405848787447e-05, "loss": 0.0082, "step": 10840 }, { "epoch": 1.93, "learning_rate": 4.903316690442226e-05, "loss": 0.0087, "step": 10850 }, { "epoch": 1.94, "learning_rate": 4.903227532097004e-05, "loss": 0.0096, "step": 10860 }, { "epoch": 1.94, "learning_rate": 4.9031383737517835e-05, "loss": 0.01, "step": 10870 }, { "epoch": 1.94, "learning_rate": 4.903049215406562e-05, "loss": 0.0122, "step": 10880 }, { "epoch": 1.94, "learning_rate": 4.902960057061341e-05, "loss": 0.0109, "step": 10890 }, { "epoch": 1.94, "learning_rate": 4.90287089871612e-05, "loss": 0.0065, "step": 10900 }, { "epoch": 1.95, "learning_rate": 4.902781740370899e-05, "loss": 0.0104, "step": 10910 }, { "epoch": 1.95, "learning_rate": 4.9026925820256784e-05, "loss": 0.0106, "step": 10920 }, { "epoch": 1.95, "learning_rate": 4.902603423680457e-05, "loss": 0.0054, "step": 10930 }, { "epoch": 1.95, "learning_rate": 4.902514265335236e-05, "loss": 0.0098, "step": 10940 }, { "epoch": 1.95, "learning_rate": 4.9024251069900144e-05, "loss": 0.0083, "step": 10950 }, { "epoch": 1.95, "learning_rate": 4.9023359486447935e-05, "loss": 0.0079, "step": 10960 }, { "epoch": 1.96, "learning_rate": 4.902246790299572e-05, "loss": 0.0098, "step": 10970 }, { "epoch": 1.96, "learning_rate": 4.902157631954351e-05, "loss": 0.0093, "step": 10980 }, { "epoch": 1.96, "learning_rate": 4.90206847360913e-05, "loss": 0.0076, "step": 10990 }, { "epoch": 1.96, "learning_rate": 4.9019793152639086e-05, "loss": 0.0083, "step": 11000 }, { "epoch": 1.96, "learning_rate": 4.901890156918688e-05, "loss": 0.0132, "step": 11010 }, { "epoch": 1.97, "learning_rate": 4.901800998573467e-05, "loss": 0.0146, "step": 11020 }, { "epoch": 1.97, "learning_rate": 4.901711840228246e-05, "loss": 0.0071, "step": 11030 }, { "epoch": 1.97, "learning_rate": 4.9016226818830244e-05, "loss": 0.0107, "step": 11040 }, { "epoch": 1.97, "learning_rate": 4.9015335235378036e-05, "loss": 0.0105, "step": 11050 }, { "epoch": 1.97, "learning_rate": 4.901444365192582e-05, "loss": 0.0095, "step": 11060 }, { "epoch": 1.97, "learning_rate": 4.901355206847361e-05, "loss": 0.0094, "step": 11070 }, { "epoch": 1.98, "learning_rate": 4.90126604850214e-05, "loss": 0.0089, "step": 11080 }, { "epoch": 1.98, "learning_rate": 4.901176890156919e-05, "loss": 0.0126, "step": 11090 }, { "epoch": 1.98, "learning_rate": 4.901087731811698e-05, "loss": 0.0095, "step": 11100 }, { "epoch": 1.98, "learning_rate": 4.900998573466476e-05, "loss": 0.0101, "step": 11110 }, { "epoch": 1.98, "learning_rate": 4.9009094151212554e-05, "loss": 0.0105, "step": 11120 }, { "epoch": 1.98, "learning_rate": 4.9008202567760345e-05, "loss": 0.0086, "step": 11130 }, { "epoch": 1.99, "learning_rate": 4.9007310984308136e-05, "loss": 0.0087, "step": 11140 }, { "epoch": 1.99, "learning_rate": 4.900641940085593e-05, "loss": 0.007, "step": 11150 }, { "epoch": 1.99, "learning_rate": 4.900552781740371e-05, "loss": 0.0119, "step": 11160 }, { "epoch": 1.99, "learning_rate": 4.90046362339515e-05, "loss": 0.0098, "step": 11170 }, { "epoch": 1.99, "learning_rate": 4.900374465049929e-05, "loss": 0.0104, "step": 11180 }, { "epoch": 2.0, "learning_rate": 4.900285306704708e-05, "loss": 0.007, "step": 11190 }, { "epoch": 2.0, "learning_rate": 4.900196148359486e-05, "loss": 0.009, "step": 11200 }, { "epoch": 2.0, "learning_rate": 4.9001069900142654e-05, "loss": 0.0098, "step": 11210 }, { "epoch": 2.0, "eval_loss": 0.015248224139213562, "eval_runtime": 196.1154, "eval_samples_per_second": 23.654, "eval_steps_per_second": 2.957, "step": 11216 }, { "epoch": 2.0, "learning_rate": 4.9000178316690446e-05, "loss": 0.0093, "step": 11220 }, { "epoch": 2.0, "learning_rate": 4.899928673323823e-05, "loss": 0.0068, "step": 11230 }, { "epoch": 2.0, "learning_rate": 4.899839514978603e-05, "loss": 0.0063, "step": 11240 }, { "epoch": 2.01, "learning_rate": 4.899750356633381e-05, "loss": 0.0063, "step": 11250 }, { "epoch": 2.01, "learning_rate": 4.8996611982881604e-05, "loss": 0.0088, "step": 11260 }, { "epoch": 2.01, "learning_rate": 4.899572039942939e-05, "loss": 0.0105, "step": 11270 }, { "epoch": 2.01, "learning_rate": 4.899482881597718e-05, "loss": 0.007, "step": 11280 }, { "epoch": 2.01, "learning_rate": 4.8993937232524964e-05, "loss": 0.0076, "step": 11290 }, { "epoch": 2.01, "learning_rate": 4.8993045649072755e-05, "loss": 0.0101, "step": 11300 }, { "epoch": 2.02, "learning_rate": 4.8992154065620546e-05, "loss": 0.006, "step": 11310 }, { "epoch": 2.02, "learning_rate": 4.899126248216833e-05, "loss": 0.0088, "step": 11320 }, { "epoch": 2.02, "learning_rate": 4.899037089871612e-05, "loss": 0.0079, "step": 11330 }, { "epoch": 2.02, "learning_rate": 4.8989479315263906e-05, "loss": 0.008, "step": 11340 }, { "epoch": 2.02, "learning_rate": 4.8988587731811704e-05, "loss": 0.0086, "step": 11350 }, { "epoch": 2.03, "learning_rate": 4.898769614835949e-05, "loss": 0.0093, "step": 11360 }, { "epoch": 2.03, "learning_rate": 4.898680456490728e-05, "loss": 0.0121, "step": 11370 }, { "epoch": 2.03, "learning_rate": 4.898591298145507e-05, "loss": 0.0091, "step": 11380 }, { "epoch": 2.03, "learning_rate": 4.8985021398002856e-05, "loss": 0.0094, "step": 11390 }, { "epoch": 2.03, "learning_rate": 4.898412981455065e-05, "loss": 0.0112, "step": 11400 }, { "epoch": 2.03, "learning_rate": 4.898323823109843e-05, "loss": 0.0072, "step": 11410 }, { "epoch": 2.04, "learning_rate": 4.898234664764622e-05, "loss": 0.0086, "step": 11420 }, { "epoch": 2.04, "learning_rate": 4.898145506419401e-05, "loss": 0.0071, "step": 11430 }, { "epoch": 2.04, "learning_rate": 4.89805634807418e-05, "loss": 0.008, "step": 11440 }, { "epoch": 2.04, "learning_rate": 4.897967189728959e-05, "loss": 0.008, "step": 11450 }, { "epoch": 2.04, "learning_rate": 4.897878031383738e-05, "loss": 0.0075, "step": 11460 }, { "epoch": 2.05, "learning_rate": 4.897788873038517e-05, "loss": 0.008, "step": 11470 }, { "epoch": 2.05, "learning_rate": 4.8976997146932956e-05, "loss": 0.0111, "step": 11480 }, { "epoch": 2.05, "learning_rate": 4.897610556348075e-05, "loss": 0.0074, "step": 11490 }, { "epoch": 2.05, "learning_rate": 4.897521398002853e-05, "loss": 0.0073, "step": 11500 }, { "epoch": 2.05, "learning_rate": 4.897432239657632e-05, "loss": 0.0078, "step": 11510 }, { "epoch": 2.05, "learning_rate": 4.897343081312411e-05, "loss": 0.0111, "step": 11520 }, { "epoch": 2.06, "learning_rate": 4.89725392296719e-05, "loss": 0.008, "step": 11530 }, { "epoch": 2.06, "learning_rate": 4.897164764621969e-05, "loss": 0.0073, "step": 11540 }, { "epoch": 2.06, "learning_rate": 4.8970756062767474e-05, "loss": 0.0075, "step": 11550 }, { "epoch": 2.06, "learning_rate": 4.8969864479315265e-05, "loss": 0.0103, "step": 11560 }, { "epoch": 2.06, "learning_rate": 4.896897289586306e-05, "loss": 0.0053, "step": 11570 }, { "epoch": 2.06, "learning_rate": 4.896808131241085e-05, "loss": 0.0085, "step": 11580 }, { "epoch": 2.07, "learning_rate": 4.896718972895863e-05, "loss": 0.0127, "step": 11590 }, { "epoch": 2.07, "learning_rate": 4.896638730385164e-05, "loss": 0.0096, "step": 11600 }, { "epoch": 2.07, "learning_rate": 4.896549572039943e-05, "loss": 0.0094, "step": 11610 }, { "epoch": 2.07, "learning_rate": 4.896460413694722e-05, "loss": 0.0073, "step": 11620 }, { "epoch": 2.07, "learning_rate": 4.8963712553495014e-05, "loss": 0.0071, "step": 11630 }, { "epoch": 2.08, "learning_rate": 4.89628209700428e-05, "loss": 0.0067, "step": 11640 }, { "epoch": 2.08, "learning_rate": 4.896192938659059e-05, "loss": 0.0081, "step": 11650 }, { "epoch": 2.08, "learning_rate": 4.8961037803138374e-05, "loss": 0.009, "step": 11660 }, { "epoch": 2.08, "learning_rate": 4.8960146219686165e-05, "loss": 0.0113, "step": 11670 }, { "epoch": 2.08, "learning_rate": 4.8959254636233956e-05, "loss": 0.0083, "step": 11680 }, { "epoch": 2.08, "learning_rate": 4.895836305278174e-05, "loss": 0.0082, "step": 11690 }, { "epoch": 2.09, "learning_rate": 4.895747146932953e-05, "loss": 0.011, "step": 11700 }, { "epoch": 2.09, "learning_rate": 4.8956579885877317e-05, "loss": 0.0087, "step": 11710 }, { "epoch": 2.09, "learning_rate": 4.895568830242511e-05, "loss": 0.0105, "step": 11720 }, { "epoch": 2.09, "learning_rate": 4.89547967189729e-05, "loss": 0.0092, "step": 11730 }, { "epoch": 2.09, "learning_rate": 4.895390513552069e-05, "loss": 0.0068, "step": 11740 }, { "epoch": 2.1, "learning_rate": 4.8953013552068475e-05, "loss": 0.0083, "step": 11750 }, { "epoch": 2.1, "learning_rate": 4.8952121968616266e-05, "loss": 0.0074, "step": 11760 }, { "epoch": 2.1, "learning_rate": 4.895123038516406e-05, "loss": 0.0073, "step": 11770 }, { "epoch": 2.1, "learning_rate": 4.895033880171184e-05, "loss": 0.0093, "step": 11780 }, { "epoch": 2.1, "learning_rate": 4.894944721825963e-05, "loss": 0.0064, "step": 11790 }, { "epoch": 2.1, "learning_rate": 4.894855563480742e-05, "loss": 0.0073, "step": 11800 }, { "epoch": 2.11, "learning_rate": 4.894766405135521e-05, "loss": 0.0074, "step": 11810 }, { "epoch": 2.11, "learning_rate": 4.894677246790299e-05, "loss": 0.0091, "step": 11820 }, { "epoch": 2.11, "learning_rate": 4.8945880884450784e-05, "loss": 0.0065, "step": 11830 }, { "epoch": 2.11, "learning_rate": 4.8944989300998575e-05, "loss": 0.0086, "step": 11840 }, { "epoch": 2.11, "learning_rate": 4.8944097717546366e-05, "loss": 0.0064, "step": 11850 }, { "epoch": 2.11, "learning_rate": 4.894320613409416e-05, "loss": 0.0088, "step": 11860 }, { "epoch": 2.12, "learning_rate": 4.894231455064194e-05, "loss": 0.0062, "step": 11870 }, { "epoch": 2.12, "learning_rate": 4.894142296718973e-05, "loss": 0.0102, "step": 11880 }, { "epoch": 2.12, "learning_rate": 4.894053138373752e-05, "loss": 0.01, "step": 11890 }, { "epoch": 2.12, "learning_rate": 4.893963980028531e-05, "loss": 0.0084, "step": 11900 }, { "epoch": 2.12, "learning_rate": 4.89387482168331e-05, "loss": 0.0067, "step": 11910 }, { "epoch": 2.13, "learning_rate": 4.8937856633380884e-05, "loss": 0.0093, "step": 11920 }, { "epoch": 2.13, "learning_rate": 4.8936965049928676e-05, "loss": 0.0077, "step": 11930 }, { "epoch": 2.13, "learning_rate": 4.893607346647646e-05, "loss": 0.0123, "step": 11940 }, { "epoch": 2.13, "learning_rate": 4.893518188302426e-05, "loss": 0.0052, "step": 11950 }, { "epoch": 2.13, "learning_rate": 4.893429029957204e-05, "loss": 0.0091, "step": 11960 }, { "epoch": 2.13, "learning_rate": 4.8933398716119834e-05, "loss": 0.0088, "step": 11970 }, { "epoch": 2.14, "learning_rate": 4.893250713266762e-05, "loss": 0.0094, "step": 11980 }, { "epoch": 2.14, "learning_rate": 4.893161554921541e-05, "loss": 0.0075, "step": 11990 }, { "epoch": 2.14, "learning_rate": 4.89307239657632e-05, "loss": 0.0074, "step": 12000 }, { "epoch": 2.14, "learning_rate": 4.8929832382310985e-05, "loss": 0.0075, "step": 12010 }, { "epoch": 2.14, "learning_rate": 4.8928940798858776e-05, "loss": 0.0073, "step": 12020 }, { "epoch": 2.15, "learning_rate": 4.892804921540656e-05, "loss": 0.0098, "step": 12030 }, { "epoch": 2.15, "learning_rate": 4.892715763195435e-05, "loss": 0.0075, "step": 12040 }, { "epoch": 2.15, "learning_rate": 4.8926266048502136e-05, "loss": 0.0072, "step": 12050 }, { "epoch": 2.15, "learning_rate": 4.8925374465049934e-05, "loss": 0.0061, "step": 12060 }, { "epoch": 2.15, "learning_rate": 4.892448288159772e-05, "loss": 0.0107, "step": 12070 }, { "epoch": 2.15, "learning_rate": 4.892359129814551e-05, "loss": 0.0082, "step": 12080 }, { "epoch": 2.16, "learning_rate": 4.89226997146933e-05, "loss": 0.0048, "step": 12090 }, { "epoch": 2.16, "learning_rate": 4.8921808131241086e-05, "loss": 0.0073, "step": 12100 }, { "epoch": 2.16, "learning_rate": 4.892091654778888e-05, "loss": 0.0097, "step": 12110 }, { "epoch": 2.16, "learning_rate": 4.892002496433666e-05, "loss": 0.0071, "step": 12120 }, { "epoch": 2.16, "learning_rate": 4.891913338088445e-05, "loss": 0.008, "step": 12130 }, { "epoch": 2.16, "learning_rate": 4.8918241797432244e-05, "loss": 0.0063, "step": 12140 }, { "epoch": 2.17, "learning_rate": 4.891735021398003e-05, "loss": 0.0095, "step": 12150 }, { "epoch": 2.17, "learning_rate": 4.891645863052782e-05, "loss": 0.0095, "step": 12160 }, { "epoch": 2.17, "learning_rate": 4.891556704707561e-05, "loss": 0.0101, "step": 12170 }, { "epoch": 2.17, "learning_rate": 4.89146754636234e-05, "loss": 0.0103, "step": 12180 }, { "epoch": 2.17, "learning_rate": 4.8913783880171186e-05, "loss": 0.0091, "step": 12190 }, { "epoch": 2.18, "learning_rate": 4.891289229671898e-05, "loss": 0.0133, "step": 12200 }, { "epoch": 2.18, "learning_rate": 4.891200071326676e-05, "loss": 0.0047, "step": 12210 }, { "epoch": 2.18, "learning_rate": 4.891110912981455e-05, "loss": 0.0073, "step": 12220 }, { "epoch": 2.18, "learning_rate": 4.8910217546362344e-05, "loss": 0.0065, "step": 12230 }, { "epoch": 2.18, "learning_rate": 4.890932596291013e-05, "loss": 0.0075, "step": 12240 }, { "epoch": 2.18, "learning_rate": 4.890843437945792e-05, "loss": 0.01, "step": 12250 }, { "epoch": 2.19, "learning_rate": 4.8907542796005704e-05, "loss": 0.0087, "step": 12260 }, { "epoch": 2.19, "learning_rate": 4.8906651212553496e-05, "loss": 0.01, "step": 12270 }, { "epoch": 2.19, "learning_rate": 4.890575962910129e-05, "loss": 0.0077, "step": 12280 }, { "epoch": 2.19, "learning_rate": 4.890486804564908e-05, "loss": 0.0097, "step": 12290 }, { "epoch": 2.19, "learning_rate": 4.890397646219686e-05, "loss": 0.008, "step": 12300 }, { "epoch": 2.2, "learning_rate": 4.8903084878744654e-05, "loss": 0.005, "step": 12310 }, { "epoch": 2.2, "learning_rate": 4.8902193295292445e-05, "loss": 0.0072, "step": 12320 }, { "epoch": 2.2, "learning_rate": 4.890130171184023e-05, "loss": 0.0077, "step": 12330 }, { "epoch": 2.2, "learning_rate": 4.890041012838802e-05, "loss": 0.0088, "step": 12340 }, { "epoch": 2.2, "learning_rate": 4.8899518544935805e-05, "loss": 0.0068, "step": 12350 }, { "epoch": 2.2, "learning_rate": 4.8898626961483596e-05, "loss": 0.0071, "step": 12360 }, { "epoch": 2.21, "learning_rate": 4.889773537803139e-05, "loss": 0.0077, "step": 12370 }, { "epoch": 2.21, "learning_rate": 4.889684379457917e-05, "loss": 0.0081, "step": 12380 }, { "epoch": 2.21, "learning_rate": 4.889595221112697e-05, "loss": 0.0067, "step": 12390 }, { "epoch": 2.21, "learning_rate": 4.8895060627674754e-05, "loss": 0.0099, "step": 12400 }, { "epoch": 2.21, "learning_rate": 4.8894169044222545e-05, "loss": 0.0068, "step": 12410 }, { "epoch": 2.21, "learning_rate": 4.889327746077033e-05, "loss": 0.0069, "step": 12420 }, { "epoch": 2.22, "learning_rate": 4.889238587731812e-05, "loss": 0.0066, "step": 12430 }, { "epoch": 2.22, "learning_rate": 4.8891494293865905e-05, "loss": 0.0064, "step": 12440 }, { "epoch": 2.22, "learning_rate": 4.88906027104137e-05, "loss": 0.0113, "step": 12450 }, { "epoch": 2.22, "learning_rate": 4.888971112696149e-05, "loss": 0.0095, "step": 12460 }, { "epoch": 2.22, "learning_rate": 4.888881954350927e-05, "loss": 0.0071, "step": 12470 }, { "epoch": 2.23, "learning_rate": 4.8887927960057063e-05, "loss": 0.0101, "step": 12480 }, { "epoch": 2.23, "learning_rate": 4.888703637660485e-05, "loss": 0.0084, "step": 12490 }, { "epoch": 2.23, "learning_rate": 4.8886144793152646e-05, "loss": 0.0065, "step": 12500 }, { "epoch": 2.23, "learning_rate": 4.888525320970043e-05, "loss": 0.0085, "step": 12510 }, { "epoch": 2.23, "learning_rate": 4.888436162624822e-05, "loss": 0.0068, "step": 12520 }, { "epoch": 2.23, "learning_rate": 4.8883470042796006e-05, "loss": 0.0047, "step": 12530 }, { "epoch": 2.24, "learning_rate": 4.88825784593438e-05, "loss": 0.0106, "step": 12540 }, { "epoch": 2.24, "learning_rate": 4.888168687589159e-05, "loss": 0.0067, "step": 12550 }, { "epoch": 2.24, "learning_rate": 4.888079529243937e-05, "loss": 0.0082, "step": 12560 }, { "epoch": 2.24, "learning_rate": 4.8879903708987164e-05, "loss": 0.0088, "step": 12570 }, { "epoch": 2.24, "learning_rate": 4.887901212553495e-05, "loss": 0.0087, "step": 12580 }, { "epoch": 2.25, "learning_rate": 4.887812054208274e-05, "loss": 0.0102, "step": 12590 }, { "epoch": 2.25, "learning_rate": 4.887722895863053e-05, "loss": 0.0092, "step": 12600 }, { "epoch": 2.25, "learning_rate": 4.887633737517832e-05, "loss": 0.0113, "step": 12610 }, { "epoch": 2.25, "learning_rate": 4.8875445791726107e-05, "loss": 0.01, "step": 12620 }, { "epoch": 2.25, "learning_rate": 4.88745542082739e-05, "loss": 0.0095, "step": 12630 }, { "epoch": 2.25, "learning_rate": 4.887366262482169e-05, "loss": 0.0095, "step": 12640 }, { "epoch": 2.26, "learning_rate": 4.8872771041369473e-05, "loss": 0.0071, "step": 12650 }, { "epoch": 2.26, "learning_rate": 4.8871879457917265e-05, "loss": 0.0092, "step": 12660 }, { "epoch": 2.26, "learning_rate": 4.887098787446505e-05, "loss": 0.0075, "step": 12670 }, { "epoch": 2.26, "learning_rate": 4.887009629101284e-05, "loss": 0.007, "step": 12680 }, { "epoch": 2.26, "learning_rate": 4.886920470756063e-05, "loss": 0.0094, "step": 12690 }, { "epoch": 2.26, "learning_rate": 4.8868313124108416e-05, "loss": 0.0077, "step": 12700 }, { "epoch": 2.27, "learning_rate": 4.886742154065621e-05, "loss": 0.0114, "step": 12710 }, { "epoch": 2.27, "learning_rate": 4.8866529957204e-05, "loss": 0.0077, "step": 12720 }, { "epoch": 2.27, "learning_rate": 4.886563837375179e-05, "loss": 0.0046, "step": 12730 }, { "epoch": 2.27, "learning_rate": 4.8864746790299574e-05, "loss": 0.0113, "step": 12740 }, { "epoch": 2.27, "learning_rate": 4.8863855206847365e-05, "loss": 0.0061, "step": 12750 }, { "epoch": 2.28, "learning_rate": 4.886296362339515e-05, "loss": 0.0102, "step": 12760 }, { "epoch": 2.28, "learning_rate": 4.886207203994294e-05, "loss": 0.0098, "step": 12770 }, { "epoch": 2.28, "learning_rate": 4.886118045649073e-05, "loss": 0.0073, "step": 12780 }, { "epoch": 2.28, "learning_rate": 4.8860288873038516e-05, "loss": 0.0091, "step": 12790 }, { "epoch": 2.28, "learning_rate": 4.885939728958631e-05, "loss": 0.0073, "step": 12800 }, { "epoch": 2.28, "learning_rate": 4.885850570613409e-05, "loss": 0.0072, "step": 12810 }, { "epoch": 2.29, "learning_rate": 4.885761412268188e-05, "loss": 0.0059, "step": 12820 }, { "epoch": 2.29, "learning_rate": 4.8856722539229675e-05, "loss": 0.0063, "step": 12830 }, { "epoch": 2.29, "learning_rate": 4.8855830955777466e-05, "loss": 0.0048, "step": 12840 }, { "epoch": 2.29, "learning_rate": 4.885493937232525e-05, "loss": 0.0079, "step": 12850 }, { "epoch": 2.29, "learning_rate": 4.885404778887304e-05, "loss": 0.0094, "step": 12860 }, { "epoch": 2.29, "learning_rate": 4.885315620542083e-05, "loss": 0.0074, "step": 12870 }, { "epoch": 2.3, "learning_rate": 4.885226462196862e-05, "loss": 0.0092, "step": 12880 }, { "epoch": 2.3, "learning_rate": 4.885137303851641e-05, "loss": 0.0055, "step": 12890 }, { "epoch": 2.3, "learning_rate": 4.885048145506419e-05, "loss": 0.0083, "step": 12900 }, { "epoch": 2.3, "learning_rate": 4.8849589871611984e-05, "loss": 0.0104, "step": 12910 }, { "epoch": 2.3, "learning_rate": 4.8848698288159775e-05, "loss": 0.0098, "step": 12920 }, { "epoch": 2.31, "learning_rate": 4.884780670470756e-05, "loss": 0.01, "step": 12930 }, { "epoch": 2.31, "learning_rate": 4.884691512125536e-05, "loss": 0.0068, "step": 12940 }, { "epoch": 2.31, "learning_rate": 4.884602353780314e-05, "loss": 0.0079, "step": 12950 }, { "epoch": 2.31, "learning_rate": 4.884513195435093e-05, "loss": 0.0115, "step": 12960 }, { "epoch": 2.31, "learning_rate": 4.884424037089872e-05, "loss": 0.01, "step": 12970 }, { "epoch": 2.31, "learning_rate": 4.884334878744651e-05, "loss": 0.01, "step": 12980 }, { "epoch": 2.32, "learning_rate": 4.884245720399429e-05, "loss": 0.0086, "step": 12990 }, { "epoch": 2.32, "learning_rate": 4.8841565620542084e-05, "loss": 0.0075, "step": 13000 }, { "epoch": 2.32, "learning_rate": 4.8840674037089876e-05, "loss": 0.0089, "step": 13010 }, { "epoch": 2.32, "learning_rate": 4.883978245363766e-05, "loss": 0.0095, "step": 13020 }, { "epoch": 2.32, "learning_rate": 4.883889087018545e-05, "loss": 0.0183, "step": 13030 }, { "epoch": 2.33, "learning_rate": 4.8837999286733236e-05, "loss": 0.0095, "step": 13040 }, { "epoch": 2.33, "learning_rate": 4.8837107703281034e-05, "loss": 0.0075, "step": 13050 }, { "epoch": 2.33, "learning_rate": 4.883621611982882e-05, "loss": 0.0073, "step": 13060 }, { "epoch": 2.33, "learning_rate": 4.883532453637661e-05, "loss": 0.0075, "step": 13070 }, { "epoch": 2.33, "learning_rate": 4.8834432952924394e-05, "loss": 0.0084, "step": 13080 }, { "epoch": 2.33, "learning_rate": 4.8833541369472185e-05, "loss": 0.0085, "step": 13090 }, { "epoch": 2.34, "learning_rate": 4.8832649786019976e-05, "loss": 0.0063, "step": 13100 }, { "epoch": 2.34, "learning_rate": 4.883175820256776e-05, "loss": 0.005, "step": 13110 }, { "epoch": 2.34, "learning_rate": 4.883086661911555e-05, "loss": 0.0066, "step": 13120 }, { "epoch": 2.34, "learning_rate": 4.8829975035663336e-05, "loss": 0.0088, "step": 13130 }, { "epoch": 2.34, "learning_rate": 4.882908345221113e-05, "loss": 0.0088, "step": 13140 }, { "epoch": 2.34, "learning_rate": 4.882819186875892e-05, "loss": 0.0079, "step": 13150 }, { "epoch": 2.35, "learning_rate": 4.882730028530671e-05, "loss": 0.0083, "step": 13160 }, { "epoch": 2.35, "learning_rate": 4.88264087018545e-05, "loss": 0.0087, "step": 13170 }, { "epoch": 2.35, "learning_rate": 4.8825517118402286e-05, "loss": 0.0085, "step": 13180 }, { "epoch": 2.35, "learning_rate": 4.882462553495008e-05, "loss": 0.0097, "step": 13190 }, { "epoch": 2.35, "learning_rate": 4.882373395149786e-05, "loss": 0.0074, "step": 13200 }, { "epoch": 2.36, "learning_rate": 4.882284236804565e-05, "loss": 0.0092, "step": 13210 }, { "epoch": 2.36, "learning_rate": 4.882195078459344e-05, "loss": 0.0057, "step": 13220 }, { "epoch": 2.36, "learning_rate": 4.882105920114123e-05, "loss": 0.0168, "step": 13230 }, { "epoch": 2.36, "learning_rate": 4.882016761768902e-05, "loss": 0.0094, "step": 13240 }, { "epoch": 2.36, "learning_rate": 4.8819276034236804e-05, "loss": 0.012, "step": 13250 }, { "epoch": 2.36, "learning_rate": 4.8818384450784595e-05, "loss": 0.0062, "step": 13260 }, { "epoch": 2.37, "learning_rate": 4.8817492867332386e-05, "loss": 0.0098, "step": 13270 }, { "epoch": 2.37, "learning_rate": 4.881660128388018e-05, "loss": 0.0093, "step": 13280 }, { "epoch": 2.37, "learning_rate": 4.881570970042796e-05, "loss": 0.009, "step": 13290 }, { "epoch": 2.37, "learning_rate": 4.881481811697575e-05, "loss": 0.0077, "step": 13300 }, { "epoch": 2.37, "learning_rate": 4.881392653352354e-05, "loss": 0.0064, "step": 13310 }, { "epoch": 2.38, "learning_rate": 4.881303495007133e-05, "loss": 0.0071, "step": 13320 }, { "epoch": 2.38, "learning_rate": 4.881214336661912e-05, "loss": 0.0092, "step": 13330 }, { "epoch": 2.38, "learning_rate": 4.8811251783166904e-05, "loss": 0.0124, "step": 13340 }, { "epoch": 2.38, "learning_rate": 4.8810360199714695e-05, "loss": 0.0128, "step": 13350 }, { "epoch": 2.38, "learning_rate": 4.880946861626248e-05, "loss": 0.0083, "step": 13360 }, { "epoch": 2.38, "learning_rate": 4.880857703281027e-05, "loss": 0.0058, "step": 13370 }, { "epoch": 2.39, "learning_rate": 4.880768544935806e-05, "loss": 0.0059, "step": 13380 }, { "epoch": 2.39, "learning_rate": 4.8806793865905854e-05, "loss": 0.0094, "step": 13390 }, { "epoch": 2.39, "learning_rate": 4.8805902282453645e-05, "loss": 0.0092, "step": 13400 }, { "epoch": 2.39, "learning_rate": 4.880501069900143e-05, "loss": 0.0096, "step": 13410 }, { "epoch": 2.39, "learning_rate": 4.880411911554922e-05, "loss": 0.0082, "step": 13420 }, { "epoch": 2.39, "learning_rate": 4.8803227532097005e-05, "loss": 0.0086, "step": 13430 }, { "epoch": 2.4, "learning_rate": 4.8802335948644796e-05, "loss": 0.0071, "step": 13440 }, { "epoch": 2.4, "learning_rate": 4.880144436519258e-05, "loss": 0.0106, "step": 13450 }, { "epoch": 2.4, "learning_rate": 4.880055278174037e-05, "loss": 0.0096, "step": 13460 }, { "epoch": 2.4, "learning_rate": 4.879966119828816e-05, "loss": 0.0107, "step": 13470 }, { "epoch": 2.4, "learning_rate": 4.879876961483595e-05, "loss": 0.01, "step": 13480 }, { "epoch": 2.41, "learning_rate": 4.8797878031383745e-05, "loss": 0.0061, "step": 13490 }, { "epoch": 2.41, "learning_rate": 4.879698644793153e-05, "loss": 0.0104, "step": 13500 }, { "epoch": 2.41, "learning_rate": 4.879609486447932e-05, "loss": 0.0098, "step": 13510 }, { "epoch": 2.41, "learning_rate": 4.8795203281027105e-05, "loss": 0.0078, "step": 13520 }, { "epoch": 2.41, "learning_rate": 4.8794311697574897e-05, "loss": 0.01, "step": 13530 }, { "epoch": 2.41, "learning_rate": 4.879342011412268e-05, "loss": 0.0079, "step": 13540 }, { "epoch": 2.42, "learning_rate": 4.879252853067047e-05, "loss": 0.0101, "step": 13550 }, { "epoch": 2.42, "learning_rate": 4.8791636947218263e-05, "loss": 0.0061, "step": 13560 }, { "epoch": 2.42, "learning_rate": 4.879074536376605e-05, "loss": 0.0071, "step": 13570 }, { "epoch": 2.42, "learning_rate": 4.878985378031384e-05, "loss": 0.0088, "step": 13580 }, { "epoch": 2.42, "learning_rate": 4.8788962196861624e-05, "loss": 0.0068, "step": 13590 }, { "epoch": 2.43, "learning_rate": 4.878807061340942e-05, "loss": 0.0073, "step": 13600 }, { "epoch": 2.43, "learning_rate": 4.8787179029957206e-05, "loss": 0.0077, "step": 13610 }, { "epoch": 2.43, "learning_rate": 4.8786287446505e-05, "loss": 0.0074, "step": 13620 }, { "epoch": 2.43, "learning_rate": 4.878539586305279e-05, "loss": 0.0093, "step": 13630 }, { "epoch": 2.43, "learning_rate": 4.878450427960057e-05, "loss": 0.0101, "step": 13640 }, { "epoch": 2.43, "learning_rate": 4.8783612696148364e-05, "loss": 0.0095, "step": 13650 }, { "epoch": 2.44, "learning_rate": 4.878272111269615e-05, "loss": 0.0097, "step": 13660 }, { "epoch": 2.44, "learning_rate": 4.878182952924394e-05, "loss": 0.007, "step": 13670 }, { "epoch": 2.44, "learning_rate": 4.8780937945791724e-05, "loss": 0.0081, "step": 13680 }, { "epoch": 2.44, "learning_rate": 4.8780046362339515e-05, "loss": 0.0087, "step": 13690 }, { "epoch": 2.44, "learning_rate": 4.8779154778887307e-05, "loss": 0.0115, "step": 13700 }, { "epoch": 2.44, "learning_rate": 4.87782631954351e-05, "loss": 0.0064, "step": 13710 }, { "epoch": 2.45, "learning_rate": 4.8777460770328106e-05, "loss": 0.0098, "step": 13720 }, { "epoch": 2.45, "learning_rate": 4.877656918687589e-05, "loss": 0.0081, "step": 13730 }, { "epoch": 2.45, "learning_rate": 4.877567760342368e-05, "loss": 0.0082, "step": 13740 }, { "epoch": 2.45, "learning_rate": 4.8774786019971466e-05, "loss": 0.0065, "step": 13750 }, { "epoch": 2.45, "learning_rate": 4.8773894436519264e-05, "loss": 0.0068, "step": 13760 }, { "epoch": 2.46, "learning_rate": 4.877300285306705e-05, "loss": 0.0088, "step": 13770 }, { "epoch": 2.46, "learning_rate": 4.877211126961484e-05, "loss": 0.0083, "step": 13780 }, { "epoch": 2.46, "learning_rate": 4.877121968616263e-05, "loss": 0.0075, "step": 13790 }, { "epoch": 2.46, "learning_rate": 4.8770328102710415e-05, "loss": 0.0063, "step": 13800 }, { "epoch": 2.46, "learning_rate": 4.8769436519258206e-05, "loss": 0.0082, "step": 13810 }, { "epoch": 2.46, "learning_rate": 4.876854493580599e-05, "loss": 0.0053, "step": 13820 }, { "epoch": 2.47, "learning_rate": 4.876765335235378e-05, "loss": 0.0085, "step": 13830 }, { "epoch": 2.47, "learning_rate": 4.876676176890157e-05, "loss": 0.0072, "step": 13840 }, { "epoch": 2.47, "learning_rate": 4.876587018544936e-05, "loss": 0.0052, "step": 13850 }, { "epoch": 2.47, "learning_rate": 4.876497860199715e-05, "loss": 0.0093, "step": 13860 }, { "epoch": 2.47, "learning_rate": 4.876408701854494e-05, "loss": 0.0132, "step": 13870 }, { "epoch": 2.48, "learning_rate": 4.876319543509273e-05, "loss": 0.0075, "step": 13880 }, { "epoch": 2.48, "learning_rate": 4.8762303851640516e-05, "loss": 0.0094, "step": 13890 }, { "epoch": 2.48, "learning_rate": 4.876141226818831e-05, "loss": 0.006, "step": 13900 }, { "epoch": 2.48, "learning_rate": 4.876052068473609e-05, "loss": 0.0098, "step": 13910 }, { "epoch": 2.48, "learning_rate": 4.875962910128388e-05, "loss": 0.01, "step": 13920 }, { "epoch": 2.48, "learning_rate": 4.8758737517831674e-05, "loss": 0.0059, "step": 13930 }, { "epoch": 2.49, "learning_rate": 4.875784593437946e-05, "loss": 0.0043, "step": 13940 }, { "epoch": 2.49, "learning_rate": 4.875695435092725e-05, "loss": 0.0104, "step": 13950 }, { "epoch": 2.49, "learning_rate": 4.8756062767475034e-05, "loss": 0.0081, "step": 13960 }, { "epoch": 2.49, "learning_rate": 4.8755171184022825e-05, "loss": 0.0068, "step": 13970 }, { "epoch": 2.49, "learning_rate": 4.8754279600570616e-05, "loss": 0.0077, "step": 13980 }, { "epoch": 2.49, "learning_rate": 4.875338801711841e-05, "loss": 0.0153, "step": 13990 }, { "epoch": 2.5, "learning_rate": 4.875249643366619e-05, "loss": 0.0111, "step": 14000 }, { "epoch": 2.5, "learning_rate": 4.875160485021398e-05, "loss": 0.0071, "step": 14010 }, { "epoch": 2.5, "learning_rate": 4.8750713266761774e-05, "loss": 0.0116, "step": 14020 }, { "epoch": 2.5, "learning_rate": 4.874982168330956e-05, "loss": 0.0092, "step": 14030 }, { "epoch": 2.5, "learning_rate": 4.874893009985735e-05, "loss": 0.0076, "step": 14040 }, { "epoch": 2.51, "learning_rate": 4.8748038516405134e-05, "loss": 0.009, "step": 14050 }, { "epoch": 2.51, "learning_rate": 4.8747146932952926e-05, "loss": 0.0067, "step": 14060 }, { "epoch": 2.51, "learning_rate": 4.874625534950072e-05, "loss": 0.0126, "step": 14070 }, { "epoch": 2.51, "learning_rate": 4.87453637660485e-05, "loss": 0.0079, "step": 14080 }, { "epoch": 2.51, "learning_rate": 4.874447218259629e-05, "loss": 0.0092, "step": 14090 }, { "epoch": 2.51, "learning_rate": 4.8743580599144084e-05, "loss": 0.0086, "step": 14100 }, { "epoch": 2.52, "learning_rate": 4.8742689015691875e-05, "loss": 0.0096, "step": 14110 }, { "epoch": 2.52, "learning_rate": 4.874179743223966e-05, "loss": 0.0099, "step": 14120 }, { "epoch": 2.52, "learning_rate": 4.874090584878745e-05, "loss": 0.0123, "step": 14130 }, { "epoch": 2.52, "learning_rate": 4.8740014265335235e-05, "loss": 0.008, "step": 14140 }, { "epoch": 2.52, "learning_rate": 4.8739122681883026e-05, "loss": 0.012, "step": 14150 }, { "epoch": 2.52, "learning_rate": 4.873823109843082e-05, "loss": 0.0082, "step": 14160 }, { "epoch": 2.53, "learning_rate": 4.87373395149786e-05, "loss": 0.01, "step": 14170 }, { "epoch": 2.53, "learning_rate": 4.873644793152639e-05, "loss": 0.0073, "step": 14180 }, { "epoch": 2.53, "learning_rate": 4.873555634807418e-05, "loss": 0.009, "step": 14190 }, { "epoch": 2.53, "learning_rate": 4.8734664764621975e-05, "loss": 0.0101, "step": 14200 }, { "epoch": 2.53, "learning_rate": 4.873377318116976e-05, "loss": 0.0101, "step": 14210 }, { "epoch": 2.54, "learning_rate": 4.873288159771755e-05, "loss": 0.0095, "step": 14220 }, { "epoch": 2.54, "learning_rate": 4.8731990014265335e-05, "loss": 0.0085, "step": 14230 }, { "epoch": 2.54, "learning_rate": 4.873109843081313e-05, "loss": 0.0075, "step": 14240 }, { "epoch": 2.54, "learning_rate": 4.873020684736092e-05, "loss": 0.0066, "step": 14250 }, { "epoch": 2.54, "learning_rate": 4.87293152639087e-05, "loss": 0.009, "step": 14260 }, { "epoch": 2.54, "learning_rate": 4.8728423680456494e-05, "loss": 0.0097, "step": 14270 }, { "epoch": 2.55, "learning_rate": 4.872753209700428e-05, "loss": 0.007, "step": 14280 }, { "epoch": 2.55, "learning_rate": 4.872664051355207e-05, "loss": 0.0062, "step": 14290 }, { "epoch": 2.55, "learning_rate": 4.872574893009986e-05, "loss": 0.0093, "step": 14300 }, { "epoch": 2.55, "learning_rate": 4.872485734664765e-05, "loss": 0.0065, "step": 14310 }, { "epoch": 2.55, "learning_rate": 4.8723965763195436e-05, "loss": 0.0089, "step": 14320 }, { "epoch": 2.56, "learning_rate": 4.872307417974323e-05, "loss": 0.0099, "step": 14330 }, { "epoch": 2.56, "learning_rate": 4.872218259629102e-05, "loss": 0.0056, "step": 14340 }, { "epoch": 2.56, "learning_rate": 4.87212910128388e-05, "loss": 0.0076, "step": 14350 }, { "epoch": 2.56, "learning_rate": 4.8720399429386594e-05, "loss": 0.0072, "step": 14360 }, { "epoch": 2.56, "learning_rate": 4.871950784593438e-05, "loss": 0.0077, "step": 14370 }, { "epoch": 2.56, "learning_rate": 4.871861626248217e-05, "loss": 0.007, "step": 14380 }, { "epoch": 2.57, "learning_rate": 4.871772467902996e-05, "loss": 0.0076, "step": 14390 }, { "epoch": 2.57, "learning_rate": 4.8716833095577745e-05, "loss": 0.0102, "step": 14400 }, { "epoch": 2.57, "learning_rate": 4.8715941512125537e-05, "loss": 0.0095, "step": 14410 }, { "epoch": 2.57, "learning_rate": 4.871504992867333e-05, "loss": 0.0114, "step": 14420 }, { "epoch": 2.57, "learning_rate": 4.871415834522112e-05, "loss": 0.0052, "step": 14430 }, { "epoch": 2.57, "learning_rate": 4.8713266761768903e-05, "loss": 0.0102, "step": 14440 }, { "epoch": 2.58, "learning_rate": 4.8712375178316695e-05, "loss": 0.0087, "step": 14450 }, { "epoch": 2.58, "learning_rate": 4.871148359486448e-05, "loss": 0.008, "step": 14460 }, { "epoch": 2.58, "learning_rate": 4.871059201141227e-05, "loss": 0.0096, "step": 14470 }, { "epoch": 2.58, "learning_rate": 4.870970042796006e-05, "loss": 0.0068, "step": 14480 }, { "epoch": 2.58, "learning_rate": 4.870889800285307e-05, "loss": 0.0066, "step": 14490 }, { "epoch": 2.59, "learning_rate": 4.870800641940086e-05, "loss": 0.0076, "step": 14500 }, { "epoch": 2.59, "learning_rate": 4.8707114835948645e-05, "loss": 0.009, "step": 14510 }, { "epoch": 2.59, "learning_rate": 4.8706223252496436e-05, "loss": 0.0099, "step": 14520 }, { "epoch": 2.59, "learning_rate": 4.870533166904422e-05, "loss": 0.0061, "step": 14530 }, { "epoch": 2.59, "learning_rate": 4.870444008559201e-05, "loss": 0.0089, "step": 14540 }, { "epoch": 2.59, "learning_rate": 4.87035485021398e-05, "loss": 0.0111, "step": 14550 }, { "epoch": 2.6, "learning_rate": 4.870265691868759e-05, "loss": 0.0073, "step": 14560 }, { "epoch": 2.6, "learning_rate": 4.8701765335235386e-05, "loss": 0.0067, "step": 14570 }, { "epoch": 2.6, "learning_rate": 4.870087375178317e-05, "loss": 0.0076, "step": 14580 }, { "epoch": 2.6, "learning_rate": 4.869998216833096e-05, "loss": 0.0075, "step": 14590 }, { "epoch": 2.6, "learning_rate": 4.8699090584878746e-05, "loss": 0.008, "step": 14600 }, { "epoch": 2.61, "learning_rate": 4.869819900142654e-05, "loss": 0.0092, "step": 14610 }, { "epoch": 2.61, "learning_rate": 4.869730741797433e-05, "loss": 0.0098, "step": 14620 }, { "epoch": 2.61, "learning_rate": 4.869641583452211e-05, "loss": 0.0104, "step": 14630 }, { "epoch": 2.61, "learning_rate": 4.8695524251069904e-05, "loss": 0.0072, "step": 14640 }, { "epoch": 2.61, "learning_rate": 4.869463266761769e-05, "loss": 0.0105, "step": 14650 }, { "epoch": 2.61, "learning_rate": 4.869374108416548e-05, "loss": 0.0081, "step": 14660 }, { "epoch": 2.62, "learning_rate": 4.8692849500713264e-05, "loss": 0.0101, "step": 14670 }, { "epoch": 2.62, "learning_rate": 4.869195791726106e-05, "loss": 0.0092, "step": 14680 }, { "epoch": 2.62, "learning_rate": 4.8691066333808846e-05, "loss": 0.0072, "step": 14690 }, { "epoch": 2.62, "learning_rate": 4.869017475035664e-05, "loss": 0.0075, "step": 14700 }, { "epoch": 2.62, "learning_rate": 4.868928316690443e-05, "loss": 0.0093, "step": 14710 }, { "epoch": 2.62, "learning_rate": 4.868839158345221e-05, "loss": 0.0062, "step": 14720 }, { "epoch": 2.63, "learning_rate": 4.8687500000000004e-05, "loss": 0.0121, "step": 14730 }, { "epoch": 2.63, "learning_rate": 4.868660841654779e-05, "loss": 0.0054, "step": 14740 }, { "epoch": 2.63, "learning_rate": 4.868571683309558e-05, "loss": 0.0071, "step": 14750 }, { "epoch": 2.63, "learning_rate": 4.8684825249643364e-05, "loss": 0.0084, "step": 14760 }, { "epoch": 2.63, "learning_rate": 4.8683933666191156e-05, "loss": 0.0112, "step": 14770 }, { "epoch": 2.64, "learning_rate": 4.868304208273895e-05, "loss": 0.008, "step": 14780 }, { "epoch": 2.64, "learning_rate": 4.868215049928674e-05, "loss": 0.008, "step": 14790 }, { "epoch": 2.64, "learning_rate": 4.868125891583453e-05, "loss": 0.0057, "step": 14800 }, { "epoch": 2.64, "learning_rate": 4.8680367332382314e-05, "loss": 0.0107, "step": 14810 }, { "epoch": 2.64, "learning_rate": 4.8679475748930105e-05, "loss": 0.0107, "step": 14820 }, { "epoch": 2.64, "learning_rate": 4.867858416547789e-05, "loss": 0.0061, "step": 14830 }, { "epoch": 2.65, "learning_rate": 4.867769258202568e-05, "loss": 0.0079, "step": 14840 }, { "epoch": 2.65, "learning_rate": 4.867680099857347e-05, "loss": 0.0072, "step": 14850 }, { "epoch": 2.65, "learning_rate": 4.8675909415121256e-05, "loss": 0.0098, "step": 14860 }, { "epoch": 2.65, "learning_rate": 4.867501783166905e-05, "loss": 0.0063, "step": 14870 }, { "epoch": 2.65, "learning_rate": 4.867412624821683e-05, "loss": 0.008, "step": 14880 }, { "epoch": 2.66, "learning_rate": 4.867323466476462e-05, "loss": 0.0118, "step": 14890 }, { "epoch": 2.66, "learning_rate": 4.8672343081312414e-05, "loss": 0.0062, "step": 14900 }, { "epoch": 2.66, "learning_rate": 4.8671451497860205e-05, "loss": 0.0094, "step": 14910 }, { "epoch": 2.66, "learning_rate": 4.867055991440799e-05, "loss": 0.0066, "step": 14920 }, { "epoch": 2.66, "learning_rate": 4.866966833095578e-05, "loss": 0.008, "step": 14930 }, { "epoch": 2.66, "learning_rate": 4.866877674750357e-05, "loss": 0.0068, "step": 14940 }, { "epoch": 2.67, "learning_rate": 4.866788516405136e-05, "loss": 0.009, "step": 14950 }, { "epoch": 2.67, "learning_rate": 4.866699358059915e-05, "loss": 0.0099, "step": 14960 }, { "epoch": 2.67, "learning_rate": 4.866610199714693e-05, "loss": 0.0092, "step": 14970 }, { "epoch": 2.67, "learning_rate": 4.8665210413694724e-05, "loss": 0.0103, "step": 14980 }, { "epoch": 2.67, "learning_rate": 4.866431883024251e-05, "loss": 0.0108, "step": 14990 }, { "epoch": 2.67, "learning_rate": 4.86634272467903e-05, "loss": 0.0096, "step": 15000 }, { "epoch": 2.68, "learning_rate": 4.866253566333809e-05, "loss": 0.0073, "step": 15010 }, { "epoch": 2.68, "learning_rate": 4.866164407988588e-05, "loss": 0.0046, "step": 15020 }, { "epoch": 2.68, "learning_rate": 4.866075249643367e-05, "loss": 0.0063, "step": 15030 }, { "epoch": 2.68, "learning_rate": 4.865986091298146e-05, "loss": 0.0089, "step": 15040 }, { "epoch": 2.68, "learning_rate": 4.865896932952925e-05, "loss": 0.0054, "step": 15050 }, { "epoch": 2.69, "learning_rate": 4.865807774607703e-05, "loss": 0.0056, "step": 15060 }, { "epoch": 2.69, "learning_rate": 4.8657186162624824e-05, "loss": 0.0073, "step": 15070 }, { "epoch": 2.69, "learning_rate": 4.8656294579172615e-05, "loss": 0.0088, "step": 15080 }, { "epoch": 2.69, "learning_rate": 4.86554029957204e-05, "loss": 0.0092, "step": 15090 }, { "epoch": 2.69, "learning_rate": 4.865451141226819e-05, "loss": 0.0056, "step": 15100 }, { "epoch": 2.69, "learning_rate": 4.8653619828815975e-05, "loss": 0.0089, "step": 15110 }, { "epoch": 2.7, "learning_rate": 4.865272824536377e-05, "loss": 0.0069, "step": 15120 }, { "epoch": 2.7, "learning_rate": 4.865183666191156e-05, "loss": 0.0086, "step": 15130 }, { "epoch": 2.7, "learning_rate": 4.865094507845935e-05, "loss": 0.0085, "step": 15140 }, { "epoch": 2.7, "learning_rate": 4.8650053495007134e-05, "loss": 0.0097, "step": 15150 }, { "epoch": 2.7, "learning_rate": 4.8649161911554925e-05, "loss": 0.0068, "step": 15160 }, { "epoch": 2.71, "learning_rate": 4.8648270328102716e-05, "loss": 0.0066, "step": 15170 }, { "epoch": 2.71, "learning_rate": 4.86473787446505e-05, "loss": 0.0065, "step": 15180 }, { "epoch": 2.71, "learning_rate": 4.864648716119829e-05, "loss": 0.0074, "step": 15190 }, { "epoch": 2.71, "learning_rate": 4.8645595577746076e-05, "loss": 0.0058, "step": 15200 }, { "epoch": 2.71, "learning_rate": 4.864470399429387e-05, "loss": 0.0075, "step": 15210 }, { "epoch": 2.71, "learning_rate": 4.864381241084165e-05, "loss": 0.0085, "step": 15220 }, { "epoch": 2.72, "learning_rate": 4.864292082738944e-05, "loss": 0.0093, "step": 15230 }, { "epoch": 2.72, "learning_rate": 4.8642029243937234e-05, "loss": 0.0092, "step": 15240 }, { "epoch": 2.72, "learning_rate": 4.8641137660485025e-05, "loss": 0.0073, "step": 15250 }, { "epoch": 2.72, "learning_rate": 4.8640246077032817e-05, "loss": 0.0075, "step": 15260 }, { "epoch": 2.72, "learning_rate": 4.86393544935806e-05, "loss": 0.0098, "step": 15270 }, { "epoch": 2.72, "learning_rate": 4.863846291012839e-05, "loss": 0.0116, "step": 15280 }, { "epoch": 2.73, "learning_rate": 4.8637571326676177e-05, "loss": 0.0084, "step": 15290 }, { "epoch": 2.73, "learning_rate": 4.863667974322397e-05, "loss": 0.0072, "step": 15300 }, { "epoch": 2.73, "learning_rate": 4.863578815977176e-05, "loss": 0.0071, "step": 15310 }, { "epoch": 2.73, "learning_rate": 4.8634896576319543e-05, "loss": 0.0108, "step": 15320 }, { "epoch": 2.73, "learning_rate": 4.8634004992867335e-05, "loss": 0.0064, "step": 15330 }, { "epoch": 2.74, "learning_rate": 4.863311340941512e-05, "loss": 0.0087, "step": 15340 }, { "epoch": 2.74, "learning_rate": 4.863222182596292e-05, "loss": 0.0054, "step": 15350 }, { "epoch": 2.74, "learning_rate": 4.86313302425107e-05, "loss": 0.009, "step": 15360 }, { "epoch": 2.74, "learning_rate": 4.863043865905849e-05, "loss": 0.0056, "step": 15370 }, { "epoch": 2.74, "learning_rate": 4.862954707560628e-05, "loss": 0.0088, "step": 15380 }, { "epoch": 2.74, "learning_rate": 4.862865549215407e-05, "loss": 0.0066, "step": 15390 }, { "epoch": 2.75, "learning_rate": 4.862776390870186e-05, "loss": 0.0075, "step": 15400 }, { "epoch": 2.75, "learning_rate": 4.8626872325249644e-05, "loss": 0.0103, "step": 15410 }, { "epoch": 2.75, "learning_rate": 4.8625980741797435e-05, "loss": 0.0069, "step": 15420 }, { "epoch": 2.75, "learning_rate": 4.862508915834522e-05, "loss": 0.0071, "step": 15430 }, { "epoch": 2.75, "learning_rate": 4.862419757489301e-05, "loss": 0.0077, "step": 15440 }, { "epoch": 2.75, "learning_rate": 4.8623305991440795e-05, "loss": 0.0098, "step": 15450 }, { "epoch": 2.76, "learning_rate": 4.862241440798859e-05, "loss": 0.0084, "step": 15460 }, { "epoch": 2.76, "learning_rate": 4.862152282453638e-05, "loss": 0.0103, "step": 15470 }, { "epoch": 2.76, "learning_rate": 4.862063124108417e-05, "loss": 0.0059, "step": 15480 }, { "epoch": 2.76, "learning_rate": 4.861973965763196e-05, "loss": 0.0057, "step": 15490 }, { "epoch": 2.76, "learning_rate": 4.8618848074179745e-05, "loss": 0.0093, "step": 15500 }, { "epoch": 2.77, "learning_rate": 4.8617956490727536e-05, "loss": 0.0076, "step": 15510 }, { "epoch": 2.77, "learning_rate": 4.861706490727532e-05, "loss": 0.0099, "step": 15520 }, { "epoch": 2.77, "learning_rate": 4.861617332382311e-05, "loss": 0.0069, "step": 15530 }, { "epoch": 2.77, "learning_rate": 4.86152817403709e-05, "loss": 0.0042, "step": 15540 }, { "epoch": 2.77, "learning_rate": 4.861439015691869e-05, "loss": 0.0066, "step": 15550 }, { "epoch": 2.77, "learning_rate": 4.861349857346648e-05, "loss": 0.0094, "step": 15560 }, { "epoch": 2.78, "learning_rate": 4.861260699001427e-05, "loss": 0.0058, "step": 15570 }, { "epoch": 2.78, "learning_rate": 4.861171540656206e-05, "loss": 0.0083, "step": 15580 }, { "epoch": 2.78, "learning_rate": 4.8610823823109845e-05, "loss": 0.0098, "step": 15590 }, { "epoch": 2.78, "learning_rate": 4.8609932239657636e-05, "loss": 0.0071, "step": 15600 }, { "epoch": 2.78, "learning_rate": 4.860904065620542e-05, "loss": 0.0091, "step": 15610 }, { "epoch": 2.79, "learning_rate": 4.860814907275321e-05, "loss": 0.0083, "step": 15620 }, { "epoch": 2.79, "learning_rate": 4.8607257489301e-05, "loss": 0.0079, "step": 15630 }, { "epoch": 2.79, "learning_rate": 4.860636590584879e-05, "loss": 0.0083, "step": 15640 }, { "epoch": 2.79, "learning_rate": 4.860547432239658e-05, "loss": 0.0065, "step": 15650 }, { "epoch": 2.79, "learning_rate": 4.860458273894436e-05, "loss": 0.0098, "step": 15660 }, { "epoch": 2.79, "learning_rate": 4.8603691155492154e-05, "loss": 0.0095, "step": 15670 }, { "epoch": 2.8, "learning_rate": 4.8602799572039946e-05, "loss": 0.0063, "step": 15680 }, { "epoch": 2.8, "learning_rate": 4.860190798858774e-05, "loss": 0.0049, "step": 15690 }, { "epoch": 2.8, "learning_rate": 4.860101640513552e-05, "loss": 0.0073, "step": 15700 }, { "epoch": 2.8, "learning_rate": 4.860012482168331e-05, "loss": 0.0085, "step": 15710 }, { "epoch": 2.8, "learning_rate": 4.8599233238231104e-05, "loss": 0.0066, "step": 15720 }, { "epoch": 2.8, "learning_rate": 4.859834165477889e-05, "loss": 0.0098, "step": 15730 }, { "epoch": 2.81, "learning_rate": 4.859745007132668e-05, "loss": 0.0059, "step": 15740 }, { "epoch": 2.81, "learning_rate": 4.8596558487874464e-05, "loss": 0.0087, "step": 15750 }, { "epoch": 2.81, "learning_rate": 4.8595666904422255e-05, "loss": 0.0063, "step": 15760 }, { "epoch": 2.81, "learning_rate": 4.8594775320970046e-05, "loss": 0.0095, "step": 15770 }, { "epoch": 2.81, "learning_rate": 4.859388373751783e-05, "loss": 0.0095, "step": 15780 }, { "epoch": 2.82, "learning_rate": 4.859299215406562e-05, "loss": 0.0122, "step": 15790 }, { "epoch": 2.82, "learning_rate": 4.859210057061341e-05, "loss": 0.0107, "step": 15800 }, { "epoch": 2.82, "learning_rate": 4.8591208987161204e-05, "loss": 0.0066, "step": 15810 }, { "epoch": 2.82, "learning_rate": 4.859031740370899e-05, "loss": 0.0107, "step": 15820 }, { "epoch": 2.82, "learning_rate": 4.858942582025678e-05, "loss": 0.0071, "step": 15830 }, { "epoch": 2.82, "learning_rate": 4.8588534236804564e-05, "loss": 0.0088, "step": 15840 }, { "epoch": 2.83, "learning_rate": 4.8587642653352356e-05, "loss": 0.0109, "step": 15850 }, { "epoch": 2.83, "learning_rate": 4.858675106990015e-05, "loss": 0.0085, "step": 15860 }, { "epoch": 2.83, "learning_rate": 4.858585948644793e-05, "loss": 0.0091, "step": 15870 }, { "epoch": 2.83, "learning_rate": 4.858496790299572e-05, "loss": 0.0061, "step": 15880 }, { "epoch": 2.83, "learning_rate": 4.858407631954351e-05, "loss": 0.0118, "step": 15890 }, { "epoch": 2.84, "learning_rate": 4.8583184736091305e-05, "loss": 0.0057, "step": 15900 }, { "epoch": 2.84, "learning_rate": 4.858229315263909e-05, "loss": 0.0076, "step": 15910 }, { "epoch": 2.84, "learning_rate": 4.858140156918688e-05, "loss": 0.0086, "step": 15920 }, { "epoch": 2.84, "learning_rate": 4.8580509985734665e-05, "loss": 0.0063, "step": 15930 }, { "epoch": 2.84, "learning_rate": 4.8579618402282456e-05, "loss": 0.0057, "step": 15940 }, { "epoch": 2.84, "learning_rate": 4.857872681883025e-05, "loss": 0.0086, "step": 15950 }, { "epoch": 2.85, "learning_rate": 4.857783523537803e-05, "loss": 0.0064, "step": 15960 }, { "epoch": 2.85, "learning_rate": 4.857694365192582e-05, "loss": 0.0079, "step": 15970 }, { "epoch": 2.85, "learning_rate": 4.857605206847361e-05, "loss": 0.0084, "step": 15980 }, { "epoch": 2.85, "learning_rate": 4.85751604850214e-05, "loss": 0.0083, "step": 15990 }, { "epoch": 2.85, "learning_rate": 4.857426890156919e-05, "loss": 0.0066, "step": 16000 }, { "epoch": 2.85, "learning_rate": 4.857337731811698e-05, "loss": 0.0083, "step": 16010 }, { "epoch": 2.86, "learning_rate": 4.8572485734664766e-05, "loss": 0.0124, "step": 16020 }, { "epoch": 2.86, "learning_rate": 4.857159415121256e-05, "loss": 0.0119, "step": 16030 }, { "epoch": 2.86, "learning_rate": 4.857070256776035e-05, "loss": 0.0074, "step": 16040 }, { "epoch": 2.86, "learning_rate": 4.856981098430813e-05, "loss": 0.0077, "step": 16050 }, { "epoch": 2.86, "learning_rate": 4.8568919400855924e-05, "loss": 0.007, "step": 16060 }, { "epoch": 2.87, "learning_rate": 4.856802781740371e-05, "loss": 0.0093, "step": 16070 }, { "epoch": 2.87, "learning_rate": 4.85671362339515e-05, "loss": 0.0047, "step": 16080 }, { "epoch": 2.87, "learning_rate": 4.856624465049929e-05, "loss": 0.0085, "step": 16090 }, { "epoch": 2.87, "learning_rate": 4.8565353067047075e-05, "loss": 0.0071, "step": 16100 }, { "epoch": 2.87, "learning_rate": 4.8564461483594866e-05, "loss": 0.009, "step": 16110 }, { "epoch": 2.87, "learning_rate": 4.856356990014266e-05, "loss": 0.0092, "step": 16120 }, { "epoch": 2.88, "learning_rate": 4.856267831669045e-05, "loss": 0.0145, "step": 16130 }, { "epoch": 2.88, "learning_rate": 4.856178673323823e-05, "loss": 0.0064, "step": 16140 }, { "epoch": 2.88, "learning_rate": 4.8560895149786024e-05, "loss": 0.0075, "step": 16150 }, { "epoch": 2.88, "learning_rate": 4.856000356633381e-05, "loss": 0.0098, "step": 16160 }, { "epoch": 2.88, "learning_rate": 4.85591119828816e-05, "loss": 0.0089, "step": 16170 }, { "epoch": 2.89, "learning_rate": 4.855822039942939e-05, "loss": 0.0087, "step": 16180 }, { "epoch": 2.89, "learning_rate": 4.8557328815977175e-05, "loss": 0.0083, "step": 16190 }, { "epoch": 2.89, "learning_rate": 4.855643723252497e-05, "loss": 0.0075, "step": 16200 }, { "epoch": 2.89, "learning_rate": 4.855554564907275e-05, "loss": 0.006, "step": 16210 }, { "epoch": 2.89, "learning_rate": 4.855465406562054e-05, "loss": 0.0053, "step": 16220 }, { "epoch": 2.89, "learning_rate": 4.8553762482168334e-05, "loss": 0.007, "step": 16230 }, { "epoch": 2.9, "learning_rate": 4.8552870898716125e-05, "loss": 0.0079, "step": 16240 }, { "epoch": 2.9, "learning_rate": 4.855197931526391e-05, "loss": 0.0065, "step": 16250 }, { "epoch": 2.9, "learning_rate": 4.85510877318117e-05, "loss": 0.0066, "step": 16260 }, { "epoch": 2.9, "learning_rate": 4.855019614835949e-05, "loss": 0.008, "step": 16270 }, { "epoch": 2.9, "learning_rate": 4.8549304564907276e-05, "loss": 0.0087, "step": 16280 }, { "epoch": 2.9, "learning_rate": 4.854841298145507e-05, "loss": 0.0103, "step": 16290 }, { "epoch": 2.91, "learning_rate": 4.854752139800285e-05, "loss": 0.0063, "step": 16300 }, { "epoch": 2.91, "learning_rate": 4.854662981455064e-05, "loss": 0.0064, "step": 16310 }, { "epoch": 2.91, "learning_rate": 4.8545738231098434e-05, "loss": 0.0073, "step": 16320 }, { "epoch": 2.91, "learning_rate": 4.854484664764622e-05, "loss": 0.0106, "step": 16330 }, { "epoch": 2.91, "learning_rate": 4.8543955064194016e-05, "loss": 0.0071, "step": 16340 }, { "epoch": 2.92, "learning_rate": 4.85430634807418e-05, "loss": 0.0107, "step": 16350 }, { "epoch": 2.92, "learning_rate": 4.854217189728959e-05, "loss": 0.006, "step": 16360 }, { "epoch": 2.92, "learning_rate": 4.8541280313837377e-05, "loss": 0.0145, "step": 16370 }, { "epoch": 2.92, "learning_rate": 4.854038873038517e-05, "loss": 0.0067, "step": 16380 }, { "epoch": 2.92, "learning_rate": 4.853949714693295e-05, "loss": 0.0073, "step": 16390 }, { "epoch": 2.92, "learning_rate": 4.8538605563480743e-05, "loss": 0.0068, "step": 16400 }, { "epoch": 2.93, "learning_rate": 4.8537713980028535e-05, "loss": 0.0052, "step": 16410 }, { "epoch": 2.93, "learning_rate": 4.853682239657632e-05, "loss": 0.0071, "step": 16420 }, { "epoch": 2.93, "learning_rate": 4.853593081312411e-05, "loss": 0.0081, "step": 16430 }, { "epoch": 2.93, "learning_rate": 4.8535039229671895e-05, "loss": 0.0107, "step": 16440 }, { "epoch": 2.93, "learning_rate": 4.853414764621969e-05, "loss": 0.0068, "step": 16450 }, { "epoch": 2.94, "learning_rate": 4.853325606276748e-05, "loss": 0.0041, "step": 16460 }, { "epoch": 2.94, "learning_rate": 4.853236447931527e-05, "loss": 0.0069, "step": 16470 }, { "epoch": 2.94, "learning_rate": 4.853147289586305e-05, "loss": 0.014, "step": 16480 }, { "epoch": 2.94, "learning_rate": 4.8530581312410844e-05, "loss": 0.0089, "step": 16490 }, { "epoch": 2.94, "learning_rate": 4.8529689728958635e-05, "loss": 0.01, "step": 16500 }, { "epoch": 2.94, "learning_rate": 4.852879814550642e-05, "loss": 0.0079, "step": 16510 }, { "epoch": 2.95, "learning_rate": 4.852790656205421e-05, "loss": 0.0102, "step": 16520 }, { "epoch": 2.95, "learning_rate": 4.8527014978601995e-05, "loss": 0.007, "step": 16530 }, { "epoch": 2.95, "learning_rate": 4.8526123395149786e-05, "loss": 0.0059, "step": 16540 }, { "epoch": 2.95, "learning_rate": 4.852523181169758e-05, "loss": 0.0069, "step": 16550 }, { "epoch": 2.95, "learning_rate": 4.852434022824537e-05, "loss": 0.0073, "step": 16560 }, { "epoch": 2.95, "learning_rate": 4.852344864479316e-05, "loss": 0.0083, "step": 16570 }, { "epoch": 2.96, "learning_rate": 4.8522557061340945e-05, "loss": 0.0086, "step": 16580 }, { "epoch": 2.96, "learning_rate": 4.8521665477888736e-05, "loss": 0.0094, "step": 16590 }, { "epoch": 2.96, "learning_rate": 4.852077389443652e-05, "loss": 0.0069, "step": 16600 }, { "epoch": 2.96, "learning_rate": 4.851988231098431e-05, "loss": 0.0091, "step": 16610 }, { "epoch": 2.96, "learning_rate": 4.8518990727532096e-05, "loss": 0.0151, "step": 16620 }, { "epoch": 2.97, "learning_rate": 4.851809914407989e-05, "loss": 0.008, "step": 16630 }, { "epoch": 2.97, "learning_rate": 4.851720756062768e-05, "loss": 0.006, "step": 16640 }, { "epoch": 2.97, "learning_rate": 4.851631597717546e-05, "loss": 0.0068, "step": 16650 }, { "epoch": 2.97, "learning_rate": 4.8515424393723254e-05, "loss": 0.0036, "step": 16660 }, { "epoch": 2.97, "learning_rate": 4.8514532810271045e-05, "loss": 0.0068, "step": 16670 }, { "epoch": 2.97, "learning_rate": 4.8513641226818836e-05, "loss": 0.0083, "step": 16680 }, { "epoch": 2.98, "learning_rate": 4.851274964336662e-05, "loss": 0.0074, "step": 16690 }, { "epoch": 2.98, "learning_rate": 4.851185805991441e-05, "loss": 0.0091, "step": 16700 }, { "epoch": 2.98, "learning_rate": 4.8510966476462196e-05, "loss": 0.0096, "step": 16710 }, { "epoch": 2.98, "learning_rate": 4.851007489300999e-05, "loss": 0.0084, "step": 16720 }, { "epoch": 2.98, "learning_rate": 4.850918330955778e-05, "loss": 0.0111, "step": 16730 }, { "epoch": 2.99, "learning_rate": 4.850829172610556e-05, "loss": 0.0121, "step": 16740 }, { "epoch": 2.99, "learning_rate": 4.8507400142653354e-05, "loss": 0.0082, "step": 16750 }, { "epoch": 2.99, "learning_rate": 4.850650855920114e-05, "loss": 0.0093, "step": 16760 }, { "epoch": 2.99, "learning_rate": 4.850561697574893e-05, "loss": 0.0081, "step": 16770 }, { "epoch": 2.99, "learning_rate": 4.850472539229672e-05, "loss": 0.0104, "step": 16780 }, { "epoch": 2.99, "learning_rate": 4.850383380884451e-05, "loss": 0.0065, "step": 16790 }, { "epoch": 3.0, "learning_rate": 4.8502942225392304e-05, "loss": 0.0086, "step": 16800 }, { "epoch": 3.0, "learning_rate": 4.850205064194009e-05, "loss": 0.0045, "step": 16810 }, { "epoch": 3.0, "learning_rate": 4.850115905848788e-05, "loss": 0.0074, "step": 16820 }, { "epoch": 3.0, "eval_loss": 0.016142597422003746, "eval_runtime": 195.6313, "eval_samples_per_second": 23.713, "eval_steps_per_second": 2.965, "step": 16824 }, { "epoch": 3.0, "learning_rate": 4.8500267475035664e-05, "loss": 0.0086, "step": 16830 }, { "epoch": 3.0, "learning_rate": 4.8499375891583455e-05, "loss": 0.0051, "step": 16840 }, { "epoch": 3.0, "learning_rate": 4.849848430813124e-05, "loss": 0.0065, "step": 16850 }, { "epoch": 3.01, "learning_rate": 4.849759272467903e-05, "loss": 0.0073, "step": 16860 }, { "epoch": 3.01, "learning_rate": 4.849670114122682e-05, "loss": 0.0099, "step": 16870 }, { "epoch": 3.01, "learning_rate": 4.8495809557774606e-05, "loss": 0.0062, "step": 16880 }, { "epoch": 3.01, "learning_rate": 4.8494917974322404e-05, "loss": 0.0057, "step": 16890 }, { "epoch": 3.01, "learning_rate": 4.849402639087019e-05, "loss": 0.0092, "step": 16900 }, { "epoch": 3.02, "learning_rate": 4.849313480741798e-05, "loss": 0.0087, "step": 16910 }, { "epoch": 3.02, "learning_rate": 4.8492243223965764e-05, "loss": 0.006, "step": 16920 }, { "epoch": 3.02, "learning_rate": 4.8491351640513556e-05, "loss": 0.0105, "step": 16930 }, { "epoch": 3.02, "learning_rate": 4.849046005706134e-05, "loss": 0.0067, "step": 16940 }, { "epoch": 3.02, "learning_rate": 4.848956847360913e-05, "loss": 0.0085, "step": 16950 }, { "epoch": 3.02, "learning_rate": 4.848867689015692e-05, "loss": 0.008, "step": 16960 }, { "epoch": 3.03, "learning_rate": 4.848778530670471e-05, "loss": 0.006, "step": 16970 }, { "epoch": 3.03, "learning_rate": 4.84868937232525e-05, "loss": 0.0069, "step": 16980 }, { "epoch": 3.03, "learning_rate": 4.848600213980028e-05, "loss": 0.0092, "step": 16990 }, { "epoch": 3.03, "learning_rate": 4.848511055634808e-05, "loss": 0.0079, "step": 17000 }, { "epoch": 3.03, "learning_rate": 4.8484218972895865e-05, "loss": 0.0079, "step": 17010 }, { "epoch": 3.03, "learning_rate": 4.8483327389443656e-05, "loss": 0.0062, "step": 17020 }, { "epoch": 3.04, "learning_rate": 4.848243580599145e-05, "loss": 0.0094, "step": 17030 }, { "epoch": 3.04, "learning_rate": 4.848154422253923e-05, "loss": 0.0095, "step": 17040 }, { "epoch": 3.04, "learning_rate": 4.848065263908702e-05, "loss": 0.0052, "step": 17050 }, { "epoch": 3.04, "learning_rate": 4.847976105563481e-05, "loss": 0.0083, "step": 17060 }, { "epoch": 3.04, "learning_rate": 4.84788694721826e-05, "loss": 0.0057, "step": 17070 }, { "epoch": 3.05, "learning_rate": 4.847797788873038e-05, "loss": 0.0127, "step": 17080 }, { "epoch": 3.05, "learning_rate": 4.8477086305278174e-05, "loss": 0.0091, "step": 17090 }, { "epoch": 3.05, "learning_rate": 4.8476194721825965e-05, "loss": 0.0083, "step": 17100 }, { "epoch": 3.05, "learning_rate": 4.847530313837376e-05, "loss": 0.0071, "step": 17110 }, { "epoch": 3.05, "learning_rate": 4.847441155492155e-05, "loss": 0.0065, "step": 17120 }, { "epoch": 3.05, "learning_rate": 4.847351997146933e-05, "loss": 0.0058, "step": 17130 }, { "epoch": 3.06, "learning_rate": 4.8472628388017124e-05, "loss": 0.0058, "step": 17140 }, { "epoch": 3.06, "learning_rate": 4.847173680456491e-05, "loss": 0.0085, "step": 17150 }, { "epoch": 3.06, "learning_rate": 4.84708452211127e-05, "loss": 0.007, "step": 17160 }, { "epoch": 3.06, "learning_rate": 4.8469953637660484e-05, "loss": 0.011, "step": 17170 }, { "epoch": 3.06, "learning_rate": 4.8469062054208275e-05, "loss": 0.0059, "step": 17180 }, { "epoch": 3.07, "learning_rate": 4.8468170470756066e-05, "loss": 0.0067, "step": 17190 }, { "epoch": 3.07, "learning_rate": 4.846727888730385e-05, "loss": 0.0075, "step": 17200 }, { "epoch": 3.07, "learning_rate": 4.846638730385164e-05, "loss": 0.0067, "step": 17210 }, { "epoch": 3.07, "learning_rate": 4.846549572039943e-05, "loss": 0.0072, "step": 17220 }, { "epoch": 3.07, "learning_rate": 4.8464604136947224e-05, "loss": 0.0088, "step": 17230 }, { "epoch": 3.07, "learning_rate": 4.846371255349501e-05, "loss": 0.0074, "step": 17240 }, { "epoch": 3.08, "learning_rate": 4.84628209700428e-05, "loss": 0.0082, "step": 17250 }, { "epoch": 3.08, "learning_rate": 4.846192938659059e-05, "loss": 0.0085, "step": 17260 }, { "epoch": 3.08, "learning_rate": 4.8461037803138375e-05, "loss": 0.0088, "step": 17270 }, { "epoch": 3.08, "learning_rate": 4.846014621968617e-05, "loss": 0.0027, "step": 17280 }, { "epoch": 3.08, "learning_rate": 4.845925463623395e-05, "loss": 0.0068, "step": 17290 }, { "epoch": 3.08, "learning_rate": 4.845836305278174e-05, "loss": 0.0065, "step": 17300 }, { "epoch": 3.09, "learning_rate": 4.845747146932953e-05, "loss": 0.0055, "step": 17310 }, { "epoch": 3.09, "learning_rate": 4.845657988587732e-05, "loss": 0.008, "step": 17320 }, { "epoch": 3.09, "learning_rate": 4.845568830242511e-05, "loss": 0.0056, "step": 17330 }, { "epoch": 3.09, "learning_rate": 4.84547967189729e-05, "loss": 0.0064, "step": 17340 }, { "epoch": 3.09, "learning_rate": 4.845390513552069e-05, "loss": 0.0073, "step": 17350 }, { "epoch": 3.1, "learning_rate": 4.8453013552068476e-05, "loss": 0.0086, "step": 17360 }, { "epoch": 3.1, "learning_rate": 4.845212196861627e-05, "loss": 0.0085, "step": 17370 }, { "epoch": 3.1, "learning_rate": 4.845123038516405e-05, "loss": 0.0069, "step": 17380 }, { "epoch": 3.1, "learning_rate": 4.845033880171184e-05, "loss": 0.0078, "step": 17390 }, { "epoch": 3.1, "learning_rate": 4.844944721825963e-05, "loss": 0.0078, "step": 17400 }, { "epoch": 3.1, "learning_rate": 4.844855563480742e-05, "loss": 0.0094, "step": 17410 }, { "epoch": 3.11, "learning_rate": 4.844766405135521e-05, "loss": 0.0082, "step": 17420 }, { "epoch": 3.11, "learning_rate": 4.8446772467902994e-05, "loss": 0.0076, "step": 17430 }, { "epoch": 3.11, "learning_rate": 4.844588088445079e-05, "loss": 0.0087, "step": 17440 }, { "epoch": 3.11, "learning_rate": 4.8444989300998577e-05, "loss": 0.0046, "step": 17450 }, { "epoch": 3.11, "learning_rate": 4.844409771754637e-05, "loss": 0.0066, "step": 17460 }, { "epoch": 3.12, "learning_rate": 4.844320613409415e-05, "loss": 0.0067, "step": 17470 }, { "epoch": 3.12, "learning_rate": 4.844231455064194e-05, "loss": 0.0053, "step": 17480 }, { "epoch": 3.12, "learning_rate": 4.8441422967189735e-05, "loss": 0.0056, "step": 17490 }, { "epoch": 3.12, "learning_rate": 4.844053138373752e-05, "loss": 0.0087, "step": 17500 }, { "epoch": 3.12, "learning_rate": 4.843963980028531e-05, "loss": 0.0057, "step": 17510 }, { "epoch": 3.12, "learning_rate": 4.8438748216833095e-05, "loss": 0.0076, "step": 17520 }, { "epoch": 3.13, "learning_rate": 4.8437856633380886e-05, "loss": 0.0062, "step": 17530 }, { "epoch": 3.13, "learning_rate": 4.843696504992867e-05, "loss": 0.0071, "step": 17540 }, { "epoch": 3.13, "learning_rate": 4.843607346647647e-05, "loss": 0.009, "step": 17550 }, { "epoch": 3.13, "learning_rate": 4.843518188302425e-05, "loss": 0.0059, "step": 17560 }, { "epoch": 3.13, "learning_rate": 4.8434290299572044e-05, "loss": 0.0055, "step": 17570 }, { "epoch": 3.13, "learning_rate": 4.8433398716119835e-05, "loss": 0.0106, "step": 17580 }, { "epoch": 3.14, "learning_rate": 4.843250713266762e-05, "loss": 0.0072, "step": 17590 }, { "epoch": 3.14, "learning_rate": 4.843161554921541e-05, "loss": 0.01, "step": 17600 }, { "epoch": 3.14, "learning_rate": 4.8430723965763195e-05, "loss": 0.0073, "step": 17610 }, { "epoch": 3.14, "learning_rate": 4.8429832382310986e-05, "loss": 0.0063, "step": 17620 }, { "epoch": 3.14, "learning_rate": 4.842894079885877e-05, "loss": 0.0073, "step": 17630 }, { "epoch": 3.15, "learning_rate": 4.842804921540656e-05, "loss": 0.005, "step": 17640 }, { "epoch": 3.15, "learning_rate": 4.842715763195435e-05, "loss": 0.0089, "step": 17650 }, { "epoch": 3.15, "learning_rate": 4.8426266048502145e-05, "loss": 0.0101, "step": 17660 }, { "epoch": 3.15, "learning_rate": 4.8425374465049936e-05, "loss": 0.0084, "step": 17670 }, { "epoch": 3.15, "learning_rate": 4.842448288159772e-05, "loss": 0.0069, "step": 17680 }, { "epoch": 3.15, "learning_rate": 4.842359129814551e-05, "loss": 0.0048, "step": 17690 }, { "epoch": 3.16, "learning_rate": 4.8422699714693296e-05, "loss": 0.008, "step": 17700 }, { "epoch": 3.16, "learning_rate": 4.842180813124109e-05, "loss": 0.0044, "step": 17710 }, { "epoch": 3.16, "learning_rate": 4.842091654778888e-05, "loss": 0.0114, "step": 17720 }, { "epoch": 3.16, "learning_rate": 4.842002496433666e-05, "loss": 0.0057, "step": 17730 }, { "epoch": 3.16, "learning_rate": 4.8419133380884454e-05, "loss": 0.005, "step": 17740 }, { "epoch": 3.17, "learning_rate": 4.841824179743224e-05, "loss": 0.0064, "step": 17750 }, { "epoch": 3.17, "learning_rate": 4.841735021398003e-05, "loss": 0.0084, "step": 17760 }, { "epoch": 3.17, "learning_rate": 4.841645863052782e-05, "loss": 0.0059, "step": 17770 }, { "epoch": 3.17, "learning_rate": 4.841556704707561e-05, "loss": 0.0071, "step": 17780 }, { "epoch": 3.17, "learning_rate": 4.8414675463623396e-05, "loss": 0.0058, "step": 17790 }, { "epoch": 3.17, "learning_rate": 4.841378388017119e-05, "loss": 0.0094, "step": 17800 }, { "epoch": 3.18, "learning_rate": 4.841289229671898e-05, "loss": 0.0057, "step": 17810 }, { "epoch": 3.18, "learning_rate": 4.841200071326676e-05, "loss": 0.005, "step": 17820 }, { "epoch": 3.18, "learning_rate": 4.8411109129814554e-05, "loss": 0.0082, "step": 17830 }, { "epoch": 3.18, "learning_rate": 4.841021754636234e-05, "loss": 0.0061, "step": 17840 }, { "epoch": 3.18, "learning_rate": 4.840932596291013e-05, "loss": 0.0095, "step": 17850 }, { "epoch": 3.18, "learning_rate": 4.8408434379457914e-05, "loss": 0.0076, "step": 17860 }, { "epoch": 3.19, "learning_rate": 4.8407542796005706e-05, "loss": 0.0059, "step": 17870 }, { "epoch": 3.19, "learning_rate": 4.84066512125535e-05, "loss": 0.008, "step": 17880 }, { "epoch": 3.19, "learning_rate": 4.840575962910129e-05, "loss": 0.0088, "step": 17890 }, { "epoch": 3.19, "learning_rate": 4.840486804564908e-05, "loss": 0.0065, "step": 17900 }, { "epoch": 3.19, "learning_rate": 4.8403976462196864e-05, "loss": 0.0076, "step": 17910 }, { "epoch": 3.2, "learning_rate": 4.8403084878744655e-05, "loss": 0.0073, "step": 17920 }, { "epoch": 3.2, "learning_rate": 4.840219329529244e-05, "loss": 0.0047, "step": 17930 }, { "epoch": 3.2, "learning_rate": 4.840130171184023e-05, "loss": 0.0057, "step": 17940 }, { "epoch": 3.2, "learning_rate": 4.840041012838802e-05, "loss": 0.0051, "step": 17950 }, { "epoch": 3.2, "learning_rate": 4.8399518544935806e-05, "loss": 0.009, "step": 17960 }, { "epoch": 3.2, "learning_rate": 4.83986269614836e-05, "loss": 0.0115, "step": 17970 }, { "epoch": 3.21, "learning_rate": 4.839773537803138e-05, "loss": 0.0066, "step": 17980 }, { "epoch": 3.21, "learning_rate": 4.839684379457918e-05, "loss": 0.0078, "step": 17990 }, { "epoch": 3.21, "learning_rate": 4.8395952211126964e-05, "loss": 0.009, "step": 18000 }, { "epoch": 3.21, "learning_rate": 4.8395060627674756e-05, "loss": 0.0058, "step": 18010 }, { "epoch": 3.21, "learning_rate": 4.839416904422254e-05, "loss": 0.0057, "step": 18020 }, { "epoch": 3.22, "learning_rate": 4.839327746077033e-05, "loss": 0.0074, "step": 18030 }, { "epoch": 3.22, "learning_rate": 4.839238587731812e-05, "loss": 0.0093, "step": 18040 }, { "epoch": 3.22, "learning_rate": 4.839149429386591e-05, "loss": 0.0076, "step": 18050 }, { "epoch": 3.22, "learning_rate": 4.83906027104137e-05, "loss": 0.0086, "step": 18060 }, { "epoch": 3.22, "learning_rate": 4.838971112696148e-05, "loss": 0.0077, "step": 18070 }, { "epoch": 3.22, "learning_rate": 4.8388819543509274e-05, "loss": 0.0065, "step": 18080 }, { "epoch": 3.23, "learning_rate": 4.838792796005706e-05, "loss": 0.006, "step": 18090 }, { "epoch": 3.23, "learning_rate": 4.8387036376604856e-05, "loss": 0.0048, "step": 18100 }, { "epoch": 3.23, "learning_rate": 4.838614479315264e-05, "loss": 0.0033, "step": 18110 }, { "epoch": 3.23, "learning_rate": 4.838525320970043e-05, "loss": 0.0058, "step": 18120 }, { "epoch": 3.23, "learning_rate": 4.838436162624822e-05, "loss": 0.0056, "step": 18130 }, { "epoch": 3.23, "learning_rate": 4.838347004279601e-05, "loss": 0.0114, "step": 18140 }, { "epoch": 3.24, "learning_rate": 4.83825784593438e-05, "loss": 0.0095, "step": 18150 }, { "epoch": 3.24, "learning_rate": 4.838168687589158e-05, "loss": 0.0046, "step": 18160 }, { "epoch": 3.24, "learning_rate": 4.8380795292439374e-05, "loss": 0.0079, "step": 18170 }, { "epoch": 3.24, "learning_rate": 4.8379903708987165e-05, "loss": 0.008, "step": 18180 }, { "epoch": 3.24, "learning_rate": 4.837901212553495e-05, "loss": 0.0098, "step": 18190 }, { "epoch": 3.25, "learning_rate": 4.837812054208274e-05, "loss": 0.0075, "step": 18200 }, { "epoch": 3.25, "learning_rate": 4.837722895863053e-05, "loss": 0.0058, "step": 18210 }, { "epoch": 3.25, "learning_rate": 4.8376337375178324e-05, "loss": 0.0064, "step": 18220 }, { "epoch": 3.25, "learning_rate": 4.837544579172611e-05, "loss": 0.0079, "step": 18230 }, { "epoch": 3.25, "learning_rate": 4.83745542082739e-05, "loss": 0.0058, "step": 18240 }, { "epoch": 3.25, "learning_rate": 4.8373662624821684e-05, "loss": 0.0072, "step": 18250 }, { "epoch": 3.26, "learning_rate": 4.8372771041369475e-05, "loss": 0.0102, "step": 18260 }, { "epoch": 3.26, "learning_rate": 4.8371879457917266e-05, "loss": 0.0061, "step": 18270 }, { "epoch": 3.26, "learning_rate": 4.837098787446505e-05, "loss": 0.0086, "step": 18280 }, { "epoch": 3.26, "learning_rate": 4.837009629101284e-05, "loss": 0.006, "step": 18290 }, { "epoch": 3.26, "learning_rate": 4.8369204707560626e-05, "loss": 0.0063, "step": 18300 }, { "epoch": 3.26, "learning_rate": 4.836831312410842e-05, "loss": 0.008, "step": 18310 }, { "epoch": 3.27, "learning_rate": 4.836742154065621e-05, "loss": 0.0077, "step": 18320 }, { "epoch": 3.27, "learning_rate": 4.8366529957204e-05, "loss": 0.0068, "step": 18330 }, { "epoch": 3.27, "learning_rate": 4.8365638373751784e-05, "loss": 0.0068, "step": 18340 }, { "epoch": 3.27, "learning_rate": 4.8364746790299575e-05, "loss": 0.0067, "step": 18350 }, { "epoch": 3.27, "learning_rate": 4.8363855206847367e-05, "loss": 0.0069, "step": 18360 }, { "epoch": 3.28, "learning_rate": 4.836296362339515e-05, "loss": 0.0091, "step": 18370 }, { "epoch": 3.28, "learning_rate": 4.836207203994294e-05, "loss": 0.0091, "step": 18380 }, { "epoch": 3.28, "learning_rate": 4.836118045649073e-05, "loss": 0.0091, "step": 18390 }, { "epoch": 3.28, "learning_rate": 4.836028887303852e-05, "loss": 0.006, "step": 18400 }, { "epoch": 3.28, "learning_rate": 4.83593972895863e-05, "loss": 0.0077, "step": 18410 }, { "epoch": 3.28, "learning_rate": 4.8358505706134094e-05, "loss": 0.0071, "step": 18420 }, { "epoch": 3.29, "learning_rate": 4.8357614122681885e-05, "loss": 0.0068, "step": 18430 }, { "epoch": 3.29, "learning_rate": 4.8356722539229676e-05, "loss": 0.0046, "step": 18440 }, { "epoch": 3.29, "learning_rate": 4.835583095577747e-05, "loss": 0.0042, "step": 18450 }, { "epoch": 3.29, "learning_rate": 4.835493937232525e-05, "loss": 0.0063, "step": 18460 }, { "epoch": 3.29, "learning_rate": 4.835404778887304e-05, "loss": 0.0067, "step": 18470 }, { "epoch": 3.3, "learning_rate": 4.835315620542083e-05, "loss": 0.0105, "step": 18480 }, { "epoch": 3.3, "learning_rate": 4.835226462196862e-05, "loss": 0.0073, "step": 18490 }, { "epoch": 3.3, "learning_rate": 4.835137303851641e-05, "loss": 0.0085, "step": 18500 }, { "epoch": 3.3, "learning_rate": 4.8350481455064194e-05, "loss": 0.0078, "step": 18510 }, { "epoch": 3.3, "learning_rate": 4.8349589871611985e-05, "loss": 0.0069, "step": 18520 }, { "epoch": 3.3, "learning_rate": 4.834869828815977e-05, "loss": 0.0078, "step": 18530 }, { "epoch": 3.31, "learning_rate": 4.834780670470757e-05, "loss": 0.0087, "step": 18540 }, { "epoch": 3.31, "learning_rate": 4.834691512125535e-05, "loss": 0.0051, "step": 18550 }, { "epoch": 3.31, "learning_rate": 4.834602353780314e-05, "loss": 0.0072, "step": 18560 }, { "epoch": 3.31, "learning_rate": 4.834513195435093e-05, "loss": 0.0077, "step": 18570 }, { "epoch": 3.31, "learning_rate": 4.834424037089872e-05, "loss": 0.0083, "step": 18580 }, { "epoch": 3.31, "learning_rate": 4.834334878744651e-05, "loss": 0.0056, "step": 18590 }, { "epoch": 3.32, "learning_rate": 4.8342457203994295e-05, "loss": 0.0093, "step": 18600 }, { "epoch": 3.32, "learning_rate": 4.8341565620542086e-05, "loss": 0.0052, "step": 18610 }, { "epoch": 3.32, "learning_rate": 4.834067403708987e-05, "loss": 0.0063, "step": 18620 }, { "epoch": 3.32, "learning_rate": 4.833978245363766e-05, "loss": 0.0062, "step": 18630 }, { "epoch": 3.32, "learning_rate": 4.8338890870185446e-05, "loss": 0.008, "step": 18640 }, { "epoch": 3.33, "learning_rate": 4.8337999286733244e-05, "loss": 0.0113, "step": 18650 }, { "epoch": 3.33, "learning_rate": 4.833710770328103e-05, "loss": 0.0082, "step": 18660 }, { "epoch": 3.33, "learning_rate": 4.833621611982882e-05, "loss": 0.0092, "step": 18670 }, { "epoch": 3.33, "learning_rate": 4.833532453637661e-05, "loss": 0.0063, "step": 18680 }, { "epoch": 3.33, "learning_rate": 4.833452211126961e-05, "loss": 0.0104, "step": 18690 }, { "epoch": 3.33, "learning_rate": 4.833363052781741e-05, "loss": 0.0088, "step": 18700 }, { "epoch": 3.34, "learning_rate": 4.8332738944365194e-05, "loss": 0.0066, "step": 18710 }, { "epoch": 3.34, "learning_rate": 4.8331847360912986e-05, "loss": 0.0061, "step": 18720 }, { "epoch": 3.34, "learning_rate": 4.833095577746078e-05, "loss": 0.0063, "step": 18730 }, { "epoch": 3.34, "learning_rate": 4.833006419400856e-05, "loss": 0.0103, "step": 18740 }, { "epoch": 3.34, "learning_rate": 4.832917261055635e-05, "loss": 0.0065, "step": 18750 }, { "epoch": 3.35, "learning_rate": 4.832828102710414e-05, "loss": 0.0056, "step": 18760 }, { "epoch": 3.35, "learning_rate": 4.832738944365193e-05, "loss": 0.0048, "step": 18770 }, { "epoch": 3.35, "learning_rate": 4.832649786019971e-05, "loss": 0.0077, "step": 18780 }, { "epoch": 3.35, "learning_rate": 4.8325606276747504e-05, "loss": 0.0058, "step": 18790 }, { "epoch": 3.35, "learning_rate": 4.8324714693295295e-05, "loss": 0.0051, "step": 18800 }, { "epoch": 3.35, "learning_rate": 4.8323823109843086e-05, "loss": 0.0089, "step": 18810 }, { "epoch": 3.36, "learning_rate": 4.832293152639088e-05, "loss": 0.0049, "step": 18820 }, { "epoch": 3.36, "learning_rate": 4.832203994293866e-05, "loss": 0.0073, "step": 18830 }, { "epoch": 3.36, "learning_rate": 4.832114835948645e-05, "loss": 0.0046, "step": 18840 }, { "epoch": 3.36, "learning_rate": 4.832025677603424e-05, "loss": 0.0048, "step": 18850 }, { "epoch": 3.36, "learning_rate": 4.831936519258203e-05, "loss": 0.005, "step": 18860 }, { "epoch": 3.36, "learning_rate": 4.831847360912981e-05, "loss": 0.007, "step": 18870 }, { "epoch": 3.37, "learning_rate": 4.8317582025677604e-05, "loss": 0.0039, "step": 18880 }, { "epoch": 3.37, "learning_rate": 4.8316690442225396e-05, "loss": 0.0065, "step": 18890 }, { "epoch": 3.37, "learning_rate": 4.831579885877318e-05, "loss": 0.0041, "step": 18900 }, { "epoch": 3.37, "learning_rate": 4.831490727532097e-05, "loss": 0.0086, "step": 18910 }, { "epoch": 3.37, "learning_rate": 4.831401569186876e-05, "loss": 0.0072, "step": 18920 }, { "epoch": 3.38, "learning_rate": 4.8313124108416554e-05, "loss": 0.006, "step": 18930 }, { "epoch": 3.38, "learning_rate": 4.831223252496434e-05, "loss": 0.0058, "step": 18940 }, { "epoch": 3.38, "learning_rate": 4.8311430099857346e-05, "loss": 0.0083, "step": 18950 }, { "epoch": 3.38, "learning_rate": 4.831053851640514e-05, "loss": 0.0077, "step": 18960 }, { "epoch": 3.38, "learning_rate": 4.830964693295293e-05, "loss": 0.0095, "step": 18970 }, { "epoch": 3.38, "learning_rate": 4.830875534950072e-05, "loss": 0.0078, "step": 18980 }, { "epoch": 3.39, "learning_rate": 4.8307863766048504e-05, "loss": 0.0073, "step": 18990 }, { "epoch": 3.39, "learning_rate": 4.8306972182596295e-05, "loss": 0.0089, "step": 19000 }, { "epoch": 3.39, "learning_rate": 4.830608059914408e-05, "loss": 0.0066, "step": 19010 }, { "epoch": 3.39, "learning_rate": 4.830518901569187e-05, "loss": 0.0087, "step": 19020 }, { "epoch": 3.39, "learning_rate": 4.830429743223966e-05, "loss": 0.0084, "step": 19030 }, { "epoch": 3.4, "learning_rate": 4.830340584878745e-05, "loss": 0.0082, "step": 19040 }, { "epoch": 3.4, "learning_rate": 4.830251426533524e-05, "loss": 0.0086, "step": 19050 }, { "epoch": 3.4, "learning_rate": 4.830162268188302e-05, "loss": 0.0067, "step": 19060 }, { "epoch": 3.4, "learning_rate": 4.830073109843082e-05, "loss": 0.0083, "step": 19070 }, { "epoch": 3.4, "learning_rate": 4.8299839514978605e-05, "loss": 0.0075, "step": 19080 }, { "epoch": 3.4, "learning_rate": 4.8298947931526396e-05, "loss": 0.0057, "step": 19090 }, { "epoch": 3.41, "learning_rate": 4.829805634807418e-05, "loss": 0.009, "step": 19100 }, { "epoch": 3.41, "learning_rate": 4.829716476462197e-05, "loss": 0.008, "step": 19110 }, { "epoch": 3.41, "learning_rate": 4.829627318116976e-05, "loss": 0.0062, "step": 19120 }, { "epoch": 3.41, "learning_rate": 4.829538159771755e-05, "loss": 0.0054, "step": 19130 }, { "epoch": 3.41, "learning_rate": 4.829449001426534e-05, "loss": 0.0072, "step": 19140 }, { "epoch": 3.41, "learning_rate": 4.829359843081312e-05, "loss": 0.0036, "step": 19150 }, { "epoch": 3.42, "learning_rate": 4.8292706847360914e-05, "loss": 0.0049, "step": 19160 }, { "epoch": 3.42, "learning_rate": 4.82918152639087e-05, "loss": 0.0074, "step": 19170 }, { "epoch": 3.42, "learning_rate": 4.8290923680456496e-05, "loss": 0.0073, "step": 19180 }, { "epoch": 3.42, "learning_rate": 4.829003209700428e-05, "loss": 0.0073, "step": 19190 }, { "epoch": 3.42, "learning_rate": 4.828914051355207e-05, "loss": 0.0065, "step": 19200 }, { "epoch": 3.43, "learning_rate": 4.828824893009986e-05, "loss": 0.0101, "step": 19210 }, { "epoch": 3.43, "learning_rate": 4.828735734664765e-05, "loss": 0.008, "step": 19220 }, { "epoch": 3.43, "learning_rate": 4.828646576319544e-05, "loss": 0.0058, "step": 19230 }, { "epoch": 3.43, "learning_rate": 4.8285574179743223e-05, "loss": 0.0074, "step": 19240 }, { "epoch": 3.43, "learning_rate": 4.8284682596291015e-05, "loss": 0.0068, "step": 19250 }, { "epoch": 3.43, "learning_rate": 4.8283791012838806e-05, "loss": 0.007, "step": 19260 }, { "epoch": 3.44, "learning_rate": 4.828289942938659e-05, "loss": 0.0079, "step": 19270 }, { "epoch": 3.44, "learning_rate": 4.828200784593438e-05, "loss": 0.0093, "step": 19280 }, { "epoch": 3.44, "learning_rate": 4.828111626248217e-05, "loss": 0.0083, "step": 19290 }, { "epoch": 3.44, "learning_rate": 4.8280224679029964e-05, "loss": 0.0084, "step": 19300 }, { "epoch": 3.44, "learning_rate": 4.827933309557775e-05, "loss": 0.0093, "step": 19310 }, { "epoch": 3.45, "learning_rate": 4.827844151212554e-05, "loss": 0.006, "step": 19320 }, { "epoch": 3.45, "learning_rate": 4.8277549928673324e-05, "loss": 0.0056, "step": 19330 }, { "epoch": 3.45, "learning_rate": 4.8276658345221115e-05, "loss": 0.0095, "step": 19340 }, { "epoch": 3.45, "learning_rate": 4.8275766761768906e-05, "loss": 0.0053, "step": 19350 }, { "epoch": 3.45, "learning_rate": 4.827487517831669e-05, "loss": 0.0078, "step": 19360 }, { "epoch": 3.45, "learning_rate": 4.827398359486448e-05, "loss": 0.0063, "step": 19370 }, { "epoch": 3.46, "learning_rate": 4.8273092011412266e-05, "loss": 0.0095, "step": 19380 }, { "epoch": 3.46, "learning_rate": 4.827220042796006e-05, "loss": 0.0106, "step": 19390 }, { "epoch": 3.46, "learning_rate": 4.827130884450785e-05, "loss": 0.0061, "step": 19400 }, { "epoch": 3.46, "learning_rate": 4.827041726105564e-05, "loss": 0.0088, "step": 19410 }, { "epoch": 3.46, "learning_rate": 4.8269525677603425e-05, "loss": 0.0099, "step": 19420 }, { "epoch": 3.46, "learning_rate": 4.8268634094151216e-05, "loss": 0.0086, "step": 19430 }, { "epoch": 3.47, "learning_rate": 4.826774251069901e-05, "loss": 0.0056, "step": 19440 }, { "epoch": 3.47, "learning_rate": 4.826685092724679e-05, "loss": 0.0071, "step": 19450 }, { "epoch": 3.47, "learning_rate": 4.826595934379458e-05, "loss": 0.0062, "step": 19460 }, { "epoch": 3.47, "learning_rate": 4.826506776034237e-05, "loss": 0.009, "step": 19470 }, { "epoch": 3.47, "learning_rate": 4.826417617689016e-05, "loss": 0.0077, "step": 19480 }, { "epoch": 3.48, "learning_rate": 4.826328459343795e-05, "loss": 0.009, "step": 19490 }, { "epoch": 3.48, "learning_rate": 4.8262393009985734e-05, "loss": 0.0065, "step": 19500 }, { "epoch": 3.48, "learning_rate": 4.826150142653353e-05, "loss": 0.0055, "step": 19510 }, { "epoch": 3.48, "learning_rate": 4.8260609843081316e-05, "loss": 0.0057, "step": 19520 }, { "epoch": 3.48, "learning_rate": 4.825971825962911e-05, "loss": 0.0055, "step": 19530 }, { "epoch": 3.48, "learning_rate": 4.825882667617689e-05, "loss": 0.0067, "step": 19540 }, { "epoch": 3.49, "learning_rate": 4.825793509272468e-05, "loss": 0.0065, "step": 19550 }, { "epoch": 3.49, "learning_rate": 4.825704350927247e-05, "loss": 0.0084, "step": 19560 }, { "epoch": 3.49, "learning_rate": 4.825615192582026e-05, "loss": 0.0067, "step": 19570 }, { "epoch": 3.49, "learning_rate": 4.825526034236805e-05, "loss": 0.0066, "step": 19580 }, { "epoch": 3.49, "learning_rate": 4.8254368758915834e-05, "loss": 0.0064, "step": 19590 }, { "epoch": 3.5, "learning_rate": 4.8253477175463626e-05, "loss": 0.0077, "step": 19600 }, { "epoch": 3.5, "learning_rate": 4.825258559201141e-05, "loss": 0.0064, "step": 19610 }, { "epoch": 3.5, "learning_rate": 4.825169400855921e-05, "loss": 0.0081, "step": 19620 }, { "epoch": 3.5, "learning_rate": 4.825080242510699e-05, "loss": 0.0072, "step": 19630 }, { "epoch": 3.5, "learning_rate": 4.8249910841654784e-05, "loss": 0.0076, "step": 19640 }, { "epoch": 3.5, "learning_rate": 4.824901925820257e-05, "loss": 0.0086, "step": 19650 }, { "epoch": 3.51, "learning_rate": 4.824812767475036e-05, "loss": 0.0082, "step": 19660 }, { "epoch": 3.51, "learning_rate": 4.824723609129815e-05, "loss": 0.0101, "step": 19670 }, { "epoch": 3.51, "learning_rate": 4.8246344507845935e-05, "loss": 0.009, "step": 19680 }, { "epoch": 3.51, "learning_rate": 4.8245452924393726e-05, "loss": 0.008, "step": 19690 }, { "epoch": 3.51, "learning_rate": 4.824456134094151e-05, "loss": 0.0106, "step": 19700 }, { "epoch": 3.51, "learning_rate": 4.82436697574893e-05, "loss": 0.0088, "step": 19710 }, { "epoch": 3.52, "learning_rate": 4.824277817403709e-05, "loss": 0.0067, "step": 19720 }, { "epoch": 3.52, "learning_rate": 4.8241886590584884e-05, "loss": 0.0059, "step": 19730 }, { "epoch": 3.52, "learning_rate": 4.8240995007132675e-05, "loss": 0.0053, "step": 19740 }, { "epoch": 3.52, "learning_rate": 4.824010342368046e-05, "loss": 0.0079, "step": 19750 }, { "epoch": 3.52, "learning_rate": 4.823921184022825e-05, "loss": 0.0056, "step": 19760 }, { "epoch": 3.53, "learning_rate": 4.8238320256776036e-05, "loss": 0.0102, "step": 19770 }, { "epoch": 3.53, "learning_rate": 4.823742867332383e-05, "loss": 0.007, "step": 19780 }, { "epoch": 3.53, "learning_rate": 4.823653708987161e-05, "loss": 0.0053, "step": 19790 }, { "epoch": 3.53, "learning_rate": 4.82356455064194e-05, "loss": 0.0076, "step": 19800 }, { "epoch": 3.53, "learning_rate": 4.8234753922967194e-05, "loss": 0.0071, "step": 19810 }, { "epoch": 3.53, "learning_rate": 4.823386233951498e-05, "loss": 0.0044, "step": 19820 }, { "epoch": 3.54, "learning_rate": 4.823297075606277e-05, "loss": 0.0065, "step": 19830 }, { "epoch": 3.54, "learning_rate": 4.823207917261056e-05, "loss": 0.0074, "step": 19840 }, { "epoch": 3.54, "learning_rate": 4.823118758915835e-05, "loss": 0.0093, "step": 19850 }, { "epoch": 3.54, "learning_rate": 4.8230296005706136e-05, "loss": 0.0092, "step": 19860 }, { "epoch": 3.54, "learning_rate": 4.822940442225393e-05, "loss": 0.0066, "step": 19870 }, { "epoch": 3.54, "learning_rate": 4.822851283880171e-05, "loss": 0.0104, "step": 19880 }, { "epoch": 3.55, "learning_rate": 4.82276212553495e-05, "loss": 0.0088, "step": 19890 }, { "epoch": 3.55, "learning_rate": 4.8226729671897294e-05, "loss": 0.0101, "step": 19900 }, { "epoch": 3.55, "learning_rate": 4.822583808844508e-05, "loss": 0.0098, "step": 19910 }, { "epoch": 3.55, "learning_rate": 4.822494650499287e-05, "loss": 0.0082, "step": 19920 }, { "epoch": 3.55, "learning_rate": 4.8224054921540654e-05, "loss": 0.0062, "step": 19930 }, { "epoch": 3.56, "learning_rate": 4.8223163338088445e-05, "loss": 0.007, "step": 19940 }, { "epoch": 3.56, "learning_rate": 4.822227175463624e-05, "loss": 0.0073, "step": 19950 }, { "epoch": 3.56, "learning_rate": 4.822138017118403e-05, "loss": 0.0069, "step": 19960 }, { "epoch": 3.56, "learning_rate": 4.822048858773182e-05, "loss": 0.0056, "step": 19970 }, { "epoch": 3.56, "learning_rate": 4.8219597004279604e-05, "loss": 0.0085, "step": 19980 }, { "epoch": 3.56, "learning_rate": 4.8218705420827395e-05, "loss": 0.0056, "step": 19990 }, { "epoch": 3.57, "learning_rate": 4.821781383737518e-05, "loss": 0.004, "step": 20000 }, { "epoch": 3.57, "learning_rate": 4.821692225392297e-05, "loss": 0.0043, "step": 20010 }, { "epoch": 3.57, "learning_rate": 4.8216030670470755e-05, "loss": 0.0077, "step": 20020 }, { "epoch": 3.57, "learning_rate": 4.8215139087018546e-05, "loss": 0.0079, "step": 20030 }, { "epoch": 3.57, "learning_rate": 4.821424750356634e-05, "loss": 0.0033, "step": 20040 }, { "epoch": 3.58, "learning_rate": 4.821335592011412e-05, "loss": 0.0071, "step": 20050 }, { "epoch": 3.58, "learning_rate": 4.821246433666191e-05, "loss": 0.0097, "step": 20060 }, { "epoch": 3.58, "learning_rate": 4.8211572753209704e-05, "loss": 0.0069, "step": 20070 }, { "epoch": 3.58, "learning_rate": 4.8210681169757495e-05, "loss": 0.0081, "step": 20080 }, { "epoch": 3.58, "learning_rate": 4.820978958630528e-05, "loss": 0.006, "step": 20090 }, { "epoch": 3.58, "learning_rate": 4.820889800285307e-05, "loss": 0.0097, "step": 20100 }, { "epoch": 3.59, "learning_rate": 4.8208006419400855e-05, "loss": 0.0066, "step": 20110 }, { "epoch": 3.59, "learning_rate": 4.8207114835948647e-05, "loss": 0.006, "step": 20120 }, { "epoch": 3.59, "learning_rate": 4.820622325249644e-05, "loss": 0.0082, "step": 20130 }, { "epoch": 3.59, "learning_rate": 4.820533166904422e-05, "loss": 0.0083, "step": 20140 }, { "epoch": 3.59, "learning_rate": 4.8204440085592013e-05, "loss": 0.0052, "step": 20150 }, { "epoch": 3.59, "learning_rate": 4.82035485021398e-05, "loss": 0.0086, "step": 20160 }, { "epoch": 3.6, "learning_rate": 4.820265691868759e-05, "loss": 0.0048, "step": 20170 }, { "epoch": 3.6, "learning_rate": 4.820176533523538e-05, "loss": 0.0051, "step": 20180 }, { "epoch": 3.6, "learning_rate": 4.820087375178317e-05, "loss": 0.0052, "step": 20190 }, { "epoch": 3.6, "learning_rate": 4.819998216833096e-05, "loss": 0.0072, "step": 20200 }, { "epoch": 3.6, "learning_rate": 4.819909058487875e-05, "loss": 0.0056, "step": 20210 }, { "epoch": 3.61, "learning_rate": 4.819819900142654e-05, "loss": 0.0063, "step": 20220 }, { "epoch": 3.61, "learning_rate": 4.819730741797432e-05, "loss": 0.0093, "step": 20230 }, { "epoch": 3.61, "learning_rate": 4.8196415834522114e-05, "loss": 0.0069, "step": 20240 }, { "epoch": 3.61, "learning_rate": 4.81955242510699e-05, "loss": 0.0049, "step": 20250 }, { "epoch": 3.61, "learning_rate": 4.819463266761769e-05, "loss": 0.0054, "step": 20260 }, { "epoch": 3.61, "learning_rate": 4.819374108416548e-05, "loss": 0.0101, "step": 20270 }, { "epoch": 3.62, "learning_rate": 4.8192849500713265e-05, "loss": 0.0144, "step": 20280 }, { "epoch": 3.62, "learning_rate": 4.819195791726106e-05, "loss": 0.0101, "step": 20290 }, { "epoch": 3.62, "learning_rate": 4.819106633380885e-05, "loss": 0.0078, "step": 20300 }, { "epoch": 3.62, "learning_rate": 4.819017475035664e-05, "loss": 0.0043, "step": 20310 }, { "epoch": 3.62, "learning_rate": 4.818928316690442e-05, "loss": 0.0063, "step": 20320 }, { "epoch": 3.63, "learning_rate": 4.8188391583452215e-05, "loss": 0.0048, "step": 20330 }, { "epoch": 3.63, "learning_rate": 4.81875e-05, "loss": 0.0105, "step": 20340 }, { "epoch": 3.63, "learning_rate": 4.818660841654779e-05, "loss": 0.0057, "step": 20350 }, { "epoch": 3.63, "learning_rate": 4.818571683309558e-05, "loss": 0.0077, "step": 20360 }, { "epoch": 3.63, "learning_rate": 4.8184825249643366e-05, "loss": 0.0087, "step": 20370 }, { "epoch": 3.63, "learning_rate": 4.818393366619116e-05, "loss": 0.0066, "step": 20380 }, { "epoch": 3.64, "learning_rate": 4.818304208273894e-05, "loss": 0.0054, "step": 20390 }, { "epoch": 3.64, "learning_rate": 4.818215049928674e-05, "loss": 0.005, "step": 20400 }, { "epoch": 3.64, "learning_rate": 4.8181258915834524e-05, "loss": 0.0083, "step": 20410 }, { "epoch": 3.64, "learning_rate": 4.8180367332382315e-05, "loss": 0.0068, "step": 20420 }, { "epoch": 3.64, "learning_rate": 4.8179475748930106e-05, "loss": 0.0044, "step": 20430 }, { "epoch": 3.64, "learning_rate": 4.817858416547789e-05, "loss": 0.009, "step": 20440 }, { "epoch": 3.65, "learning_rate": 4.817769258202568e-05, "loss": 0.0049, "step": 20450 }, { "epoch": 3.65, "learning_rate": 4.8176800998573466e-05, "loss": 0.0077, "step": 20460 }, { "epoch": 3.65, "learning_rate": 4.817590941512126e-05, "loss": 0.0057, "step": 20470 }, { "epoch": 3.65, "learning_rate": 4.817501783166904e-05, "loss": 0.0077, "step": 20480 }, { "epoch": 3.65, "learning_rate": 4.817412624821683e-05, "loss": 0.0054, "step": 20490 }, { "epoch": 3.66, "learning_rate": 4.8173234664764624e-05, "loss": 0.0074, "step": 20500 }, { "epoch": 3.66, "learning_rate": 4.8172343081312416e-05, "loss": 0.0071, "step": 20510 }, { "epoch": 3.66, "learning_rate": 4.817145149786021e-05, "loss": 0.0046, "step": 20520 }, { "epoch": 3.66, "learning_rate": 4.817055991440799e-05, "loss": 0.0093, "step": 20530 }, { "epoch": 3.66, "learning_rate": 4.816966833095578e-05, "loss": 0.0063, "step": 20540 }, { "epoch": 3.66, "learning_rate": 4.816877674750357e-05, "loss": 0.0053, "step": 20550 }, { "epoch": 3.67, "learning_rate": 4.816788516405136e-05, "loss": 0.0105, "step": 20560 }, { "epoch": 3.67, "learning_rate": 4.816699358059914e-05, "loss": 0.0093, "step": 20570 }, { "epoch": 3.67, "learning_rate": 4.8166101997146934e-05, "loss": 0.0059, "step": 20580 }, { "epoch": 3.67, "learning_rate": 4.8165210413694725e-05, "loss": 0.0081, "step": 20590 }, { "epoch": 3.67, "learning_rate": 4.816431883024251e-05, "loss": 0.0051, "step": 20600 }, { "epoch": 3.68, "learning_rate": 4.81634272467903e-05, "loss": 0.009, "step": 20610 }, { "epoch": 3.68, "learning_rate": 4.816253566333809e-05, "loss": 0.0063, "step": 20620 }, { "epoch": 3.68, "learning_rate": 4.816164407988588e-05, "loss": 0.0049, "step": 20630 }, { "epoch": 3.68, "learning_rate": 4.816075249643367e-05, "loss": 0.0048, "step": 20640 }, { "epoch": 3.68, "learning_rate": 4.815986091298146e-05, "loss": 0.0127, "step": 20650 }, { "epoch": 3.68, "learning_rate": 4.815896932952924e-05, "loss": 0.007, "step": 20660 }, { "epoch": 3.69, "learning_rate": 4.8158077746077034e-05, "loss": 0.0055, "step": 20670 }, { "epoch": 3.69, "learning_rate": 4.8157186162624826e-05, "loss": 0.0069, "step": 20680 }, { "epoch": 3.69, "learning_rate": 4.815629457917261e-05, "loss": 0.0066, "step": 20690 }, { "epoch": 3.69, "learning_rate": 4.81554029957204e-05, "loss": 0.008, "step": 20700 }, { "epoch": 3.69, "learning_rate": 4.8154511412268186e-05, "loss": 0.0089, "step": 20710 }, { "epoch": 3.69, "learning_rate": 4.815361982881598e-05, "loss": 0.0077, "step": 20720 }, { "epoch": 3.7, "learning_rate": 4.815272824536377e-05, "loss": 0.0076, "step": 20730 }, { "epoch": 3.7, "learning_rate": 4.815183666191156e-05, "loss": 0.0076, "step": 20740 }, { "epoch": 3.7, "learning_rate": 4.815094507845935e-05, "loss": 0.0028, "step": 20750 }, { "epoch": 3.7, "learning_rate": 4.8150053495007135e-05, "loss": 0.0061, "step": 20760 }, { "epoch": 3.7, "learning_rate": 4.8149161911554926e-05, "loss": 0.006, "step": 20770 }, { "epoch": 3.71, "learning_rate": 4.814827032810271e-05, "loss": 0.0076, "step": 20780 }, { "epoch": 3.71, "learning_rate": 4.81473787446505e-05, "loss": 0.004, "step": 20790 }, { "epoch": 3.71, "learning_rate": 4.8146487161198286e-05, "loss": 0.0056, "step": 20800 }, { "epoch": 3.71, "learning_rate": 4.814559557774608e-05, "loss": 0.0046, "step": 20810 }, { "epoch": 3.71, "learning_rate": 4.814470399429387e-05, "loss": 0.0089, "step": 20820 }, { "epoch": 3.71, "learning_rate": 4.814381241084165e-05, "loss": 0.0047, "step": 20830 }, { "epoch": 3.72, "learning_rate": 4.814292082738945e-05, "loss": 0.0063, "step": 20840 }, { "epoch": 3.72, "learning_rate": 4.8142029243937236e-05, "loss": 0.0043, "step": 20850 }, { "epoch": 3.72, "learning_rate": 4.814113766048503e-05, "loss": 0.01, "step": 20860 }, { "epoch": 3.72, "learning_rate": 4.814024607703281e-05, "loss": 0.0073, "step": 20870 }, { "epoch": 3.72, "learning_rate": 4.81393544935806e-05, "loss": 0.0053, "step": 20880 }, { "epoch": 3.73, "learning_rate": 4.813846291012839e-05, "loss": 0.0071, "step": 20890 }, { "epoch": 3.73, "learning_rate": 4.813757132667618e-05, "loss": 0.0067, "step": 20900 }, { "epoch": 3.73, "learning_rate": 4.813667974322397e-05, "loss": 0.0069, "step": 20910 }, { "epoch": 3.73, "learning_rate": 4.8135788159771754e-05, "loss": 0.0061, "step": 20920 }, { "epoch": 3.73, "learning_rate": 4.8134896576319545e-05, "loss": 0.0065, "step": 20930 }, { "epoch": 3.73, "learning_rate": 4.813400499286733e-05, "loss": 0.0073, "step": 20940 }, { "epoch": 3.74, "learning_rate": 4.813311340941513e-05, "loss": 0.008, "step": 20950 }, { "epoch": 3.74, "learning_rate": 4.813222182596291e-05, "loss": 0.0098, "step": 20960 }, { "epoch": 3.74, "learning_rate": 4.81313302425107e-05, "loss": 0.0092, "step": 20970 }, { "epoch": 3.74, "learning_rate": 4.8130438659058494e-05, "loss": 0.0059, "step": 20980 }, { "epoch": 3.74, "learning_rate": 4.812954707560628e-05, "loss": 0.005, "step": 20990 }, { "epoch": 3.74, "learning_rate": 4.812865549215407e-05, "loss": 0.009, "step": 21000 }, { "epoch": 3.75, "learning_rate": 4.8127763908701854e-05, "loss": 0.0059, "step": 21010 }, { "epoch": 3.75, "learning_rate": 4.8126872325249645e-05, "loss": 0.0079, "step": 21020 }, { "epoch": 3.75, "learning_rate": 4.812598074179743e-05, "loss": 0.0088, "step": 21030 }, { "epoch": 3.75, "learning_rate": 4.812508915834522e-05, "loss": 0.0093, "step": 21040 }, { "epoch": 3.75, "learning_rate": 4.812419757489301e-05, "loss": 0.0082, "step": 21050 }, { "epoch": 3.76, "learning_rate": 4.8123305991440803e-05, "loss": 0.0064, "step": 21060 }, { "epoch": 3.76, "learning_rate": 4.8122414407988595e-05, "loss": 0.0079, "step": 21070 }, { "epoch": 3.76, "learning_rate": 4.812152282453638e-05, "loss": 0.0077, "step": 21080 }, { "epoch": 3.76, "learning_rate": 4.812063124108417e-05, "loss": 0.0085, "step": 21090 }, { "epoch": 3.76, "learning_rate": 4.8119739657631955e-05, "loss": 0.0095, "step": 21100 }, { "epoch": 3.76, "learning_rate": 4.8118848074179746e-05, "loss": 0.0061, "step": 21110 }, { "epoch": 3.77, "learning_rate": 4.811795649072753e-05, "loss": 0.0067, "step": 21120 }, { "epoch": 3.77, "learning_rate": 4.811706490727532e-05, "loss": 0.0099, "step": 21130 }, { "epoch": 3.77, "learning_rate": 4.811617332382311e-05, "loss": 0.0076, "step": 21140 }, { "epoch": 3.77, "learning_rate": 4.81152817403709e-05, "loss": 0.008, "step": 21150 }, { "epoch": 3.77, "learning_rate": 4.811439015691869e-05, "loss": 0.0064, "step": 21160 }, { "epoch": 3.77, "learning_rate": 4.811349857346648e-05, "loss": 0.0078, "step": 21170 }, { "epoch": 3.78, "learning_rate": 4.811260699001427e-05, "loss": 0.0073, "step": 21180 }, { "epoch": 3.78, "learning_rate": 4.8111715406562055e-05, "loss": 0.0084, "step": 21190 }, { "epoch": 3.78, "learning_rate": 4.8110823823109847e-05, "loss": 0.0054, "step": 21200 }, { "epoch": 3.78, "learning_rate": 4.810993223965764e-05, "loss": 0.0066, "step": 21210 }, { "epoch": 3.78, "learning_rate": 4.810904065620542e-05, "loss": 0.0072, "step": 21220 }, { "epoch": 3.79, "learning_rate": 4.8108149072753213e-05, "loss": 0.0058, "step": 21230 }, { "epoch": 3.79, "learning_rate": 4.8107257489301e-05, "loss": 0.0065, "step": 21240 }, { "epoch": 3.79, "learning_rate": 4.810636590584879e-05, "loss": 0.0122, "step": 21250 }, { "epoch": 3.79, "learning_rate": 4.8105474322396573e-05, "loss": 0.005, "step": 21260 }, { "epoch": 3.79, "learning_rate": 4.8104582738944365e-05, "loss": 0.0064, "step": 21270 }, { "epoch": 3.79, "learning_rate": 4.8103691155492156e-05, "loss": 0.0094, "step": 21280 }, { "epoch": 3.8, "learning_rate": 4.810279957203995e-05, "loss": 0.0067, "step": 21290 }, { "epoch": 3.8, "learning_rate": 4.810190798858774e-05, "loss": 0.006, "step": 21300 }, { "epoch": 3.8, "learning_rate": 4.810101640513552e-05, "loss": 0.0076, "step": 21310 }, { "epoch": 3.8, "learning_rate": 4.8100124821683314e-05, "loss": 0.0067, "step": 21320 }, { "epoch": 3.8, "learning_rate": 4.80992332382311e-05, "loss": 0.008, "step": 21330 }, { "epoch": 3.81, "learning_rate": 4.809834165477889e-05, "loss": 0.0058, "step": 21340 }, { "epoch": 3.81, "learning_rate": 4.8097450071326674e-05, "loss": 0.0078, "step": 21350 }, { "epoch": 3.81, "learning_rate": 4.8096558487874465e-05, "loss": 0.0072, "step": 21360 }, { "epoch": 3.81, "learning_rate": 4.8095666904422256e-05, "loss": 0.0111, "step": 21370 }, { "epoch": 3.81, "learning_rate": 4.809477532097004e-05, "loss": 0.0104, "step": 21380 }, { "epoch": 3.81, "learning_rate": 4.809388373751784e-05, "loss": 0.0076, "step": 21390 }, { "epoch": 3.82, "learning_rate": 4.809299215406562e-05, "loss": 0.0083, "step": 21400 }, { "epoch": 3.82, "learning_rate": 4.8092100570613415e-05, "loss": 0.0063, "step": 21410 }, { "epoch": 3.82, "learning_rate": 4.80912089871612e-05, "loss": 0.0065, "step": 21420 }, { "epoch": 3.82, "learning_rate": 4.809031740370899e-05, "loss": 0.0104, "step": 21430 }, { "epoch": 3.82, "learning_rate": 4.808942582025678e-05, "loss": 0.0065, "step": 21440 }, { "epoch": 3.82, "learning_rate": 4.8088534236804566e-05, "loss": 0.0069, "step": 21450 }, { "epoch": 3.83, "learning_rate": 4.808764265335236e-05, "loss": 0.0086, "step": 21460 }, { "epoch": 3.83, "learning_rate": 4.808675106990014e-05, "loss": 0.0087, "step": 21470 }, { "epoch": 3.83, "learning_rate": 4.808585948644793e-05, "loss": 0.0052, "step": 21480 }, { "epoch": 3.83, "learning_rate": 4.808496790299572e-05, "loss": 0.009, "step": 21490 }, { "epoch": 3.83, "learning_rate": 4.8084076319543515e-05, "loss": 0.0054, "step": 21500 }, { "epoch": 3.84, "learning_rate": 4.80831847360913e-05, "loss": 0.0048, "step": 21510 }, { "epoch": 3.84, "learning_rate": 4.808229315263909e-05, "loss": 0.0076, "step": 21520 }, { "epoch": 3.84, "learning_rate": 4.808140156918688e-05, "loss": 0.0054, "step": 21530 }, { "epoch": 3.84, "learning_rate": 4.8080509985734666e-05, "loss": 0.0051, "step": 21540 }, { "epoch": 3.84, "learning_rate": 4.807961840228246e-05, "loss": 0.012, "step": 21550 }, { "epoch": 3.84, "learning_rate": 4.807872681883024e-05, "loss": 0.0093, "step": 21560 }, { "epoch": 3.85, "learning_rate": 4.807783523537803e-05, "loss": 0.0078, "step": 21570 }, { "epoch": 3.85, "learning_rate": 4.807694365192582e-05, "loss": 0.0129, "step": 21580 }, { "epoch": 3.85, "learning_rate": 4.807605206847361e-05, "loss": 0.0105, "step": 21590 }, { "epoch": 3.85, "learning_rate": 4.80751604850214e-05, "loss": 0.0106, "step": 21600 }, { "epoch": 3.85, "learning_rate": 4.807426890156919e-05, "loss": 0.0066, "step": 21610 }, { "epoch": 3.86, "learning_rate": 4.807337731811698e-05, "loss": 0.0067, "step": 21620 }, { "epoch": 3.86, "learning_rate": 4.807248573466477e-05, "loss": 0.0066, "step": 21630 }, { "epoch": 3.86, "learning_rate": 4.807159415121256e-05, "loss": 0.0082, "step": 21640 }, { "epoch": 3.86, "learning_rate": 4.807070256776034e-05, "loss": 0.006, "step": 21650 }, { "epoch": 3.86, "learning_rate": 4.8069810984308134e-05, "loss": 0.0088, "step": 21660 }, { "epoch": 3.86, "learning_rate": 4.8068919400855925e-05, "loss": 0.0053, "step": 21670 }, { "epoch": 3.87, "learning_rate": 4.806802781740371e-05, "loss": 0.0079, "step": 21680 }, { "epoch": 3.87, "learning_rate": 4.80671362339515e-05, "loss": 0.0059, "step": 21690 }, { "epoch": 3.87, "learning_rate": 4.8066244650499285e-05, "loss": 0.0088, "step": 21700 }, { "epoch": 3.87, "learning_rate": 4.8065353067047076e-05, "loss": 0.0067, "step": 21710 }, { "epoch": 3.87, "learning_rate": 4.806446148359487e-05, "loss": 0.0101, "step": 21720 }, { "epoch": 3.87, "learning_rate": 4.806356990014266e-05, "loss": 0.0089, "step": 21730 }, { "epoch": 3.88, "learning_rate": 4.806267831669044e-05, "loss": 0.0056, "step": 21740 }, { "epoch": 3.88, "learning_rate": 4.8061786733238234e-05, "loss": 0.0105, "step": 21750 }, { "epoch": 3.88, "learning_rate": 4.8060895149786026e-05, "loss": 0.0076, "step": 21760 }, { "epoch": 3.88, "learning_rate": 4.806000356633381e-05, "loss": 0.006, "step": 21770 }, { "epoch": 3.88, "learning_rate": 4.80591119828816e-05, "loss": 0.0071, "step": 21780 }, { "epoch": 3.89, "learning_rate": 4.8058220399429386e-05, "loss": 0.0075, "step": 21790 }, { "epoch": 3.89, "learning_rate": 4.805732881597718e-05, "loss": 0.0091, "step": 21800 }, { "epoch": 3.89, "learning_rate": 4.805643723252496e-05, "loss": 0.0058, "step": 21810 }, { "epoch": 3.89, "learning_rate": 4.805554564907275e-05, "loss": 0.0066, "step": 21820 }, { "epoch": 3.89, "learning_rate": 4.8054654065620544e-05, "loss": 0.0106, "step": 21830 }, { "epoch": 3.89, "learning_rate": 4.8053762482168335e-05, "loss": 0.0035, "step": 21840 }, { "epoch": 3.9, "learning_rate": 4.8052870898716126e-05, "loss": 0.0068, "step": 21850 }, { "epoch": 3.9, "learning_rate": 4.805197931526391e-05, "loss": 0.0056, "step": 21860 }, { "epoch": 3.9, "learning_rate": 4.80510877318117e-05, "loss": 0.0085, "step": 21870 }, { "epoch": 3.9, "learning_rate": 4.8050196148359486e-05, "loss": 0.0104, "step": 21880 }, { "epoch": 3.9, "learning_rate": 4.804930456490728e-05, "loss": 0.0125, "step": 21890 }, { "epoch": 3.91, "learning_rate": 4.804841298145507e-05, "loss": 0.0058, "step": 21900 }, { "epoch": 3.91, "learning_rate": 4.804752139800285e-05, "loss": 0.0071, "step": 21910 }, { "epoch": 3.91, "learning_rate": 4.8046629814550644e-05, "loss": 0.005, "step": 21920 }, { "epoch": 3.91, "learning_rate": 4.804573823109843e-05, "loss": 0.0077, "step": 21930 }, { "epoch": 3.91, "learning_rate": 4.804484664764623e-05, "loss": 0.0085, "step": 21940 }, { "epoch": 3.91, "learning_rate": 4.804395506419401e-05, "loss": 0.0054, "step": 21950 }, { "epoch": 3.92, "learning_rate": 4.80430634807418e-05, "loss": 0.0101, "step": 21960 }, { "epoch": 3.92, "learning_rate": 4.804217189728959e-05, "loss": 0.0088, "step": 21970 }, { "epoch": 3.92, "learning_rate": 4.804128031383738e-05, "loss": 0.0087, "step": 21980 }, { "epoch": 3.92, "learning_rate": 4.804038873038517e-05, "loss": 0.005, "step": 21990 }, { "epoch": 3.92, "learning_rate": 4.8039497146932954e-05, "loss": 0.0083, "step": 22000 }, { "epoch": 3.92, "learning_rate": 4.8038605563480745e-05, "loss": 0.0096, "step": 22010 }, { "epoch": 3.93, "learning_rate": 4.803771398002853e-05, "loss": 0.006, "step": 22020 }, { "epoch": 3.93, "learning_rate": 4.803682239657632e-05, "loss": 0.0049, "step": 22030 }, { "epoch": 3.93, "learning_rate": 4.8035930813124105e-05, "loss": 0.0087, "step": 22040 }, { "epoch": 3.93, "learning_rate": 4.80350392296719e-05, "loss": 0.0087, "step": 22050 }, { "epoch": 3.93, "learning_rate": 4.803414764621969e-05, "loss": 0.0074, "step": 22060 }, { "epoch": 3.94, "learning_rate": 4.8033345221112695e-05, "loss": 0.0077, "step": 22070 }, { "epoch": 3.94, "learning_rate": 4.8032453637660487e-05, "loss": 0.0091, "step": 22080 }, { "epoch": 3.94, "learning_rate": 4.803156205420828e-05, "loss": 0.0064, "step": 22090 }, { "epoch": 3.94, "learning_rate": 4.803067047075607e-05, "loss": 0.0131, "step": 22100 }, { "epoch": 3.94, "learning_rate": 4.8029778887303853e-05, "loss": 0.0069, "step": 22110 }, { "epoch": 3.94, "learning_rate": 4.8028887303851645e-05, "loss": 0.0071, "step": 22120 }, { "epoch": 3.95, "learning_rate": 4.802799572039943e-05, "loss": 0.0105, "step": 22130 }, { "epoch": 3.95, "learning_rate": 4.802710413694722e-05, "loss": 0.0071, "step": 22140 }, { "epoch": 3.95, "learning_rate": 4.802621255349501e-05, "loss": 0.0082, "step": 22150 }, { "epoch": 3.95, "learning_rate": 4.8025320970042796e-05, "loss": 0.0056, "step": 22160 }, { "epoch": 3.95, "learning_rate": 4.802442938659059e-05, "loss": 0.005, "step": 22170 }, { "epoch": 3.96, "learning_rate": 4.802353780313837e-05, "loss": 0.0085, "step": 22180 }, { "epoch": 3.96, "learning_rate": 4.802264621968616e-05, "loss": 0.0045, "step": 22190 }, { "epoch": 3.96, "learning_rate": 4.8021754636233954e-05, "loss": 0.0079, "step": 22200 }, { "epoch": 3.96, "learning_rate": 4.8020863052781745e-05, "loss": 0.006, "step": 22210 }, { "epoch": 3.96, "learning_rate": 4.8019971469329536e-05, "loss": 0.0049, "step": 22220 }, { "epoch": 3.96, "learning_rate": 4.801907988587732e-05, "loss": 0.0054, "step": 22230 }, { "epoch": 3.97, "learning_rate": 4.801818830242511e-05, "loss": 0.0049, "step": 22240 }, { "epoch": 3.97, "learning_rate": 4.8017296718972896e-05, "loss": 0.0112, "step": 22250 }, { "epoch": 3.97, "learning_rate": 4.801640513552069e-05, "loss": 0.0061, "step": 22260 }, { "epoch": 3.97, "learning_rate": 4.801551355206847e-05, "loss": 0.0059, "step": 22270 }, { "epoch": 3.97, "learning_rate": 4.801462196861626e-05, "loss": 0.0073, "step": 22280 }, { "epoch": 3.97, "learning_rate": 4.8013730385164055e-05, "loss": 0.0042, "step": 22290 }, { "epoch": 3.98, "learning_rate": 4.801283880171184e-05, "loss": 0.0072, "step": 22300 }, { "epoch": 3.98, "learning_rate": 4.801194721825964e-05, "loss": 0.006, "step": 22310 }, { "epoch": 3.98, "learning_rate": 4.801105563480742e-05, "loss": 0.0071, "step": 22320 }, { "epoch": 3.98, "learning_rate": 4.801016405135521e-05, "loss": 0.0083, "step": 22330 }, { "epoch": 3.98, "learning_rate": 4.8009272467903e-05, "loss": 0.0089, "step": 22340 }, { "epoch": 3.99, "learning_rate": 4.800838088445079e-05, "loss": 0.0078, "step": 22350 }, { "epoch": 3.99, "learning_rate": 4.800748930099857e-05, "loss": 0.0067, "step": 22360 }, { "epoch": 3.99, "learning_rate": 4.8006597717546364e-05, "loss": 0.0067, "step": 22370 }, { "epoch": 3.99, "learning_rate": 4.8005706134094155e-05, "loss": 0.0077, "step": 22380 }, { "epoch": 3.99, "learning_rate": 4.800481455064194e-05, "loss": 0.0047, "step": 22390 }, { "epoch": 3.99, "learning_rate": 4.800392296718973e-05, "loss": 0.0067, "step": 22400 }, { "epoch": 4.0, "learning_rate": 4.8003031383737515e-05, "loss": 0.0092, "step": 22410 }, { "epoch": 4.0, "learning_rate": 4.800213980028531e-05, "loss": 0.007, "step": 22420 }, { "epoch": 4.0, "learning_rate": 4.80012482168331e-05, "loss": 0.0062, "step": 22430 }, { "epoch": 4.0, "eval_loss": 0.014736342243850231, "eval_runtime": 195.8169, "eval_samples_per_second": 23.69, "eval_steps_per_second": 2.962, "step": 22432 }, { "epoch": 4.0, "learning_rate": 4.800035663338089e-05, "loss": 0.0056, "step": 22440 }, { "epoch": 4.0, "learning_rate": 4.799946504992868e-05, "loss": 0.0065, "step": 22450 }, { "epoch": 4.0, "learning_rate": 4.7998573466476464e-05, "loss": 0.004, "step": 22460 }, { "epoch": 4.01, "learning_rate": 4.7997681883024256e-05, "loss": 0.0041, "step": 22470 }, { "epoch": 4.01, "learning_rate": 4.799679029957204e-05, "loss": 0.005, "step": 22480 }, { "epoch": 4.01, "learning_rate": 4.799589871611983e-05, "loss": 0.0067, "step": 22490 }, { "epoch": 4.01, "learning_rate": 4.7995007132667616e-05, "loss": 0.0068, "step": 22500 }, { "epoch": 4.01, "learning_rate": 4.799411554921541e-05, "loss": 0.0066, "step": 22510 }, { "epoch": 4.02, "learning_rate": 4.79932239657632e-05, "loss": 0.008, "step": 22520 }, { "epoch": 4.02, "learning_rate": 4.799233238231099e-05, "loss": 0.0057, "step": 22530 }, { "epoch": 4.02, "learning_rate": 4.799144079885878e-05, "loss": 0.0062, "step": 22540 }, { "epoch": 4.02, "learning_rate": 4.7990549215406565e-05, "loss": 0.0073, "step": 22550 }, { "epoch": 4.02, "learning_rate": 4.7989657631954356e-05, "loss": 0.0072, "step": 22560 }, { "epoch": 4.02, "learning_rate": 4.798876604850214e-05, "loss": 0.0083, "step": 22570 }, { "epoch": 4.03, "learning_rate": 4.798787446504993e-05, "loss": 0.0085, "step": 22580 }, { "epoch": 4.03, "learning_rate": 4.7986982881597716e-05, "loss": 0.0051, "step": 22590 }, { "epoch": 4.03, "learning_rate": 4.798609129814551e-05, "loss": 0.0104, "step": 22600 }, { "epoch": 4.03, "learning_rate": 4.79851997146933e-05, "loss": 0.0107, "step": 22610 }, { "epoch": 4.03, "learning_rate": 4.798430813124108e-05, "loss": 0.0077, "step": 22620 }, { "epoch": 4.04, "learning_rate": 4.7983416547788874e-05, "loss": 0.0081, "step": 22630 }, { "epoch": 4.04, "learning_rate": 4.798252496433666e-05, "loss": 0.0061, "step": 22640 }, { "epoch": 4.04, "learning_rate": 4.798163338088446e-05, "loss": 0.0069, "step": 22650 }, { "epoch": 4.04, "learning_rate": 4.798074179743224e-05, "loss": 0.009, "step": 22660 }, { "epoch": 4.04, "learning_rate": 4.797985021398003e-05, "loss": 0.0058, "step": 22670 }, { "epoch": 4.04, "learning_rate": 4.7978958630527824e-05, "loss": 0.0083, "step": 22680 }, { "epoch": 4.05, "learning_rate": 4.797806704707561e-05, "loss": 0.0059, "step": 22690 }, { "epoch": 4.05, "learning_rate": 4.79771754636234e-05, "loss": 0.0101, "step": 22700 }, { "epoch": 4.05, "learning_rate": 4.7976283880171184e-05, "loss": 0.0063, "step": 22710 }, { "epoch": 4.05, "learning_rate": 4.7975392296718975e-05, "loss": 0.0082, "step": 22720 }, { "epoch": 4.05, "learning_rate": 4.797450071326676e-05, "loss": 0.0069, "step": 22730 }, { "epoch": 4.05, "learning_rate": 4.797360912981455e-05, "loss": 0.0054, "step": 22740 }, { "epoch": 4.06, "learning_rate": 4.797271754636234e-05, "loss": 0.0096, "step": 22750 }, { "epoch": 4.06, "learning_rate": 4.797182596291013e-05, "loss": 0.0085, "step": 22760 }, { "epoch": 4.06, "learning_rate": 4.7970934379457924e-05, "loss": 0.0058, "step": 22770 }, { "epoch": 4.06, "learning_rate": 4.797004279600571e-05, "loss": 0.0072, "step": 22780 }, { "epoch": 4.06, "learning_rate": 4.79691512125535e-05, "loss": 0.0058, "step": 22790 }, { "epoch": 4.07, "learning_rate": 4.7968259629101284e-05, "loss": 0.0063, "step": 22800 }, { "epoch": 4.07, "learning_rate": 4.7967368045649075e-05, "loss": 0.0076, "step": 22810 }, { "epoch": 4.07, "learning_rate": 4.796647646219686e-05, "loss": 0.0075, "step": 22820 }, { "epoch": 4.07, "learning_rate": 4.796558487874465e-05, "loss": 0.0068, "step": 22830 }, { "epoch": 4.07, "learning_rate": 4.796469329529244e-05, "loss": 0.0059, "step": 22840 }, { "epoch": 4.07, "learning_rate": 4.796380171184023e-05, "loss": 0.0066, "step": 22850 }, { "epoch": 4.08, "learning_rate": 4.796291012838802e-05, "loss": 0.0044, "step": 22860 }, { "epoch": 4.08, "learning_rate": 4.796201854493581e-05, "loss": 0.0047, "step": 22870 }, { "epoch": 4.08, "learning_rate": 4.79611269614836e-05, "loss": 0.0044, "step": 22880 }, { "epoch": 4.08, "learning_rate": 4.7960235378031385e-05, "loss": 0.0075, "step": 22890 }, { "epoch": 4.08, "learning_rate": 4.7959343794579176e-05, "loss": 0.0053, "step": 22900 }, { "epoch": 4.09, "learning_rate": 4.795845221112697e-05, "loss": 0.0059, "step": 22910 }, { "epoch": 4.09, "learning_rate": 4.795756062767475e-05, "loss": 0.0077, "step": 22920 }, { "epoch": 4.09, "learning_rate": 4.795666904422254e-05, "loss": 0.0061, "step": 22930 }, { "epoch": 4.09, "learning_rate": 4.795577746077033e-05, "loss": 0.0059, "step": 22940 }, { "epoch": 4.09, "learning_rate": 4.795488587731812e-05, "loss": 0.0061, "step": 22950 }, { "epoch": 4.09, "learning_rate": 4.79539942938659e-05, "loss": 0.0047, "step": 22960 }, { "epoch": 4.1, "learning_rate": 4.7953102710413694e-05, "loss": 0.0081, "step": 22970 }, { "epoch": 4.1, "learning_rate": 4.7952211126961485e-05, "loss": 0.006, "step": 22980 }, { "epoch": 4.1, "learning_rate": 4.7951319543509277e-05, "loss": 0.0053, "step": 22990 }, { "epoch": 4.1, "learning_rate": 4.795042796005707e-05, "loss": 0.0052, "step": 23000 }, { "epoch": 4.1, "learning_rate": 4.794953637660485e-05, "loss": 0.0085, "step": 23010 }, { "epoch": 4.1, "learning_rate": 4.7948644793152643e-05, "loss": 0.0054, "step": 23020 }, { "epoch": 4.11, "learning_rate": 4.794775320970043e-05, "loss": 0.0066, "step": 23030 }, { "epoch": 4.11, "learning_rate": 4.794686162624822e-05, "loss": 0.0077, "step": 23040 }, { "epoch": 4.11, "learning_rate": 4.7945970042796004e-05, "loss": 0.0042, "step": 23050 }, { "epoch": 4.11, "learning_rate": 4.7945078459343795e-05, "loss": 0.0073, "step": 23060 }, { "epoch": 4.11, "learning_rate": 4.7944186875891586e-05, "loss": 0.0092, "step": 23070 }, { "epoch": 4.12, "learning_rate": 4.794329529243937e-05, "loss": 0.0054, "step": 23080 }, { "epoch": 4.12, "learning_rate": 4.794240370898717e-05, "loss": 0.0053, "step": 23090 }, { "epoch": 4.12, "learning_rate": 4.794151212553495e-05, "loss": 0.0064, "step": 23100 }, { "epoch": 4.12, "learning_rate": 4.7940620542082744e-05, "loss": 0.0055, "step": 23110 }, { "epoch": 4.12, "learning_rate": 4.793972895863053e-05, "loss": 0.0081, "step": 23120 }, { "epoch": 4.12, "learning_rate": 4.793883737517832e-05, "loss": 0.0087, "step": 23130 }, { "epoch": 4.13, "learning_rate": 4.793794579172611e-05, "loss": 0.0058, "step": 23140 }, { "epoch": 4.13, "learning_rate": 4.7937054208273895e-05, "loss": 0.0065, "step": 23150 }, { "epoch": 4.13, "learning_rate": 4.7936162624821687e-05, "loss": 0.0091, "step": 23160 }, { "epoch": 4.13, "learning_rate": 4.793527104136947e-05, "loss": 0.0051, "step": 23170 }, { "epoch": 4.13, "learning_rate": 4.793437945791726e-05, "loss": 0.0048, "step": 23180 }, { "epoch": 4.14, "learning_rate": 4.7933487874465047e-05, "loss": 0.0067, "step": 23190 }, { "epoch": 4.14, "learning_rate": 4.7932596291012845e-05, "loss": 0.0039, "step": 23200 }, { "epoch": 4.14, "learning_rate": 4.793170470756063e-05, "loss": 0.0055, "step": 23210 }, { "epoch": 4.14, "learning_rate": 4.793081312410842e-05, "loss": 0.0058, "step": 23220 }, { "epoch": 4.14, "learning_rate": 4.792992154065621e-05, "loss": 0.0058, "step": 23230 }, { "epoch": 4.14, "learning_rate": 4.7929029957203996e-05, "loss": 0.0062, "step": 23240 }, { "epoch": 4.15, "learning_rate": 4.792813837375179e-05, "loss": 0.0045, "step": 23250 }, { "epoch": 4.15, "learning_rate": 4.792724679029957e-05, "loss": 0.0033, "step": 23260 }, { "epoch": 4.15, "learning_rate": 4.792635520684736e-05, "loss": 0.0052, "step": 23270 }, { "epoch": 4.15, "learning_rate": 4.792546362339515e-05, "loss": 0.0078, "step": 23280 }, { "epoch": 4.15, "learning_rate": 4.792457203994294e-05, "loss": 0.0053, "step": 23290 }, { "epoch": 4.15, "learning_rate": 4.792368045649073e-05, "loss": 0.0052, "step": 23300 }, { "epoch": 4.16, "learning_rate": 4.792278887303852e-05, "loss": 0.0043, "step": 23310 }, { "epoch": 4.16, "learning_rate": 4.792189728958631e-05, "loss": 0.0061, "step": 23320 }, { "epoch": 4.16, "learning_rate": 4.7921005706134096e-05, "loss": 0.0062, "step": 23330 }, { "epoch": 4.16, "learning_rate": 4.792011412268189e-05, "loss": 0.0056, "step": 23340 }, { "epoch": 4.16, "learning_rate": 4.791922253922967e-05, "loss": 0.0065, "step": 23350 }, { "epoch": 4.17, "learning_rate": 4.791833095577746e-05, "loss": 0.0091, "step": 23360 }, { "epoch": 4.17, "learning_rate": 4.7917439372325254e-05, "loss": 0.0077, "step": 23370 }, { "epoch": 4.17, "learning_rate": 4.791654778887304e-05, "loss": 0.0059, "step": 23380 }, { "epoch": 4.17, "learning_rate": 4.791565620542083e-05, "loss": 0.0074, "step": 23390 }, { "epoch": 4.17, "learning_rate": 4.7914764621968615e-05, "loss": 0.006, "step": 23400 }, { "epoch": 4.17, "learning_rate": 4.7913873038516406e-05, "loss": 0.006, "step": 23410 }, { "epoch": 4.18, "learning_rate": 4.79129814550642e-05, "loss": 0.0071, "step": 23420 }, { "epoch": 4.18, "learning_rate": 4.791208987161199e-05, "loss": 0.008, "step": 23430 }, { "epoch": 4.18, "learning_rate": 4.791119828815977e-05, "loss": 0.0063, "step": 23440 }, { "epoch": 4.18, "learning_rate": 4.7910306704707564e-05, "loss": 0.0083, "step": 23450 }, { "epoch": 4.18, "learning_rate": 4.7909415121255355e-05, "loss": 0.0054, "step": 23460 }, { "epoch": 4.19, "learning_rate": 4.790852353780314e-05, "loss": 0.0066, "step": 23470 }, { "epoch": 4.19, "learning_rate": 4.790763195435093e-05, "loss": 0.0077, "step": 23480 }, { "epoch": 4.19, "learning_rate": 4.7906740370898715e-05, "loss": 0.0053, "step": 23490 }, { "epoch": 4.19, "learning_rate": 4.7905848787446506e-05, "loss": 0.01, "step": 23500 }, { "epoch": 4.19, "learning_rate": 4.790495720399429e-05, "loss": 0.007, "step": 23510 }, { "epoch": 4.19, "learning_rate": 4.790406562054208e-05, "loss": 0.006, "step": 23520 }, { "epoch": 4.2, "learning_rate": 4.790317403708987e-05, "loss": 0.0082, "step": 23530 }, { "epoch": 4.2, "learning_rate": 4.7902282453637664e-05, "loss": 0.0068, "step": 23540 }, { "epoch": 4.2, "learning_rate": 4.7901390870185456e-05, "loss": 0.0062, "step": 23550 }, { "epoch": 4.2, "learning_rate": 4.790049928673324e-05, "loss": 0.0094, "step": 23560 }, { "epoch": 4.2, "learning_rate": 4.789960770328103e-05, "loss": 0.0075, "step": 23570 }, { "epoch": 4.2, "learning_rate": 4.7898716119828816e-05, "loss": 0.0055, "step": 23580 }, { "epoch": 4.21, "learning_rate": 4.789782453637661e-05, "loss": 0.0039, "step": 23590 }, { "epoch": 4.21, "learning_rate": 4.78969329529244e-05, "loss": 0.0093, "step": 23600 }, { "epoch": 4.21, "learning_rate": 4.789604136947218e-05, "loss": 0.0067, "step": 23610 }, { "epoch": 4.21, "learning_rate": 4.7895149786019974e-05, "loss": 0.0061, "step": 23620 }, { "epoch": 4.21, "learning_rate": 4.789425820256776e-05, "loss": 0.0089, "step": 23630 }, { "epoch": 4.22, "learning_rate": 4.7893366619115556e-05, "loss": 0.0072, "step": 23640 }, { "epoch": 4.22, "learning_rate": 4.789247503566334e-05, "loss": 0.0079, "step": 23650 }, { "epoch": 4.22, "learning_rate": 4.789158345221113e-05, "loss": 0.0091, "step": 23660 }, { "epoch": 4.22, "learning_rate": 4.7890691868758916e-05, "loss": 0.008, "step": 23670 }, { "epoch": 4.22, "learning_rate": 4.788980028530671e-05, "loss": 0.0069, "step": 23680 }, { "epoch": 4.22, "learning_rate": 4.78889087018545e-05, "loss": 0.0064, "step": 23690 }, { "epoch": 4.23, "learning_rate": 4.788801711840228e-05, "loss": 0.0077, "step": 23700 }, { "epoch": 4.23, "learning_rate": 4.7887125534950074e-05, "loss": 0.0059, "step": 23710 }, { "epoch": 4.23, "learning_rate": 4.788623395149786e-05, "loss": 0.0073, "step": 23720 }, { "epoch": 4.23, "learning_rate": 4.788534236804565e-05, "loss": 0.0045, "step": 23730 }, { "epoch": 4.23, "learning_rate": 4.7884450784593434e-05, "loss": 0.0075, "step": 23740 }, { "epoch": 4.24, "learning_rate": 4.788355920114123e-05, "loss": 0.0099, "step": 23750 }, { "epoch": 4.24, "learning_rate": 4.788266761768902e-05, "loss": 0.0038, "step": 23760 }, { "epoch": 4.24, "learning_rate": 4.788177603423681e-05, "loss": 0.0057, "step": 23770 }, { "epoch": 4.24, "learning_rate": 4.78808844507846e-05, "loss": 0.0072, "step": 23780 }, { "epoch": 4.24, "learning_rate": 4.7879992867332384e-05, "loss": 0.0096, "step": 23790 }, { "epoch": 4.24, "learning_rate": 4.7879101283880175e-05, "loss": 0.0043, "step": 23800 }, { "epoch": 4.25, "learning_rate": 4.787820970042796e-05, "loss": 0.0074, "step": 23810 }, { "epoch": 4.25, "learning_rate": 4.787731811697575e-05, "loss": 0.0084, "step": 23820 }, { "epoch": 4.25, "learning_rate": 4.787642653352354e-05, "loss": 0.0056, "step": 23830 }, { "epoch": 4.25, "learning_rate": 4.7875534950071326e-05, "loss": 0.0079, "step": 23840 }, { "epoch": 4.25, "learning_rate": 4.787464336661912e-05, "loss": 0.0046, "step": 23850 }, { "epoch": 4.25, "learning_rate": 4.787375178316691e-05, "loss": 0.0055, "step": 23860 }, { "epoch": 4.26, "learning_rate": 4.78728601997147e-05, "loss": 0.0081, "step": 23870 }, { "epoch": 4.26, "learning_rate": 4.7871968616262484e-05, "loss": 0.0129, "step": 23880 }, { "epoch": 4.26, "learning_rate": 4.7871077032810275e-05, "loss": 0.0078, "step": 23890 }, { "epoch": 4.26, "learning_rate": 4.787018544935806e-05, "loss": 0.0065, "step": 23900 }, { "epoch": 4.26, "learning_rate": 4.786929386590585e-05, "loss": 0.0058, "step": 23910 }, { "epoch": 4.27, "learning_rate": 4.786840228245364e-05, "loss": 0.0088, "step": 23920 }, { "epoch": 4.27, "learning_rate": 4.786751069900143e-05, "loss": 0.0071, "step": 23930 }, { "epoch": 4.27, "learning_rate": 4.786661911554922e-05, "loss": 0.0057, "step": 23940 }, { "epoch": 4.27, "learning_rate": 4.7865727532097e-05, "loss": 0.0066, "step": 23950 }, { "epoch": 4.27, "learning_rate": 4.7864835948644794e-05, "loss": 0.0053, "step": 23960 }, { "epoch": 4.27, "learning_rate": 4.7863944365192585e-05, "loss": 0.0059, "step": 23970 }, { "epoch": 4.28, "learning_rate": 4.7863052781740376e-05, "loss": 0.005, "step": 23980 }, { "epoch": 4.28, "learning_rate": 4.786216119828816e-05, "loss": 0.0043, "step": 23990 }, { "epoch": 4.28, "learning_rate": 4.786126961483595e-05, "loss": 0.0058, "step": 24000 }, { "epoch": 4.28, "learning_rate": 4.786037803138374e-05, "loss": 0.0054, "step": 24010 }, { "epoch": 4.28, "learning_rate": 4.785948644793153e-05, "loss": 0.0061, "step": 24020 }, { "epoch": 4.28, "learning_rate": 4.785859486447932e-05, "loss": 0.007, "step": 24030 }, { "epoch": 4.29, "learning_rate": 4.78577032810271e-05, "loss": 0.0089, "step": 24040 }, { "epoch": 4.29, "learning_rate": 4.7856811697574894e-05, "loss": 0.009, "step": 24050 }, { "epoch": 4.29, "learning_rate": 4.7855920114122685e-05, "loss": 0.01, "step": 24060 }, { "epoch": 4.29, "learning_rate": 4.785502853067047e-05, "loss": 0.0056, "step": 24070 }, { "epoch": 4.29, "learning_rate": 4.785413694721826e-05, "loss": 0.0055, "step": 24080 }, { "epoch": 4.3, "learning_rate": 4.785324536376605e-05, "loss": 0.0062, "step": 24090 }, { "epoch": 4.3, "learning_rate": 4.7852353780313843e-05, "loss": 0.0076, "step": 24100 }, { "epoch": 4.3, "learning_rate": 4.785146219686163e-05, "loss": 0.0091, "step": 24110 }, { "epoch": 4.3, "learning_rate": 4.785057061340942e-05, "loss": 0.0081, "step": 24120 }, { "epoch": 4.3, "learning_rate": 4.7849679029957203e-05, "loss": 0.0035, "step": 24130 }, { "epoch": 4.3, "learning_rate": 4.7848787446504995e-05, "loss": 0.0057, "step": 24140 }, { "epoch": 4.31, "learning_rate": 4.7847895863052786e-05, "loss": 0.0076, "step": 24150 }, { "epoch": 4.31, "learning_rate": 4.784700427960057e-05, "loss": 0.007, "step": 24160 }, { "epoch": 4.31, "learning_rate": 4.784611269614836e-05, "loss": 0.0072, "step": 24170 }, { "epoch": 4.31, "learning_rate": 4.7845221112696146e-05, "loss": 0.0056, "step": 24180 }, { "epoch": 4.31, "learning_rate": 4.7844329529243944e-05, "loss": 0.0117, "step": 24190 }, { "epoch": 4.32, "learning_rate": 4.784343794579173e-05, "loss": 0.007, "step": 24200 }, { "epoch": 4.32, "learning_rate": 4.784254636233952e-05, "loss": 0.0058, "step": 24210 }, { "epoch": 4.32, "learning_rate": 4.7841654778887304e-05, "loss": 0.008, "step": 24220 }, { "epoch": 4.32, "learning_rate": 4.7840763195435095e-05, "loss": 0.0044, "step": 24230 }, { "epoch": 4.32, "learning_rate": 4.7839871611982886e-05, "loss": 0.0067, "step": 24240 }, { "epoch": 4.32, "learning_rate": 4.783898002853067e-05, "loss": 0.0064, "step": 24250 }, { "epoch": 4.33, "learning_rate": 4.783808844507846e-05, "loss": 0.0084, "step": 24260 }, { "epoch": 4.33, "learning_rate": 4.7837196861626247e-05, "loss": 0.0068, "step": 24270 }, { "epoch": 4.33, "learning_rate": 4.783630527817404e-05, "loss": 0.0073, "step": 24280 }, { "epoch": 4.33, "learning_rate": 4.783541369472183e-05, "loss": 0.0066, "step": 24290 }, { "epoch": 4.33, "learning_rate": 4.783452211126962e-05, "loss": 0.0091, "step": 24300 }, { "epoch": 4.33, "learning_rate": 4.7833630527817405e-05, "loss": 0.0068, "step": 24310 }, { "epoch": 4.34, "learning_rate": 4.7832738944365196e-05, "loss": 0.009, "step": 24320 }, { "epoch": 4.34, "learning_rate": 4.783184736091299e-05, "loss": 0.0074, "step": 24330 }, { "epoch": 4.34, "learning_rate": 4.783095577746077e-05, "loss": 0.0056, "step": 24340 }, { "epoch": 4.34, "learning_rate": 4.783006419400856e-05, "loss": 0.0075, "step": 24350 }, { "epoch": 4.34, "learning_rate": 4.782917261055635e-05, "loss": 0.0039, "step": 24360 }, { "epoch": 4.35, "learning_rate": 4.782828102710414e-05, "loss": 0.007, "step": 24370 }, { "epoch": 4.35, "learning_rate": 4.782738944365193e-05, "loss": 0.0057, "step": 24380 }, { "epoch": 4.35, "learning_rate": 4.7826497860199714e-05, "loss": 0.0024, "step": 24390 }, { "epoch": 4.35, "learning_rate": 4.7825606276747505e-05, "loss": 0.0067, "step": 24400 }, { "epoch": 4.35, "learning_rate": 4.7824714693295296e-05, "loss": 0.0089, "step": 24410 }, { "epoch": 4.35, "learning_rate": 4.782382310984309e-05, "loss": 0.0046, "step": 24420 }, { "epoch": 4.36, "learning_rate": 4.782293152639087e-05, "loss": 0.0081, "step": 24430 }, { "epoch": 4.36, "learning_rate": 4.782203994293866e-05, "loss": 0.0096, "step": 24440 }, { "epoch": 4.36, "learning_rate": 4.782114835948645e-05, "loss": 0.0039, "step": 24450 }, { "epoch": 4.36, "learning_rate": 4.782025677603424e-05, "loss": 0.0053, "step": 24460 }, { "epoch": 4.36, "learning_rate": 4.781936519258203e-05, "loss": 0.0111, "step": 24470 }, { "epoch": 4.37, "learning_rate": 4.7818473609129815e-05, "loss": 0.0041, "step": 24480 }, { "epoch": 4.37, "learning_rate": 4.7817582025677606e-05, "loss": 0.0083, "step": 24490 }, { "epoch": 4.37, "learning_rate": 4.7816779600570614e-05, "loss": 0.0113, "step": 24500 }, { "epoch": 4.37, "learning_rate": 4.7815888017118405e-05, "loss": 0.0046, "step": 24510 }, { "epoch": 4.37, "learning_rate": 4.781499643366619e-05, "loss": 0.0039, "step": 24520 }, { "epoch": 4.37, "learning_rate": 4.781410485021398e-05, "loss": 0.0054, "step": 24530 }, { "epoch": 4.38, "learning_rate": 4.781321326676177e-05, "loss": 0.0062, "step": 24540 }, { "epoch": 4.38, "learning_rate": 4.7812321683309556e-05, "loss": 0.0071, "step": 24550 }, { "epoch": 4.38, "learning_rate": 4.7811430099857354e-05, "loss": 0.0047, "step": 24560 }, { "epoch": 4.38, "learning_rate": 4.781053851640514e-05, "loss": 0.0066, "step": 24570 }, { "epoch": 4.38, "learning_rate": 4.780964693295293e-05, "loss": 0.0051, "step": 24580 }, { "epoch": 4.38, "learning_rate": 4.7808755349500714e-05, "loss": 0.0061, "step": 24590 }, { "epoch": 4.39, "learning_rate": 4.7807863766048506e-05, "loss": 0.01, "step": 24600 }, { "epoch": 4.39, "learning_rate": 4.78069721825963e-05, "loss": 0.0059, "step": 24610 }, { "epoch": 4.39, "learning_rate": 4.780608059914408e-05, "loss": 0.0082, "step": 24620 }, { "epoch": 4.39, "learning_rate": 4.780518901569187e-05, "loss": 0.0046, "step": 24630 }, { "epoch": 4.39, "learning_rate": 4.780429743223966e-05, "loss": 0.0093, "step": 24640 }, { "epoch": 4.4, "learning_rate": 4.780340584878745e-05, "loss": 0.0047, "step": 24650 }, { "epoch": 4.4, "learning_rate": 4.780251426533523e-05, "loss": 0.0073, "step": 24660 }, { "epoch": 4.4, "learning_rate": 4.780162268188303e-05, "loss": 0.0062, "step": 24670 }, { "epoch": 4.4, "learning_rate": 4.7800731098430815e-05, "loss": 0.0081, "step": 24680 }, { "epoch": 4.4, "learning_rate": 4.7799839514978606e-05, "loss": 0.0055, "step": 24690 }, { "epoch": 4.4, "learning_rate": 4.77989479315264e-05, "loss": 0.0069, "step": 24700 }, { "epoch": 4.41, "learning_rate": 4.779805634807418e-05, "loss": 0.0076, "step": 24710 }, { "epoch": 4.41, "learning_rate": 4.779716476462197e-05, "loss": 0.0069, "step": 24720 }, { "epoch": 4.41, "learning_rate": 4.779627318116976e-05, "loss": 0.0082, "step": 24730 }, { "epoch": 4.41, "learning_rate": 4.779538159771755e-05, "loss": 0.0077, "step": 24740 }, { "epoch": 4.41, "learning_rate": 4.779449001426533e-05, "loss": 0.0058, "step": 24750 }, { "epoch": 4.42, "learning_rate": 4.7793598430813124e-05, "loss": 0.008, "step": 24760 }, { "epoch": 4.42, "learning_rate": 4.7792706847360915e-05, "loss": 0.0072, "step": 24770 }, { "epoch": 4.42, "learning_rate": 4.779181526390871e-05, "loss": 0.0088, "step": 24780 }, { "epoch": 4.42, "learning_rate": 4.77909236804565e-05, "loss": 0.0081, "step": 24790 }, { "epoch": 4.42, "learning_rate": 4.779003209700428e-05, "loss": 0.0042, "step": 24800 }, { "epoch": 4.42, "learning_rate": 4.7789140513552074e-05, "loss": 0.0076, "step": 24810 }, { "epoch": 4.43, "learning_rate": 4.778824893009986e-05, "loss": 0.0097, "step": 24820 }, { "epoch": 4.43, "learning_rate": 4.778735734664765e-05, "loss": 0.0103, "step": 24830 }, { "epoch": 4.43, "learning_rate": 4.778646576319544e-05, "loss": 0.0061, "step": 24840 }, { "epoch": 4.43, "learning_rate": 4.7785574179743225e-05, "loss": 0.0069, "step": 24850 }, { "epoch": 4.43, "learning_rate": 4.7784682596291016e-05, "loss": 0.0057, "step": 24860 }, { "epoch": 4.43, "learning_rate": 4.7783880171184024e-05, "loss": 0.0083, "step": 24870 }, { "epoch": 4.44, "learning_rate": 4.7782988587731815e-05, "loss": 0.0047, "step": 24880 }, { "epoch": 4.44, "learning_rate": 4.77820970042796e-05, "loss": 0.0057, "step": 24890 }, { "epoch": 4.44, "learning_rate": 4.778120542082739e-05, "loss": 0.0067, "step": 24900 }, { "epoch": 4.44, "learning_rate": 4.778031383737518e-05, "loss": 0.0067, "step": 24910 }, { "epoch": 4.44, "learning_rate": 4.7779422253922967e-05, "loss": 0.0053, "step": 24920 }, { "epoch": 4.45, "learning_rate": 4.777853067047076e-05, "loss": 0.0065, "step": 24930 }, { "epoch": 4.45, "learning_rate": 4.777763908701855e-05, "loss": 0.0093, "step": 24940 }, { "epoch": 4.45, "learning_rate": 4.777674750356634e-05, "loss": 0.0054, "step": 24950 }, { "epoch": 4.45, "learning_rate": 4.7775855920114125e-05, "loss": 0.0075, "step": 24960 }, { "epoch": 4.45, "learning_rate": 4.7774964336661916e-05, "loss": 0.0059, "step": 24970 }, { "epoch": 4.45, "learning_rate": 4.77740727532097e-05, "loss": 0.008, "step": 24980 }, { "epoch": 4.46, "learning_rate": 4.777318116975749e-05, "loss": 0.0063, "step": 24990 }, { "epoch": 4.46, "learning_rate": 4.777228958630528e-05, "loss": 0.0108, "step": 25000 }, { "epoch": 4.46, "learning_rate": 4.777139800285307e-05, "loss": 0.0067, "step": 25010 }, { "epoch": 4.46, "learning_rate": 4.777050641940086e-05, "loss": 0.0046, "step": 25020 }, { "epoch": 4.46, "learning_rate": 4.776961483594864e-05, "loss": 0.0039, "step": 25030 }, { "epoch": 4.47, "learning_rate": 4.7768723252496434e-05, "loss": 0.0082, "step": 25040 }, { "epoch": 4.47, "learning_rate": 4.7767831669044225e-05, "loss": 0.0039, "step": 25050 }, { "epoch": 4.47, "learning_rate": 4.7766940085592016e-05, "loss": 0.0076, "step": 25060 }, { "epoch": 4.47, "learning_rate": 4.77660485021398e-05, "loss": 0.0063, "step": 25070 }, { "epoch": 4.47, "learning_rate": 4.776515691868759e-05, "loss": 0.0056, "step": 25080 }, { "epoch": 4.47, "learning_rate": 4.776426533523538e-05, "loss": 0.0099, "step": 25090 }, { "epoch": 4.48, "learning_rate": 4.776337375178317e-05, "loss": 0.0078, "step": 25100 }, { "epoch": 4.48, "learning_rate": 4.776248216833096e-05, "loss": 0.0056, "step": 25110 }, { "epoch": 4.48, "learning_rate": 4.776159058487874e-05, "loss": 0.0059, "step": 25120 }, { "epoch": 4.48, "learning_rate": 4.7760699001426535e-05, "loss": 0.0066, "step": 25130 }, { "epoch": 4.48, "learning_rate": 4.7759807417974326e-05, "loss": 0.0059, "step": 25140 }, { "epoch": 4.48, "learning_rate": 4.775891583452211e-05, "loss": 0.0063, "step": 25150 }, { "epoch": 4.49, "learning_rate": 4.775802425106991e-05, "loss": 0.0053, "step": 25160 }, { "epoch": 4.49, "learning_rate": 4.775713266761769e-05, "loss": 0.0062, "step": 25170 }, { "epoch": 4.49, "learning_rate": 4.7756241084165484e-05, "loss": 0.0118, "step": 25180 }, { "epoch": 4.49, "learning_rate": 4.775534950071327e-05, "loss": 0.0072, "step": 25190 }, { "epoch": 4.49, "learning_rate": 4.775445791726106e-05, "loss": 0.0049, "step": 25200 }, { "epoch": 4.5, "learning_rate": 4.7753566333808844e-05, "loss": 0.0076, "step": 25210 }, { "epoch": 4.5, "learning_rate": 4.7752674750356635e-05, "loss": 0.0102, "step": 25220 }, { "epoch": 4.5, "learning_rate": 4.7751783166904426e-05, "loss": 0.0071, "step": 25230 }, { "epoch": 4.5, "learning_rate": 4.775089158345221e-05, "loss": 0.0033, "step": 25240 }, { "epoch": 4.5, "learning_rate": 4.775e-05, "loss": 0.0055, "step": 25250 }, { "epoch": 4.5, "learning_rate": 4.7749108416547786e-05, "loss": 0.0058, "step": 25260 }, { "epoch": 4.51, "learning_rate": 4.7748216833095584e-05, "loss": 0.0062, "step": 25270 }, { "epoch": 4.51, "learning_rate": 4.774732524964337e-05, "loss": 0.0038, "step": 25280 }, { "epoch": 4.51, "learning_rate": 4.774643366619116e-05, "loss": 0.0066, "step": 25290 }, { "epoch": 4.51, "learning_rate": 4.7745542082738944e-05, "loss": 0.0059, "step": 25300 }, { "epoch": 4.51, "learning_rate": 4.7744650499286736e-05, "loss": 0.0066, "step": 25310 }, { "epoch": 4.51, "learning_rate": 4.774375891583453e-05, "loss": 0.0047, "step": 25320 }, { "epoch": 4.52, "learning_rate": 4.774286733238231e-05, "loss": 0.0063, "step": 25330 }, { "epoch": 4.52, "learning_rate": 4.77419757489301e-05, "loss": 0.0057, "step": 25340 }, { "epoch": 4.52, "learning_rate": 4.774108416547789e-05, "loss": 0.0073, "step": 25350 }, { "epoch": 4.52, "learning_rate": 4.774019258202568e-05, "loss": 0.0078, "step": 25360 }, { "epoch": 4.52, "learning_rate": 4.773930099857347e-05, "loss": 0.0059, "step": 25370 }, { "epoch": 4.53, "learning_rate": 4.773840941512126e-05, "loss": 0.0078, "step": 25380 }, { "epoch": 4.53, "learning_rate": 4.773751783166905e-05, "loss": 0.0063, "step": 25390 }, { "epoch": 4.53, "learning_rate": 4.7736626248216836e-05, "loss": 0.006, "step": 25400 }, { "epoch": 4.53, "learning_rate": 4.773573466476463e-05, "loss": 0.0055, "step": 25410 }, { "epoch": 4.53, "learning_rate": 4.773484308131241e-05, "loss": 0.0081, "step": 25420 }, { "epoch": 4.53, "learning_rate": 4.77339514978602e-05, "loss": 0.0054, "step": 25430 }, { "epoch": 4.54, "learning_rate": 4.773305991440799e-05, "loss": 0.0066, "step": 25440 }, { "epoch": 4.54, "learning_rate": 4.773216833095578e-05, "loss": 0.0088, "step": 25450 }, { "epoch": 4.54, "learning_rate": 4.773127674750357e-05, "loss": 0.0059, "step": 25460 }, { "epoch": 4.54, "learning_rate": 4.7730385164051354e-05, "loss": 0.0082, "step": 25470 }, { "epoch": 4.54, "learning_rate": 4.7729493580599146e-05, "loss": 0.0062, "step": 25480 }, { "epoch": 4.55, "learning_rate": 4.772860199714694e-05, "loss": 0.004, "step": 25490 }, { "epoch": 4.55, "learning_rate": 4.772771041369473e-05, "loss": 0.0062, "step": 25500 }, { "epoch": 4.55, "learning_rate": 4.772681883024251e-05, "loss": 0.0087, "step": 25510 }, { "epoch": 4.55, "learning_rate": 4.7725927246790304e-05, "loss": 0.0075, "step": 25520 }, { "epoch": 4.55, "learning_rate": 4.772503566333809e-05, "loss": 0.0046, "step": 25530 }, { "epoch": 4.55, "learning_rate": 4.772414407988588e-05, "loss": 0.0054, "step": 25540 }, { "epoch": 4.56, "learning_rate": 4.772325249643367e-05, "loss": 0.0075, "step": 25550 }, { "epoch": 4.56, "learning_rate": 4.7722360912981455e-05, "loss": 0.0062, "step": 25560 }, { "epoch": 4.56, "learning_rate": 4.7721469329529246e-05, "loss": 0.0052, "step": 25570 }, { "epoch": 4.56, "learning_rate": 4.772057774607703e-05, "loss": 0.0074, "step": 25580 }, { "epoch": 4.56, "learning_rate": 4.771968616262482e-05, "loss": 0.0065, "step": 25590 }, { "epoch": 4.56, "learning_rate": 4.771879457917261e-05, "loss": 0.0095, "step": 25600 }, { "epoch": 4.57, "learning_rate": 4.7717902995720404e-05, "loss": 0.0031, "step": 25610 }, { "epoch": 4.57, "learning_rate": 4.7717011412268195e-05, "loss": 0.0044, "step": 25620 }, { "epoch": 4.57, "learning_rate": 4.771611982881598e-05, "loss": 0.0077, "step": 25630 }, { "epoch": 4.57, "learning_rate": 4.771522824536377e-05, "loss": 0.005, "step": 25640 }, { "epoch": 4.57, "learning_rate": 4.7714336661911555e-05, "loss": 0.0058, "step": 25650 }, { "epoch": 4.58, "learning_rate": 4.771344507845935e-05, "loss": 0.0057, "step": 25660 }, { "epoch": 4.58, "learning_rate": 4.771255349500713e-05, "loss": 0.0076, "step": 25670 }, { "epoch": 4.58, "learning_rate": 4.771166191155492e-05, "loss": 0.0065, "step": 25680 }, { "epoch": 4.58, "learning_rate": 4.7710770328102714e-05, "loss": 0.0048, "step": 25690 }, { "epoch": 4.58, "learning_rate": 4.77098787446505e-05, "loss": 0.0058, "step": 25700 }, { "epoch": 4.58, "learning_rate": 4.7708987161198296e-05, "loss": 0.0049, "step": 25710 }, { "epoch": 4.59, "learning_rate": 4.770809557774608e-05, "loss": 0.0087, "step": 25720 }, { "epoch": 4.59, "learning_rate": 4.770720399429387e-05, "loss": 0.0094, "step": 25730 }, { "epoch": 4.59, "learning_rate": 4.7706312410841656e-05, "loss": 0.0038, "step": 25740 }, { "epoch": 4.59, "learning_rate": 4.770542082738945e-05, "loss": 0.0074, "step": 25750 }, { "epoch": 4.59, "learning_rate": 4.770452924393723e-05, "loss": 0.0053, "step": 25760 }, { "epoch": 4.6, "learning_rate": 4.770363766048502e-05, "loss": 0.006, "step": 25770 }, { "epoch": 4.6, "learning_rate": 4.7702746077032814e-05, "loss": 0.0068, "step": 25780 }, { "epoch": 4.6, "learning_rate": 4.77018544935806e-05, "loss": 0.0045, "step": 25790 }, { "epoch": 4.6, "learning_rate": 4.770096291012839e-05, "loss": 0.0088, "step": 25800 }, { "epoch": 4.6, "learning_rate": 4.7700071326676174e-05, "loss": 0.0066, "step": 25810 }, { "epoch": 4.6, "learning_rate": 4.769917974322397e-05, "loss": 0.009, "step": 25820 }, { "epoch": 4.61, "learning_rate": 4.7698288159771757e-05, "loss": 0.0052, "step": 25830 }, { "epoch": 4.61, "learning_rate": 4.769739657631955e-05, "loss": 0.0061, "step": 25840 }, { "epoch": 4.61, "learning_rate": 4.769650499286734e-05, "loss": 0.0061, "step": 25850 }, { "epoch": 4.61, "learning_rate": 4.7695613409415123e-05, "loss": 0.0052, "step": 25860 }, { "epoch": 4.61, "learning_rate": 4.7694721825962915e-05, "loss": 0.007, "step": 25870 }, { "epoch": 4.61, "learning_rate": 4.76938302425107e-05, "loss": 0.0031, "step": 25880 }, { "epoch": 4.62, "learning_rate": 4.769293865905849e-05, "loss": 0.0069, "step": 25890 }, { "epoch": 4.62, "learning_rate": 4.7692047075606275e-05, "loss": 0.0038, "step": 25900 }, { "epoch": 4.62, "learning_rate": 4.7691155492154066e-05, "loss": 0.006, "step": 25910 }, { "epoch": 4.62, "learning_rate": 4.769026390870186e-05, "loss": 0.0071, "step": 25920 }, { "epoch": 4.62, "learning_rate": 4.768937232524965e-05, "loss": 0.0047, "step": 25930 }, { "epoch": 4.63, "learning_rate": 4.768848074179744e-05, "loss": 0.0085, "step": 25940 }, { "epoch": 4.63, "learning_rate": 4.7687589158345224e-05, "loss": 0.0089, "step": 25950 }, { "epoch": 4.63, "learning_rate": 4.7686697574893015e-05, "loss": 0.0101, "step": 25960 }, { "epoch": 4.63, "learning_rate": 4.76858059914408e-05, "loss": 0.0068, "step": 25970 }, { "epoch": 4.63, "learning_rate": 4.768491440798859e-05, "loss": 0.0083, "step": 25980 }, { "epoch": 4.63, "learning_rate": 4.7684022824536375e-05, "loss": 0.0069, "step": 25990 }, { "epoch": 4.64, "learning_rate": 4.7683131241084166e-05, "loss": 0.0081, "step": 26000 }, { "epoch": 4.64, "learning_rate": 4.768223965763196e-05, "loss": 0.0059, "step": 26010 }, { "epoch": 4.64, "learning_rate": 4.768134807417974e-05, "loss": 0.0071, "step": 26020 }, { "epoch": 4.64, "learning_rate": 4.768045649072753e-05, "loss": 0.0036, "step": 26030 }, { "epoch": 4.64, "learning_rate": 4.7679564907275325e-05, "loss": 0.0062, "step": 26040 }, { "epoch": 4.65, "learning_rate": 4.7678673323823116e-05, "loss": 0.0069, "step": 26050 }, { "epoch": 4.65, "learning_rate": 4.76777817403709e-05, "loss": 0.0056, "step": 26060 }, { "epoch": 4.65, "learning_rate": 4.767689015691869e-05, "loss": 0.0077, "step": 26070 }, { "epoch": 4.65, "learning_rate": 4.767599857346648e-05, "loss": 0.0079, "step": 26080 }, { "epoch": 4.65, "learning_rate": 4.767510699001427e-05, "loss": 0.0056, "step": 26090 }, { "epoch": 4.65, "learning_rate": 4.767421540656206e-05, "loss": 0.0058, "step": 26100 }, { "epoch": 4.66, "learning_rate": 4.767332382310984e-05, "loss": 0.0063, "step": 26110 }, { "epoch": 4.66, "learning_rate": 4.7672432239657634e-05, "loss": 0.0055, "step": 26120 }, { "epoch": 4.66, "learning_rate": 4.767154065620542e-05, "loss": 0.0087, "step": 26130 }, { "epoch": 4.66, "learning_rate": 4.767064907275321e-05, "loss": 0.0074, "step": 26140 }, { "epoch": 4.66, "learning_rate": 4.7669757489301e-05, "loss": 0.007, "step": 26150 }, { "epoch": 4.66, "learning_rate": 4.766886590584879e-05, "loss": 0.0088, "step": 26160 }, { "epoch": 4.67, "learning_rate": 4.766797432239658e-05, "loss": 0.0076, "step": 26170 }, { "epoch": 4.67, "learning_rate": 4.766708273894437e-05, "loss": 0.0092, "step": 26180 }, { "epoch": 4.67, "learning_rate": 4.766619115549216e-05, "loss": 0.006, "step": 26190 }, { "epoch": 4.67, "learning_rate": 4.766529957203994e-05, "loss": 0.01, "step": 26200 }, { "epoch": 4.67, "learning_rate": 4.7664407988587734e-05, "loss": 0.007, "step": 26210 }, { "epoch": 4.68, "learning_rate": 4.766351640513552e-05, "loss": 0.004, "step": 26220 }, { "epoch": 4.68, "learning_rate": 4.766262482168331e-05, "loss": 0.0088, "step": 26230 }, { "epoch": 4.68, "learning_rate": 4.76617332382311e-05, "loss": 0.0081, "step": 26240 }, { "epoch": 4.68, "learning_rate": 4.7660841654778886e-05, "loss": 0.0062, "step": 26250 }, { "epoch": 4.68, "learning_rate": 4.7659950071326684e-05, "loss": 0.0079, "step": 26260 }, { "epoch": 4.68, "learning_rate": 4.765905848787447e-05, "loss": 0.0067, "step": 26270 }, { "epoch": 4.69, "learning_rate": 4.765816690442226e-05, "loss": 0.0056, "step": 26280 }, { "epoch": 4.69, "learning_rate": 4.7657275320970044e-05, "loss": 0.0081, "step": 26290 }, { "epoch": 4.69, "learning_rate": 4.7656383737517835e-05, "loss": 0.0064, "step": 26300 }, { "epoch": 4.69, "learning_rate": 4.7655492154065626e-05, "loss": 0.0071, "step": 26310 }, { "epoch": 4.69, "learning_rate": 4.765460057061341e-05, "loss": 0.0056, "step": 26320 }, { "epoch": 4.7, "learning_rate": 4.76537089871612e-05, "loss": 0.0054, "step": 26330 }, { "epoch": 4.7, "learning_rate": 4.7652817403708986e-05, "loss": 0.0065, "step": 26340 }, { "epoch": 4.7, "learning_rate": 4.765192582025678e-05, "loss": 0.0059, "step": 26350 }, { "epoch": 4.7, "learning_rate": 4.765103423680456e-05, "loss": 0.01, "step": 26360 }, { "epoch": 4.7, "learning_rate": 4.765014265335236e-05, "loss": 0.0055, "step": 26370 }, { "epoch": 4.7, "learning_rate": 4.7649251069900144e-05, "loss": 0.0065, "step": 26380 }, { "epoch": 4.71, "learning_rate": 4.7648359486447936e-05, "loss": 0.0057, "step": 26390 }, { "epoch": 4.71, "learning_rate": 4.764746790299573e-05, "loss": 0.0066, "step": 26400 }, { "epoch": 4.71, "learning_rate": 4.764657631954351e-05, "loss": 0.0067, "step": 26410 }, { "epoch": 4.71, "learning_rate": 4.76456847360913e-05, "loss": 0.0067, "step": 26420 }, { "epoch": 4.71, "learning_rate": 4.764479315263909e-05, "loss": 0.0065, "step": 26430 }, { "epoch": 4.71, "learning_rate": 4.764390156918688e-05, "loss": 0.0083, "step": 26440 }, { "epoch": 4.72, "learning_rate": 4.764300998573466e-05, "loss": 0.0083, "step": 26450 }, { "epoch": 4.72, "learning_rate": 4.7642118402282454e-05, "loss": 0.0071, "step": 26460 }, { "epoch": 4.72, "learning_rate": 4.7641226818830245e-05, "loss": 0.0047, "step": 26470 }, { "epoch": 4.72, "learning_rate": 4.7640335235378036e-05, "loss": 0.0052, "step": 26480 }, { "epoch": 4.72, "learning_rate": 4.763944365192583e-05, "loss": 0.0044, "step": 26490 }, { "epoch": 4.73, "learning_rate": 4.763855206847361e-05, "loss": 0.0066, "step": 26500 }, { "epoch": 4.73, "learning_rate": 4.76376604850214e-05, "loss": 0.0075, "step": 26510 }, { "epoch": 4.73, "learning_rate": 4.763676890156919e-05, "loss": 0.0065, "step": 26520 }, { "epoch": 4.73, "learning_rate": 4.763587731811698e-05, "loss": 0.011, "step": 26530 }, { "epoch": 4.73, "learning_rate": 4.763498573466477e-05, "loss": 0.0061, "step": 26540 }, { "epoch": 4.73, "learning_rate": 4.7634094151212554e-05, "loss": 0.0045, "step": 26550 }, { "epoch": 4.74, "learning_rate": 4.7633202567760345e-05, "loss": 0.0065, "step": 26560 }, { "epoch": 4.74, "learning_rate": 4.763231098430813e-05, "loss": 0.0059, "step": 26570 }, { "epoch": 4.74, "learning_rate": 4.763141940085592e-05, "loss": 0.006, "step": 26580 }, { "epoch": 4.74, "learning_rate": 4.763052781740371e-05, "loss": 0.005, "step": 26590 }, { "epoch": 4.74, "learning_rate": 4.7629636233951504e-05, "loss": 0.0071, "step": 26600 }, { "epoch": 4.75, "learning_rate": 4.762874465049929e-05, "loss": 0.0033, "step": 26610 }, { "epoch": 4.75, "learning_rate": 4.762785306704708e-05, "loss": 0.0097, "step": 26620 }, { "epoch": 4.75, "learning_rate": 4.762696148359487e-05, "loss": 0.0078, "step": 26630 }, { "epoch": 4.75, "learning_rate": 4.7626069900142655e-05, "loss": 0.0051, "step": 26640 }, { "epoch": 4.75, "learning_rate": 4.7625178316690446e-05, "loss": 0.0058, "step": 26650 }, { "epoch": 4.75, "learning_rate": 4.762428673323823e-05, "loss": 0.0059, "step": 26660 }, { "epoch": 4.76, "learning_rate": 4.762339514978602e-05, "loss": 0.0081, "step": 26670 }, { "epoch": 4.76, "learning_rate": 4.7622503566333806e-05, "loss": 0.0084, "step": 26680 }, { "epoch": 4.76, "learning_rate": 4.76216119828816e-05, "loss": 0.0075, "step": 26690 }, { "epoch": 4.76, "learning_rate": 4.762072039942939e-05, "loss": 0.005, "step": 26700 }, { "epoch": 4.76, "learning_rate": 4.761982881597718e-05, "loss": 0.0068, "step": 26710 }, { "epoch": 4.76, "learning_rate": 4.761893723252497e-05, "loss": 0.0051, "step": 26720 }, { "epoch": 4.77, "learning_rate": 4.7618045649072755e-05, "loss": 0.0094, "step": 26730 }, { "epoch": 4.77, "learning_rate": 4.761715406562055e-05, "loss": 0.0066, "step": 26740 }, { "epoch": 4.77, "learning_rate": 4.761626248216833e-05, "loss": 0.0052, "step": 26750 }, { "epoch": 4.77, "learning_rate": 4.761537089871612e-05, "loss": 0.0095, "step": 26760 }, { "epoch": 4.77, "learning_rate": 4.7614479315263913e-05, "loss": 0.0061, "step": 26770 }, { "epoch": 4.78, "learning_rate": 4.76135877318117e-05, "loss": 0.0069, "step": 26780 }, { "epoch": 4.78, "learning_rate": 4.761269614835949e-05, "loss": 0.0072, "step": 26790 }, { "epoch": 4.78, "learning_rate": 4.7611804564907274e-05, "loss": 0.0092, "step": 26800 }, { "epoch": 4.78, "learning_rate": 4.761091298145507e-05, "loss": 0.0081, "step": 26810 }, { "epoch": 4.78, "learning_rate": 4.7610021398002856e-05, "loss": 0.0061, "step": 26820 }, { "epoch": 4.78, "learning_rate": 4.760912981455065e-05, "loss": 0.0083, "step": 26830 }, { "epoch": 4.79, "learning_rate": 4.760823823109843e-05, "loss": 0.0071, "step": 26840 }, { "epoch": 4.79, "learning_rate": 4.760734664764622e-05, "loss": 0.0051, "step": 26850 }, { "epoch": 4.79, "learning_rate": 4.7606455064194014e-05, "loss": 0.0086, "step": 26860 }, { "epoch": 4.79, "learning_rate": 4.76055634807418e-05, "loss": 0.0088, "step": 26870 }, { "epoch": 4.79, "learning_rate": 4.760467189728959e-05, "loss": 0.0038, "step": 26880 }, { "epoch": 4.79, "learning_rate": 4.7603780313837374e-05, "loss": 0.0062, "step": 26890 }, { "epoch": 4.8, "learning_rate": 4.7602888730385165e-05, "loss": 0.0052, "step": 26900 }, { "epoch": 4.8, "learning_rate": 4.760199714693295e-05, "loss": 0.0065, "step": 26910 }, { "epoch": 4.8, "learning_rate": 4.760110556348075e-05, "loss": 0.0042, "step": 26920 }, { "epoch": 4.8, "learning_rate": 4.760021398002853e-05, "loss": 0.0064, "step": 26930 }, { "epoch": 4.8, "learning_rate": 4.759932239657632e-05, "loss": 0.0056, "step": 26940 }, { "epoch": 4.81, "learning_rate": 4.7598430813124115e-05, "loss": 0.007, "step": 26950 }, { "epoch": 4.81, "learning_rate": 4.75975392296719e-05, "loss": 0.0079, "step": 26960 }, { "epoch": 4.81, "learning_rate": 4.759664764621969e-05, "loss": 0.0095, "step": 26970 }, { "epoch": 4.81, "learning_rate": 4.7595756062767475e-05, "loss": 0.0045, "step": 26980 }, { "epoch": 4.81, "learning_rate": 4.7594864479315266e-05, "loss": 0.0081, "step": 26990 }, { "epoch": 4.81, "learning_rate": 4.759397289586306e-05, "loss": 0.0054, "step": 27000 }, { "epoch": 4.82, "learning_rate": 4.759308131241084e-05, "loss": 0.0048, "step": 27010 }, { "epoch": 4.82, "learning_rate": 4.759218972895863e-05, "loss": 0.0091, "step": 27020 }, { "epoch": 4.82, "learning_rate": 4.7591298145506424e-05, "loss": 0.0064, "step": 27030 }, { "epoch": 4.82, "learning_rate": 4.7590406562054215e-05, "loss": 0.0034, "step": 27040 }, { "epoch": 4.82, "learning_rate": 4.7589514978602e-05, "loss": 0.0069, "step": 27050 }, { "epoch": 4.83, "learning_rate": 4.758862339514979e-05, "loss": 0.0058, "step": 27060 }, { "epoch": 4.83, "learning_rate": 4.7587731811697575e-05, "loss": 0.0078, "step": 27070 }, { "epoch": 4.83, "learning_rate": 4.7586840228245366e-05, "loss": 0.0044, "step": 27080 }, { "epoch": 4.83, "learning_rate": 4.758594864479316e-05, "loss": 0.0056, "step": 27090 }, { "epoch": 4.83, "learning_rate": 4.758505706134094e-05, "loss": 0.0044, "step": 27100 }, { "epoch": 4.83, "learning_rate": 4.758416547788873e-05, "loss": 0.0052, "step": 27110 }, { "epoch": 4.84, "learning_rate": 4.758327389443652e-05, "loss": 0.0087, "step": 27120 }, { "epoch": 4.84, "learning_rate": 4.758238231098431e-05, "loss": 0.0048, "step": 27130 }, { "epoch": 4.84, "learning_rate": 4.75814907275321e-05, "loss": 0.0092, "step": 27140 }, { "epoch": 4.84, "learning_rate": 4.758059914407989e-05, "loss": 0.0054, "step": 27150 }, { "epoch": 4.84, "learning_rate": 4.7579707560627676e-05, "loss": 0.0053, "step": 27160 }, { "epoch": 4.84, "learning_rate": 4.757881597717547e-05, "loss": 0.0073, "step": 27170 }, { "epoch": 4.85, "learning_rate": 4.757792439372326e-05, "loss": 0.0075, "step": 27180 }, { "epoch": 4.85, "learning_rate": 4.757703281027104e-05, "loss": 0.0104, "step": 27190 }, { "epoch": 4.85, "learning_rate": 4.7576141226818834e-05, "loss": 0.0084, "step": 27200 }, { "epoch": 4.85, "learning_rate": 4.757524964336662e-05, "loss": 0.0062, "step": 27210 }, { "epoch": 4.85, "learning_rate": 4.757435805991441e-05, "loss": 0.0054, "step": 27220 }, { "epoch": 4.86, "learning_rate": 4.75734664764622e-05, "loss": 0.005, "step": 27230 }, { "epoch": 4.86, "learning_rate": 4.7572574893009985e-05, "loss": 0.0044, "step": 27240 }, { "epoch": 4.86, "learning_rate": 4.7571683309557776e-05, "loss": 0.0054, "step": 27250 }, { "epoch": 4.86, "learning_rate": 4.757079172610557e-05, "loss": 0.0105, "step": 27260 }, { "epoch": 4.86, "learning_rate": 4.756990014265336e-05, "loss": 0.0056, "step": 27270 }, { "epoch": 4.86, "learning_rate": 4.756900855920114e-05, "loss": 0.0053, "step": 27280 }, { "epoch": 4.87, "learning_rate": 4.7568116975748934e-05, "loss": 0.0069, "step": 27290 }, { "epoch": 4.87, "learning_rate": 4.756722539229672e-05, "loss": 0.0052, "step": 27300 }, { "epoch": 4.87, "learning_rate": 4.756633380884451e-05, "loss": 0.0042, "step": 27310 }, { "epoch": 4.87, "learning_rate": 4.75654422253923e-05, "loss": 0.0069, "step": 27320 }, { "epoch": 4.87, "learning_rate": 4.7564550641940086e-05, "loss": 0.0044, "step": 27330 }, { "epoch": 4.88, "learning_rate": 4.756365905848788e-05, "loss": 0.0094, "step": 27340 }, { "epoch": 4.88, "learning_rate": 4.756276747503566e-05, "loss": 0.0048, "step": 27350 }, { "epoch": 4.88, "learning_rate": 4.756187589158346e-05, "loss": 0.0083, "step": 27360 }, { "epoch": 4.88, "learning_rate": 4.7560984308131244e-05, "loss": 0.0079, "step": 27370 }, { "epoch": 4.88, "learning_rate": 4.7560092724679035e-05, "loss": 0.0056, "step": 27380 }, { "epoch": 4.88, "learning_rate": 4.755920114122682e-05, "loss": 0.0051, "step": 27390 }, { "epoch": 4.89, "learning_rate": 4.755830955777461e-05, "loss": 0.0098, "step": 27400 }, { "epoch": 4.89, "learning_rate": 4.75574179743224e-05, "loss": 0.007, "step": 27410 }, { "epoch": 4.89, "learning_rate": 4.7556526390870186e-05, "loss": 0.0055, "step": 27420 }, { "epoch": 4.89, "learning_rate": 4.755563480741798e-05, "loss": 0.0063, "step": 27430 }, { "epoch": 4.89, "learning_rate": 4.755474322396576e-05, "loss": 0.0041, "step": 27440 }, { "epoch": 4.89, "learning_rate": 4.755385164051355e-05, "loss": 0.0045, "step": 27450 }, { "epoch": 4.9, "learning_rate": 4.755296005706134e-05, "loss": 0.0072, "step": 27460 }, { "epoch": 4.9, "learning_rate": 4.7552068473609136e-05, "loss": 0.0093, "step": 27470 }, { "epoch": 4.9, "learning_rate": 4.755117689015692e-05, "loss": 0.0062, "step": 27480 }, { "epoch": 4.9, "learning_rate": 4.755028530670471e-05, "loss": 0.0056, "step": 27490 }, { "epoch": 4.9, "learning_rate": 4.75493937232525e-05, "loss": 0.0072, "step": 27500 }, { "epoch": 4.91, "learning_rate": 4.754850213980029e-05, "loss": 0.0088, "step": 27510 }, { "epoch": 4.91, "learning_rate": 4.754761055634808e-05, "loss": 0.006, "step": 27520 }, { "epoch": 4.91, "learning_rate": 4.754671897289586e-05, "loss": 0.0068, "step": 27530 }, { "epoch": 4.91, "learning_rate": 4.7545827389443654e-05, "loss": 0.0053, "step": 27540 }, { "epoch": 4.91, "learning_rate": 4.7544935805991445e-05, "loss": 0.005, "step": 27550 }, { "epoch": 4.91, "learning_rate": 4.754404422253923e-05, "loss": 0.0056, "step": 27560 }, { "epoch": 4.92, "learning_rate": 4.754315263908702e-05, "loss": 0.0081, "step": 27570 }, { "epoch": 4.92, "learning_rate": 4.7542261055634805e-05, "loss": 0.0073, "step": 27580 }, { "epoch": 4.92, "learning_rate": 4.75413694721826e-05, "loss": 0.0072, "step": 27590 }, { "epoch": 4.92, "learning_rate": 4.754047788873039e-05, "loss": 0.0069, "step": 27600 }, { "epoch": 4.92, "learning_rate": 4.753958630527818e-05, "loss": 0.0091, "step": 27610 }, { "epoch": 4.93, "learning_rate": 4.753869472182596e-05, "loss": 0.0064, "step": 27620 }, { "epoch": 4.93, "learning_rate": 4.7537803138373754e-05, "loss": 0.0063, "step": 27630 }, { "epoch": 4.93, "learning_rate": 4.7536911554921545e-05, "loss": 0.0038, "step": 27640 }, { "epoch": 4.93, "learning_rate": 4.753601997146933e-05, "loss": 0.0088, "step": 27650 }, { "epoch": 4.93, "learning_rate": 4.753512838801712e-05, "loss": 0.0055, "step": 27660 }, { "epoch": 4.93, "learning_rate": 4.7534236804564906e-05, "loss": 0.007, "step": 27670 }, { "epoch": 4.94, "learning_rate": 4.75333452211127e-05, "loss": 0.0056, "step": 27680 }, { "epoch": 4.94, "learning_rate": 4.753245363766048e-05, "loss": 0.0053, "step": 27690 }, { "epoch": 4.94, "learning_rate": 4.753156205420828e-05, "loss": 0.008, "step": 27700 }, { "epoch": 4.94, "learning_rate": 4.7530670470756064e-05, "loss": 0.0067, "step": 27710 }, { "epoch": 4.94, "learning_rate": 4.7529778887303855e-05, "loss": 0.0072, "step": 27720 }, { "epoch": 4.94, "learning_rate": 4.7528887303851646e-05, "loss": 0.0063, "step": 27730 }, { "epoch": 4.95, "learning_rate": 4.752799572039943e-05, "loss": 0.011, "step": 27740 }, { "epoch": 4.95, "learning_rate": 4.752710413694722e-05, "loss": 0.0062, "step": 27750 }, { "epoch": 4.95, "learning_rate": 4.7526212553495006e-05, "loss": 0.0055, "step": 27760 }, { "epoch": 4.95, "learning_rate": 4.75253209700428e-05, "loss": 0.0054, "step": 27770 }, { "epoch": 4.95, "learning_rate": 4.752442938659059e-05, "loss": 0.0077, "step": 27780 }, { "epoch": 4.96, "learning_rate": 4.752353780313837e-05, "loss": 0.0058, "step": 27790 }, { "epoch": 4.96, "learning_rate": 4.7522646219686164e-05, "loss": 0.005, "step": 27800 }, { "epoch": 4.96, "learning_rate": 4.7521754636233955e-05, "loss": 0.0054, "step": 27810 }, { "epoch": 4.96, "learning_rate": 4.7520863052781747e-05, "loss": 0.0034, "step": 27820 }, { "epoch": 4.96, "learning_rate": 4.751997146932953e-05, "loss": 0.0051, "step": 27830 }, { "epoch": 4.96, "learning_rate": 4.751907988587732e-05, "loss": 0.0051, "step": 27840 }, { "epoch": 4.97, "learning_rate": 4.751818830242511e-05, "loss": 0.0038, "step": 27850 }, { "epoch": 4.97, "learning_rate": 4.75172967189729e-05, "loss": 0.0089, "step": 27860 }, { "epoch": 4.97, "learning_rate": 4.751640513552069e-05, "loss": 0.0043, "step": 27870 }, { "epoch": 4.97, "learning_rate": 4.7515513552068474e-05, "loss": 0.0051, "step": 27880 }, { "epoch": 4.97, "learning_rate": 4.7514621968616265e-05, "loss": 0.0083, "step": 27890 }, { "epoch": 4.98, "learning_rate": 4.751373038516405e-05, "loss": 0.0093, "step": 27900 }, { "epoch": 4.98, "learning_rate": 4.751283880171184e-05, "loss": 0.0075, "step": 27910 }, { "epoch": 4.98, "learning_rate": 4.751194721825963e-05, "loss": 0.0052, "step": 27920 }, { "epoch": 4.98, "learning_rate": 4.751105563480742e-05, "loss": 0.0096, "step": 27930 }, { "epoch": 4.98, "learning_rate": 4.751016405135521e-05, "loss": 0.0079, "step": 27940 }, { "epoch": 4.98, "learning_rate": 4.7509272467903e-05, "loss": 0.005, "step": 27950 }, { "epoch": 4.99, "learning_rate": 4.750838088445079e-05, "loss": 0.0074, "step": 27960 }, { "epoch": 4.99, "learning_rate": 4.7507489300998574e-05, "loss": 0.0042, "step": 27970 }, { "epoch": 4.99, "learning_rate": 4.7506597717546365e-05, "loss": 0.008, "step": 27980 }, { "epoch": 4.99, "learning_rate": 4.750570613409415e-05, "loss": 0.0048, "step": 27990 }, { "epoch": 4.99, "learning_rate": 4.750481455064194e-05, "loss": 0.0077, "step": 28000 }, { "epoch": 4.99, "learning_rate": 4.750392296718973e-05, "loss": 0.0074, "step": 28010 }, { "epoch": 5.0, "learning_rate": 4.7503031383737517e-05, "loss": 0.007, "step": 28020 }, { "epoch": 5.0, "learning_rate": 4.7502139800285315e-05, "loss": 0.0052, "step": 28030 }, { "epoch": 5.0, "learning_rate": 4.75012482168331e-05, "loss": 0.0043, "step": 28040 }, { "epoch": 5.0, "eval_loss": 0.013962038792669773, "eval_runtime": 195.5702, "eval_samples_per_second": 23.72, "eval_steps_per_second": 2.966, "step": 28040 }, { "epoch": 5.0, "learning_rate": 4.750035663338089e-05, "loss": 0.005, "step": 28050 }, { "epoch": 5.0, "learning_rate": 4.7499465049928675e-05, "loss": 0.005, "step": 28060 }, { "epoch": 5.01, "learning_rate": 4.7498573466476466e-05, "loss": 0.0057, "step": 28070 }, { "epoch": 5.01, "learning_rate": 4.749768188302425e-05, "loss": 0.0064, "step": 28080 }, { "epoch": 5.01, "learning_rate": 4.749679029957204e-05, "loss": 0.0053, "step": 28090 }, { "epoch": 5.01, "learning_rate": 4.749589871611983e-05, "loss": 0.0059, "step": 28100 }, { "epoch": 5.01, "learning_rate": 4.749500713266762e-05, "loss": 0.0045, "step": 28110 }, { "epoch": 5.01, "learning_rate": 4.749411554921541e-05, "loss": 0.004, "step": 28120 }, { "epoch": 5.02, "learning_rate": 4.749322396576319e-05, "loss": 0.0062, "step": 28130 }, { "epoch": 5.02, "learning_rate": 4.749233238231099e-05, "loss": 0.0084, "step": 28140 }, { "epoch": 5.02, "learning_rate": 4.7491440798858775e-05, "loss": 0.0058, "step": 28150 }, { "epoch": 5.02, "learning_rate": 4.7490549215406566e-05, "loss": 0.0061, "step": 28160 }, { "epoch": 5.02, "learning_rate": 4.748965763195435e-05, "loss": 0.0061, "step": 28170 }, { "epoch": 5.02, "learning_rate": 4.748876604850214e-05, "loss": 0.0093, "step": 28180 }, { "epoch": 5.03, "learning_rate": 4.748787446504993e-05, "loss": 0.0031, "step": 28190 }, { "epoch": 5.03, "learning_rate": 4.748698288159772e-05, "loss": 0.0065, "step": 28200 }, { "epoch": 5.03, "learning_rate": 4.748609129814551e-05, "loss": 0.0066, "step": 28210 }, { "epoch": 5.03, "learning_rate": 4.748519971469329e-05, "loss": 0.0061, "step": 28220 }, { "epoch": 5.03, "learning_rate": 4.7484308131241085e-05, "loss": 0.0046, "step": 28230 }, { "epoch": 5.04, "learning_rate": 4.7483416547788876e-05, "loss": 0.0051, "step": 28240 }, { "epoch": 5.04, "learning_rate": 4.748252496433667e-05, "loss": 0.0073, "step": 28250 }, { "epoch": 5.04, "learning_rate": 4.748163338088446e-05, "loss": 0.0104, "step": 28260 }, { "epoch": 5.04, "learning_rate": 4.748074179743224e-05, "loss": 0.0056, "step": 28270 }, { "epoch": 5.04, "learning_rate": 4.7479850213980034e-05, "loss": 0.0079, "step": 28280 }, { "epoch": 5.04, "learning_rate": 4.747895863052782e-05, "loss": 0.0082, "step": 28290 }, { "epoch": 5.05, "learning_rate": 4.747806704707561e-05, "loss": 0.0134, "step": 28300 }, { "epoch": 5.05, "learning_rate": 4.7477175463623394e-05, "loss": 0.0038, "step": 28310 }, { "epoch": 5.05, "learning_rate": 4.7476283880171185e-05, "loss": 0.0054, "step": 28320 }, { "epoch": 5.05, "learning_rate": 4.7475392296718976e-05, "loss": 0.0073, "step": 28330 }, { "epoch": 5.05, "learning_rate": 4.747450071326676e-05, "loss": 0.0069, "step": 28340 }, { "epoch": 5.06, "learning_rate": 4.747360912981455e-05, "loss": 0.0061, "step": 28350 }, { "epoch": 5.06, "learning_rate": 4.747271754636234e-05, "loss": 0.0028, "step": 28360 }, { "epoch": 5.06, "learning_rate": 4.7471825962910134e-05, "loss": 0.0078, "step": 28370 }, { "epoch": 5.06, "learning_rate": 4.747093437945792e-05, "loss": 0.0066, "step": 28380 }, { "epoch": 5.06, "learning_rate": 4.747004279600571e-05, "loss": 0.0068, "step": 28390 }, { "epoch": 5.06, "learning_rate": 4.7469151212553494e-05, "loss": 0.0039, "step": 28400 }, { "epoch": 5.07, "learning_rate": 4.7468259629101286e-05, "loss": 0.0066, "step": 28410 }, { "epoch": 5.07, "learning_rate": 4.746736804564908e-05, "loss": 0.0039, "step": 28420 }, { "epoch": 5.07, "learning_rate": 4.746647646219686e-05, "loss": 0.0042, "step": 28430 }, { "epoch": 5.07, "learning_rate": 4.746558487874465e-05, "loss": 0.0066, "step": 28440 }, { "epoch": 5.07, "learning_rate": 4.746469329529244e-05, "loss": 0.0058, "step": 28450 }, { "epoch": 5.07, "learning_rate": 4.746380171184023e-05, "loss": 0.0071, "step": 28460 }, { "epoch": 5.08, "learning_rate": 4.746291012838802e-05, "loss": 0.0056, "step": 28470 }, { "epoch": 5.08, "learning_rate": 4.746201854493581e-05, "loss": 0.0062, "step": 28480 }, { "epoch": 5.08, "learning_rate": 4.74611269614836e-05, "loss": 0.004, "step": 28490 }, { "epoch": 5.08, "learning_rate": 4.7460235378031386e-05, "loss": 0.0054, "step": 28500 }, { "epoch": 5.08, "learning_rate": 4.745934379457918e-05, "loss": 0.0096, "step": 28510 }, { "epoch": 5.09, "learning_rate": 4.745845221112696e-05, "loss": 0.0098, "step": 28520 }, { "epoch": 5.09, "learning_rate": 4.745756062767475e-05, "loss": 0.0065, "step": 28530 }, { "epoch": 5.09, "learning_rate": 4.745666904422254e-05, "loss": 0.009, "step": 28540 }, { "epoch": 5.09, "learning_rate": 4.745577746077033e-05, "loss": 0.0062, "step": 28550 }, { "epoch": 5.09, "learning_rate": 4.745488587731812e-05, "loss": 0.0058, "step": 28560 }, { "epoch": 5.09, "learning_rate": 4.7453994293865904e-05, "loss": 0.0052, "step": 28570 }, { "epoch": 5.1, "learning_rate": 4.74531027104137e-05, "loss": 0.0091, "step": 28580 }, { "epoch": 5.1, "learning_rate": 4.745221112696149e-05, "loss": 0.0049, "step": 28590 }, { "epoch": 5.1, "learning_rate": 4.745131954350928e-05, "loss": 0.0064, "step": 28600 }, { "epoch": 5.1, "learning_rate": 4.745042796005706e-05, "loss": 0.0064, "step": 28610 }, { "epoch": 5.1, "learning_rate": 4.7449536376604854e-05, "loss": 0.007, "step": 28620 }, { "epoch": 5.11, "learning_rate": 4.744864479315264e-05, "loss": 0.0041, "step": 28630 }, { "epoch": 5.11, "learning_rate": 4.744775320970043e-05, "loss": 0.0051, "step": 28640 }, { "epoch": 5.11, "learning_rate": 4.744686162624822e-05, "loss": 0.0096, "step": 28650 }, { "epoch": 5.11, "learning_rate": 4.7445970042796005e-05, "loss": 0.0054, "step": 28660 }, { "epoch": 5.11, "learning_rate": 4.7445078459343796e-05, "loss": 0.0052, "step": 28670 }, { "epoch": 5.11, "learning_rate": 4.744418687589158e-05, "loss": 0.0067, "step": 28680 }, { "epoch": 5.12, "learning_rate": 4.744329529243938e-05, "loss": 0.0068, "step": 28690 }, { "epoch": 5.12, "learning_rate": 4.744240370898716e-05, "loss": 0.0079, "step": 28700 }, { "epoch": 5.12, "learning_rate": 4.7441512125534954e-05, "loss": 0.0036, "step": 28710 }, { "epoch": 5.12, "learning_rate": 4.7440620542082745e-05, "loss": 0.0069, "step": 28720 }, { "epoch": 5.12, "learning_rate": 4.743972895863053e-05, "loss": 0.0046, "step": 28730 }, { "epoch": 5.12, "learning_rate": 4.743883737517832e-05, "loss": 0.0052, "step": 28740 }, { "epoch": 5.13, "learning_rate": 4.7437945791726105e-05, "loss": 0.0054, "step": 28750 }, { "epoch": 5.13, "learning_rate": 4.74370542082739e-05, "loss": 0.0052, "step": 28760 }, { "epoch": 5.13, "learning_rate": 4.743616262482168e-05, "loss": 0.0056, "step": 28770 }, { "epoch": 5.13, "learning_rate": 4.743527104136947e-05, "loss": 0.0047, "step": 28780 }, { "epoch": 5.13, "learning_rate": 4.7434379457917264e-05, "loss": 0.0055, "step": 28790 }, { "epoch": 5.14, "learning_rate": 4.7433487874465055e-05, "loss": 0.0061, "step": 28800 }, { "epoch": 5.14, "learning_rate": 4.7432596291012846e-05, "loss": 0.0051, "step": 28810 }, { "epoch": 5.14, "learning_rate": 4.743170470756063e-05, "loss": 0.0052, "step": 28820 }, { "epoch": 5.14, "learning_rate": 4.743081312410842e-05, "loss": 0.0098, "step": 28830 }, { "epoch": 5.14, "learning_rate": 4.7429921540656206e-05, "loss": 0.0077, "step": 28840 }, { "epoch": 5.14, "learning_rate": 4.7429029957204e-05, "loss": 0.0063, "step": 28850 }, { "epoch": 5.15, "learning_rate": 4.742813837375178e-05, "loss": 0.0064, "step": 28860 }, { "epoch": 5.15, "learning_rate": 4.742724679029957e-05, "loss": 0.0081, "step": 28870 }, { "epoch": 5.15, "learning_rate": 4.7426355206847364e-05, "loss": 0.0048, "step": 28880 }, { "epoch": 5.15, "learning_rate": 4.742546362339515e-05, "loss": 0.0079, "step": 28890 }, { "epoch": 5.15, "learning_rate": 4.742457203994294e-05, "loss": 0.0081, "step": 28900 }, { "epoch": 5.16, "learning_rate": 4.742368045649073e-05, "loss": 0.0064, "step": 28910 }, { "epoch": 5.16, "learning_rate": 4.742278887303852e-05, "loss": 0.0041, "step": 28920 }, { "epoch": 5.16, "learning_rate": 4.742189728958631e-05, "loss": 0.0072, "step": 28930 }, { "epoch": 5.16, "learning_rate": 4.74210057061341e-05, "loss": 0.0058, "step": 28940 }, { "epoch": 5.16, "learning_rate": 4.742011412268189e-05, "loss": 0.0041, "step": 28950 }, { "epoch": 5.16, "learning_rate": 4.7419222539229673e-05, "loss": 0.0047, "step": 28960 }, { "epoch": 5.17, "learning_rate": 4.7418330955777465e-05, "loss": 0.0037, "step": 28970 }, { "epoch": 5.17, "learning_rate": 4.741743937232525e-05, "loss": 0.0025, "step": 28980 }, { "epoch": 5.17, "learning_rate": 4.741654778887304e-05, "loss": 0.0024, "step": 28990 }, { "epoch": 5.17, "learning_rate": 4.7415656205420825e-05, "loss": 0.0045, "step": 29000 }, { "epoch": 5.17, "learning_rate": 4.7414764621968616e-05, "loss": 0.0066, "step": 29010 }, { "epoch": 5.17, "learning_rate": 4.741387303851641e-05, "loss": 0.0066, "step": 29020 }, { "epoch": 5.18, "learning_rate": 4.74129814550642e-05, "loss": 0.006, "step": 29030 }, { "epoch": 5.18, "learning_rate": 4.741208987161199e-05, "loss": 0.009, "step": 29040 }, { "epoch": 5.18, "learning_rate": 4.7411198288159774e-05, "loss": 0.0073, "step": 29050 }, { "epoch": 5.18, "learning_rate": 4.7410306704707565e-05, "loss": 0.0093, "step": 29060 }, { "epoch": 5.18, "learning_rate": 4.740941512125535e-05, "loss": 0.0075, "step": 29070 }, { "epoch": 5.19, "learning_rate": 4.740852353780314e-05, "loss": 0.0042, "step": 29080 }, { "epoch": 5.19, "learning_rate": 4.7407631954350925e-05, "loss": 0.0072, "step": 29090 }, { "epoch": 5.19, "learning_rate": 4.740682952924394e-05, "loss": 0.0058, "step": 29100 }, { "epoch": 5.19, "learning_rate": 4.740593794579173e-05, "loss": 0.0073, "step": 29110 }, { "epoch": 5.19, "learning_rate": 4.7405046362339516e-05, "loss": 0.0061, "step": 29120 }, { "epoch": 5.19, "learning_rate": 4.740415477888731e-05, "loss": 0.0041, "step": 29130 }, { "epoch": 5.2, "learning_rate": 4.740326319543509e-05, "loss": 0.0075, "step": 29140 }, { "epoch": 5.2, "learning_rate": 4.740237161198288e-05, "loss": 0.0052, "step": 29150 }, { "epoch": 5.2, "learning_rate": 4.740148002853067e-05, "loss": 0.0052, "step": 29160 }, { "epoch": 5.2, "learning_rate": 4.7400588445078465e-05, "loss": 0.0048, "step": 29170 }, { "epoch": 5.2, "learning_rate": 4.739969686162625e-05, "loss": 0.0057, "step": 29180 }, { "epoch": 5.21, "learning_rate": 4.739880527817404e-05, "loss": 0.0052, "step": 29190 }, { "epoch": 5.21, "learning_rate": 4.739791369472183e-05, "loss": 0.0071, "step": 29200 }, { "epoch": 5.21, "learning_rate": 4.7397022111269616e-05, "loss": 0.0078, "step": 29210 }, { "epoch": 5.21, "learning_rate": 4.739613052781741e-05, "loss": 0.0045, "step": 29220 }, { "epoch": 5.21, "learning_rate": 4.739523894436519e-05, "loss": 0.0036, "step": 29230 }, { "epoch": 5.21, "learning_rate": 4.739434736091298e-05, "loss": 0.004, "step": 29240 }, { "epoch": 5.22, "learning_rate": 4.7393455777460774e-05, "loss": 0.0036, "step": 29250 }, { "epoch": 5.22, "learning_rate": 4.739256419400856e-05, "loss": 0.0077, "step": 29260 }, { "epoch": 5.22, "learning_rate": 4.739167261055635e-05, "loss": 0.0044, "step": 29270 }, { "epoch": 5.22, "learning_rate": 4.739078102710414e-05, "loss": 0.0051, "step": 29280 }, { "epoch": 5.22, "learning_rate": 4.738988944365193e-05, "loss": 0.0086, "step": 29290 }, { "epoch": 5.22, "learning_rate": 4.738899786019972e-05, "loss": 0.0075, "step": 29300 }, { "epoch": 5.23, "learning_rate": 4.738810627674751e-05, "loss": 0.0097, "step": 29310 }, { "epoch": 5.23, "learning_rate": 4.738721469329529e-05, "loss": 0.0056, "step": 29320 }, { "epoch": 5.23, "learning_rate": 4.7386323109843084e-05, "loss": 0.0045, "step": 29330 }, { "epoch": 5.23, "learning_rate": 4.7385431526390875e-05, "loss": 0.0047, "step": 29340 }, { "epoch": 5.23, "learning_rate": 4.738453994293866e-05, "loss": 0.0045, "step": 29350 }, { "epoch": 5.24, "learning_rate": 4.738364835948645e-05, "loss": 0.0049, "step": 29360 }, { "epoch": 5.24, "learning_rate": 4.7382756776034235e-05, "loss": 0.0037, "step": 29370 }, { "epoch": 5.24, "learning_rate": 4.7381865192582026e-05, "loss": 0.0038, "step": 29380 }, { "epoch": 5.24, "learning_rate": 4.738097360912982e-05, "loss": 0.0069, "step": 29390 }, { "epoch": 5.24, "learning_rate": 4.738008202567761e-05, "loss": 0.006, "step": 29400 }, { "epoch": 5.24, "learning_rate": 4.737919044222539e-05, "loss": 0.0055, "step": 29410 }, { "epoch": 5.25, "learning_rate": 4.7378298858773184e-05, "loss": 0.007, "step": 29420 }, { "epoch": 5.25, "learning_rate": 4.7377407275320976e-05, "loss": 0.0057, "step": 29430 }, { "epoch": 5.25, "learning_rate": 4.737651569186876e-05, "loss": 0.0053, "step": 29440 }, { "epoch": 5.25, "learning_rate": 4.737562410841655e-05, "loss": 0.0058, "step": 29450 }, { "epoch": 5.25, "learning_rate": 4.7374732524964336e-05, "loss": 0.0058, "step": 29460 }, { "epoch": 5.25, "learning_rate": 4.737384094151213e-05, "loss": 0.0048, "step": 29470 }, { "epoch": 5.26, "learning_rate": 4.737294935805992e-05, "loss": 0.0046, "step": 29480 }, { "epoch": 5.26, "learning_rate": 4.73720577746077e-05, "loss": 0.0083, "step": 29490 }, { "epoch": 5.26, "learning_rate": 4.73711661911555e-05, "loss": 0.0038, "step": 29500 }, { "epoch": 5.26, "learning_rate": 4.7370274607703285e-05, "loss": 0.0039, "step": 29510 }, { "epoch": 5.26, "learning_rate": 4.7369383024251076e-05, "loss": 0.0029, "step": 29520 }, { "epoch": 5.27, "learning_rate": 4.736849144079886e-05, "loss": 0.0059, "step": 29530 }, { "epoch": 5.27, "learning_rate": 4.736759985734665e-05, "loss": 0.0065, "step": 29540 }, { "epoch": 5.27, "learning_rate": 4.7366708273894436e-05, "loss": 0.0066, "step": 29550 }, { "epoch": 5.27, "learning_rate": 4.736581669044223e-05, "loss": 0.0074, "step": 29560 }, { "epoch": 5.27, "learning_rate": 4.736492510699002e-05, "loss": 0.0059, "step": 29570 }, { "epoch": 5.27, "learning_rate": 4.73640335235378e-05, "loss": 0.0049, "step": 29580 }, { "epoch": 5.28, "learning_rate": 4.7363141940085594e-05, "loss": 0.0053, "step": 29590 }, { "epoch": 5.28, "learning_rate": 4.736225035663338e-05, "loss": 0.006, "step": 29600 }, { "epoch": 5.28, "learning_rate": 4.736135877318118e-05, "loss": 0.0073, "step": 29610 }, { "epoch": 5.28, "learning_rate": 4.736046718972896e-05, "loss": 0.01, "step": 29620 }, { "epoch": 5.28, "learning_rate": 4.735957560627675e-05, "loss": 0.006, "step": 29630 }, { "epoch": 5.29, "learning_rate": 4.735868402282454e-05, "loss": 0.005, "step": 29640 }, { "epoch": 5.29, "learning_rate": 4.735779243937233e-05, "loss": 0.0052, "step": 29650 }, { "epoch": 5.29, "learning_rate": 4.735690085592012e-05, "loss": 0.0048, "step": 29660 }, { "epoch": 5.29, "learning_rate": 4.7356009272467904e-05, "loss": 0.0048, "step": 29670 }, { "epoch": 5.29, "learning_rate": 4.7355117689015695e-05, "loss": 0.0068, "step": 29680 }, { "epoch": 5.29, "learning_rate": 4.735422610556348e-05, "loss": 0.0052, "step": 29690 }, { "epoch": 5.3, "learning_rate": 4.735333452211127e-05, "loss": 0.0056, "step": 29700 }, { "epoch": 5.3, "learning_rate": 4.735244293865906e-05, "loss": 0.0055, "step": 29710 }, { "epoch": 5.3, "learning_rate": 4.735155135520685e-05, "loss": 0.0064, "step": 29720 }, { "epoch": 5.3, "learning_rate": 4.7350659771754644e-05, "loss": 0.0048, "step": 29730 }, { "epoch": 5.3, "learning_rate": 4.734976818830243e-05, "loss": 0.0036, "step": 29740 }, { "epoch": 5.3, "learning_rate": 4.734887660485022e-05, "loss": 0.0077, "step": 29750 }, { "epoch": 5.31, "learning_rate": 4.7347985021398004e-05, "loss": 0.0035, "step": 29760 }, { "epoch": 5.31, "learning_rate": 4.7347093437945795e-05, "loss": 0.0053, "step": 29770 }, { "epoch": 5.31, "learning_rate": 4.734620185449358e-05, "loss": 0.0056, "step": 29780 }, { "epoch": 5.31, "learning_rate": 4.734531027104137e-05, "loss": 0.008, "step": 29790 }, { "epoch": 5.31, "learning_rate": 4.734441868758916e-05, "loss": 0.0067, "step": 29800 }, { "epoch": 5.32, "learning_rate": 4.734352710413695e-05, "loss": 0.0129, "step": 29810 }, { "epoch": 5.32, "learning_rate": 4.734263552068474e-05, "loss": 0.0057, "step": 29820 }, { "epoch": 5.32, "learning_rate": 4.734174393723253e-05, "loss": 0.0043, "step": 29830 }, { "epoch": 5.32, "learning_rate": 4.734085235378032e-05, "loss": 0.0033, "step": 29840 }, { "epoch": 5.32, "learning_rate": 4.7339960770328105e-05, "loss": 0.007, "step": 29850 }, { "epoch": 5.32, "learning_rate": 4.7339069186875896e-05, "loss": 0.0059, "step": 29860 }, { "epoch": 5.33, "learning_rate": 4.733817760342368e-05, "loss": 0.0036, "step": 29870 }, { "epoch": 5.33, "learning_rate": 4.733728601997147e-05, "loss": 0.0043, "step": 29880 }, { "epoch": 5.33, "learning_rate": 4.733639443651926e-05, "loss": 0.0047, "step": 29890 }, { "epoch": 5.33, "learning_rate": 4.733550285306705e-05, "loss": 0.004, "step": 29900 }, { "epoch": 5.33, "learning_rate": 4.733461126961484e-05, "loss": 0.0061, "step": 29910 }, { "epoch": 5.34, "learning_rate": 4.733371968616262e-05, "loss": 0.0068, "step": 29920 }, { "epoch": 5.34, "learning_rate": 4.7332828102710414e-05, "loss": 0.0075, "step": 29930 }, { "epoch": 5.34, "learning_rate": 4.7331936519258205e-05, "loss": 0.0062, "step": 29940 }, { "epoch": 5.34, "learning_rate": 4.7331044935805996e-05, "loss": 0.0054, "step": 29950 }, { "epoch": 5.34, "learning_rate": 4.733015335235379e-05, "loss": 0.004, "step": 29960 }, { "epoch": 5.34, "learning_rate": 4.732926176890157e-05, "loss": 0.0058, "step": 29970 }, { "epoch": 5.35, "learning_rate": 4.732837018544936e-05, "loss": 0.0095, "step": 29980 }, { "epoch": 5.35, "learning_rate": 4.732747860199715e-05, "loss": 0.0025, "step": 29990 }, { "epoch": 5.35, "learning_rate": 4.732658701854494e-05, "loss": 0.0052, "step": 30000 }, { "epoch": 5.35, "learning_rate": 4.732569543509272e-05, "loss": 0.0053, "step": 30010 }, { "epoch": 5.35, "learning_rate": 4.7324803851640515e-05, "loss": 0.009, "step": 30020 }, { "epoch": 5.35, "learning_rate": 4.7323912268188306e-05, "loss": 0.0081, "step": 30030 }, { "epoch": 5.36, "learning_rate": 4.732302068473609e-05, "loss": 0.0058, "step": 30040 }, { "epoch": 5.36, "learning_rate": 4.732212910128389e-05, "loss": 0.0052, "step": 30050 }, { "epoch": 5.36, "learning_rate": 4.732123751783167e-05, "loss": 0.0065, "step": 30060 }, { "epoch": 5.36, "learning_rate": 4.7320345934379464e-05, "loss": 0.0043, "step": 30070 }, { "epoch": 5.36, "learning_rate": 4.731945435092725e-05, "loss": 0.0089, "step": 30080 }, { "epoch": 5.37, "learning_rate": 4.731856276747504e-05, "loss": 0.0031, "step": 30090 }, { "epoch": 5.37, "learning_rate": 4.7317671184022824e-05, "loss": 0.0059, "step": 30100 }, { "epoch": 5.37, "learning_rate": 4.7316779600570615e-05, "loss": 0.0055, "step": 30110 }, { "epoch": 5.37, "learning_rate": 4.7315888017118406e-05, "loss": 0.0067, "step": 30120 }, { "epoch": 5.37, "learning_rate": 4.731499643366619e-05, "loss": 0.0066, "step": 30130 }, { "epoch": 5.37, "learning_rate": 4.731410485021398e-05, "loss": 0.0046, "step": 30140 }, { "epoch": 5.38, "learning_rate": 4.7313213266761766e-05, "loss": 0.0035, "step": 30150 }, { "epoch": 5.38, "learning_rate": 4.731232168330956e-05, "loss": 0.0046, "step": 30160 }, { "epoch": 5.38, "learning_rate": 4.731143009985735e-05, "loss": 0.0061, "step": 30170 }, { "epoch": 5.38, "learning_rate": 4.731053851640514e-05, "loss": 0.0057, "step": 30180 }, { "epoch": 5.38, "learning_rate": 4.730964693295293e-05, "loss": 0.0066, "step": 30190 }, { "epoch": 5.39, "learning_rate": 4.7308755349500716e-05, "loss": 0.006, "step": 30200 }, { "epoch": 5.39, "learning_rate": 4.730786376604851e-05, "loss": 0.0116, "step": 30210 }, { "epoch": 5.39, "learning_rate": 4.730697218259629e-05, "loss": 0.0072, "step": 30220 }, { "epoch": 5.39, "learning_rate": 4.730608059914408e-05, "loss": 0.0061, "step": 30230 }, { "epoch": 5.39, "learning_rate": 4.730518901569187e-05, "loss": 0.0049, "step": 30240 }, { "epoch": 5.39, "learning_rate": 4.730429743223966e-05, "loss": 0.0051, "step": 30250 }, { "epoch": 5.4, "learning_rate": 4.730340584878745e-05, "loss": 0.0045, "step": 30260 }, { "epoch": 5.4, "learning_rate": 4.7302514265335234e-05, "loss": 0.0062, "step": 30270 }, { "epoch": 5.4, "learning_rate": 4.730162268188303e-05, "loss": 0.0075, "step": 30280 }, { "epoch": 5.4, "learning_rate": 4.7300731098430816e-05, "loss": 0.0055, "step": 30290 }, { "epoch": 5.4, "learning_rate": 4.729983951497861e-05, "loss": 0.008, "step": 30300 }, { "epoch": 5.4, "learning_rate": 4.729894793152639e-05, "loss": 0.0085, "step": 30310 }, { "epoch": 5.41, "learning_rate": 4.729805634807418e-05, "loss": 0.0064, "step": 30320 }, { "epoch": 5.41, "learning_rate": 4.729716476462197e-05, "loss": 0.0068, "step": 30330 }, { "epoch": 5.41, "learning_rate": 4.729627318116976e-05, "loss": 0.0041, "step": 30340 }, { "epoch": 5.41, "learning_rate": 4.729538159771755e-05, "loss": 0.0038, "step": 30350 }, { "epoch": 5.41, "learning_rate": 4.7294490014265334e-05, "loss": 0.0053, "step": 30360 }, { "epoch": 5.42, "learning_rate": 4.7293598430813126e-05, "loss": 0.006, "step": 30370 }, { "epoch": 5.42, "learning_rate": 4.729270684736091e-05, "loss": 0.0088, "step": 30380 }, { "epoch": 5.42, "learning_rate": 4.729181526390871e-05, "loss": 0.0067, "step": 30390 }, { "epoch": 5.42, "learning_rate": 4.729092368045649e-05, "loss": 0.0067, "step": 30400 }, { "epoch": 5.42, "learning_rate": 4.7290032097004284e-05, "loss": 0.0085, "step": 30410 }, { "epoch": 5.42, "learning_rate": 4.7289140513552075e-05, "loss": 0.005, "step": 30420 }, { "epoch": 5.43, "learning_rate": 4.728824893009986e-05, "loss": 0.0046, "step": 30430 }, { "epoch": 5.43, "learning_rate": 4.728735734664765e-05, "loss": 0.0079, "step": 30440 }, { "epoch": 5.43, "learning_rate": 4.7286465763195435e-05, "loss": 0.009, "step": 30450 }, { "epoch": 5.43, "learning_rate": 4.7285574179743226e-05, "loss": 0.0062, "step": 30460 }, { "epoch": 5.43, "learning_rate": 4.728468259629101e-05, "loss": 0.006, "step": 30470 }, { "epoch": 5.44, "learning_rate": 4.72837910128388e-05, "loss": 0.0048, "step": 30480 }, { "epoch": 5.44, "learning_rate": 4.728289942938659e-05, "loss": 0.0073, "step": 30490 }, { "epoch": 5.44, "learning_rate": 4.7282007845934384e-05, "loss": 0.0054, "step": 30500 }, { "epoch": 5.44, "learning_rate": 4.7281116262482175e-05, "loss": 0.0076, "step": 30510 }, { "epoch": 5.44, "learning_rate": 4.728022467902996e-05, "loss": 0.0055, "step": 30520 }, { "epoch": 5.44, "learning_rate": 4.727933309557775e-05, "loss": 0.0038, "step": 30530 }, { "epoch": 5.45, "learning_rate": 4.7278441512125536e-05, "loss": 0.0074, "step": 30540 }, { "epoch": 5.45, "learning_rate": 4.727754992867333e-05, "loss": 0.0049, "step": 30550 }, { "epoch": 5.45, "learning_rate": 4.727665834522111e-05, "loss": 0.0046, "step": 30560 }, { "epoch": 5.45, "learning_rate": 4.72757667617689e-05, "loss": 0.0069, "step": 30570 }, { "epoch": 5.45, "learning_rate": 4.7274875178316694e-05, "loss": 0.0098, "step": 30580 }, { "epoch": 5.45, "learning_rate": 4.727398359486448e-05, "loss": 0.0041, "step": 30590 }, { "epoch": 5.46, "learning_rate": 4.727309201141227e-05, "loss": 0.0066, "step": 30600 }, { "epoch": 5.46, "learning_rate": 4.727220042796006e-05, "loss": 0.0042, "step": 30610 }, { "epoch": 5.46, "learning_rate": 4.727130884450785e-05, "loss": 0.0084, "step": 30620 }, { "epoch": 5.46, "learning_rate": 4.7270417261055636e-05, "loss": 0.0077, "step": 30630 }, { "epoch": 5.46, "learning_rate": 4.726952567760343e-05, "loss": 0.0068, "step": 30640 }, { "epoch": 5.47, "learning_rate": 4.726863409415121e-05, "loss": 0.0054, "step": 30650 }, { "epoch": 5.47, "learning_rate": 4.7267742510699e-05, "loss": 0.0082, "step": 30660 }, { "epoch": 5.47, "learning_rate": 4.7266850927246794e-05, "loss": 0.0065, "step": 30670 }, { "epoch": 5.47, "learning_rate": 4.726595934379458e-05, "loss": 0.0065, "step": 30680 }, { "epoch": 5.47, "learning_rate": 4.726506776034237e-05, "loss": 0.0041, "step": 30690 }, { "epoch": 5.47, "learning_rate": 4.7264176176890154e-05, "loss": 0.0025, "step": 30700 }, { "epoch": 5.48, "learning_rate": 4.7263284593437945e-05, "loss": 0.0041, "step": 30710 }, { "epoch": 5.48, "learning_rate": 4.726239300998574e-05, "loss": 0.0054, "step": 30720 }, { "epoch": 5.48, "learning_rate": 4.726150142653353e-05, "loss": 0.0052, "step": 30730 }, { "epoch": 5.48, "learning_rate": 4.726060984308132e-05, "loss": 0.0034, "step": 30740 }, { "epoch": 5.48, "learning_rate": 4.7259718259629104e-05, "loss": 0.0057, "step": 30750 }, { "epoch": 5.49, "learning_rate": 4.7258826676176895e-05, "loss": 0.0059, "step": 30760 }, { "epoch": 5.49, "learning_rate": 4.725793509272468e-05, "loss": 0.011, "step": 30770 }, { "epoch": 5.49, "learning_rate": 4.725704350927247e-05, "loss": 0.0047, "step": 30780 }, { "epoch": 5.49, "learning_rate": 4.7256151925820255e-05, "loss": 0.0055, "step": 30790 }, { "epoch": 5.49, "learning_rate": 4.7255260342368046e-05, "loss": 0.006, "step": 30800 }, { "epoch": 5.49, "learning_rate": 4.725436875891584e-05, "loss": 0.0068, "step": 30810 }, { "epoch": 5.5, "learning_rate": 4.725347717546362e-05, "loss": 0.006, "step": 30820 }, { "epoch": 5.5, "learning_rate": 4.725258559201142e-05, "loss": 0.0069, "step": 30830 }, { "epoch": 5.5, "learning_rate": 4.7251694008559204e-05, "loss": 0.0045, "step": 30840 }, { "epoch": 5.5, "learning_rate": 4.7250802425106995e-05, "loss": 0.0049, "step": 30850 }, { "epoch": 5.5, "learning_rate": 4.724991084165478e-05, "loss": 0.0061, "step": 30860 }, { "epoch": 5.5, "learning_rate": 4.724901925820257e-05, "loss": 0.0052, "step": 30870 }, { "epoch": 5.51, "learning_rate": 4.7248127674750355e-05, "loss": 0.0054, "step": 30880 }, { "epoch": 5.51, "learning_rate": 4.7247236091298147e-05, "loss": 0.0053, "step": 30890 }, { "epoch": 5.51, "learning_rate": 4.724634450784594e-05, "loss": 0.0111, "step": 30900 }, { "epoch": 5.51, "learning_rate": 4.724545292439372e-05, "loss": 0.007, "step": 30910 }, { "epoch": 5.51, "learning_rate": 4.7244561340941513e-05, "loss": 0.0057, "step": 30920 }, { "epoch": 5.52, "learning_rate": 4.72436697574893e-05, "loss": 0.0051, "step": 30930 }, { "epoch": 5.52, "learning_rate": 4.7242778174037096e-05, "loss": 0.0026, "step": 30940 }, { "epoch": 5.52, "learning_rate": 4.724188659058488e-05, "loss": 0.0056, "step": 30950 }, { "epoch": 5.52, "learning_rate": 4.724099500713267e-05, "loss": 0.006, "step": 30960 }, { "epoch": 5.52, "learning_rate": 4.724010342368046e-05, "loss": 0.0067, "step": 30970 }, { "epoch": 5.52, "learning_rate": 4.723921184022825e-05, "loss": 0.0044, "step": 30980 }, { "epoch": 5.53, "learning_rate": 4.723832025677604e-05, "loss": 0.0086, "step": 30990 }, { "epoch": 5.53, "learning_rate": 4.723742867332382e-05, "loss": 0.0057, "step": 31000 }, { "epoch": 5.53, "learning_rate": 4.7236537089871614e-05, "loss": 0.0044, "step": 31010 }, { "epoch": 5.53, "learning_rate": 4.72356455064194e-05, "loss": 0.0048, "step": 31020 }, { "epoch": 5.53, "learning_rate": 4.723475392296719e-05, "loss": 0.0055, "step": 31030 }, { "epoch": 5.53, "learning_rate": 4.723386233951498e-05, "loss": 0.0066, "step": 31040 }, { "epoch": 5.54, "learning_rate": 4.723297075606277e-05, "loss": 0.0084, "step": 31050 }, { "epoch": 5.54, "learning_rate": 4.723207917261056e-05, "loss": 0.007, "step": 31060 }, { "epoch": 5.54, "learning_rate": 4.723118758915835e-05, "loss": 0.0082, "step": 31070 }, { "epoch": 5.54, "learning_rate": 4.723029600570614e-05, "loss": 0.0033, "step": 31080 }, { "epoch": 5.54, "learning_rate": 4.722940442225392e-05, "loss": 0.0077, "step": 31090 }, { "epoch": 5.55, "learning_rate": 4.7228512838801715e-05, "loss": 0.008, "step": 31100 }, { "epoch": 5.55, "learning_rate": 4.72276212553495e-05, "loss": 0.0087, "step": 31110 }, { "epoch": 5.55, "learning_rate": 4.722672967189729e-05, "loss": 0.0089, "step": 31120 }, { "epoch": 5.55, "learning_rate": 4.722583808844508e-05, "loss": 0.008, "step": 31130 }, { "epoch": 5.55, "learning_rate": 4.7224946504992866e-05, "loss": 0.004, "step": 31140 }, { "epoch": 5.55, "learning_rate": 4.722405492154066e-05, "loss": 0.0083, "step": 31150 }, { "epoch": 5.56, "learning_rate": 4.722316333808845e-05, "loss": 0.0066, "step": 31160 }, { "epoch": 5.56, "learning_rate": 4.722227175463624e-05, "loss": 0.0053, "step": 31170 }, { "epoch": 5.56, "learning_rate": 4.7221380171184024e-05, "loss": 0.005, "step": 31180 }, { "epoch": 5.56, "learning_rate": 4.7220488587731815e-05, "loss": 0.0044, "step": 31190 }, { "epoch": 5.56, "learning_rate": 4.7219597004279606e-05, "loss": 0.0031, "step": 31200 }, { "epoch": 5.57, "learning_rate": 4.721870542082739e-05, "loss": 0.0063, "step": 31210 }, { "epoch": 5.57, "learning_rate": 4.721781383737518e-05, "loss": 0.0044, "step": 31220 }, { "epoch": 5.57, "learning_rate": 4.7216922253922966e-05, "loss": 0.0074, "step": 31230 }, { "epoch": 5.57, "learning_rate": 4.721603067047076e-05, "loss": 0.0034, "step": 31240 }, { "epoch": 5.57, "learning_rate": 4.721513908701854e-05, "loss": 0.0067, "step": 31250 }, { "epoch": 5.57, "learning_rate": 4.721424750356633e-05, "loss": 0.0062, "step": 31260 }, { "epoch": 5.58, "learning_rate": 4.7213355920114124e-05, "loss": 0.0063, "step": 31270 }, { "epoch": 5.58, "learning_rate": 4.7212464336661916e-05, "loss": 0.0089, "step": 31280 }, { "epoch": 5.58, "learning_rate": 4.721157275320971e-05, "loss": 0.0052, "step": 31290 }, { "epoch": 5.58, "learning_rate": 4.721068116975749e-05, "loss": 0.0054, "step": 31300 }, { "epoch": 5.58, "learning_rate": 4.720978958630528e-05, "loss": 0.0041, "step": 31310 }, { "epoch": 5.58, "learning_rate": 4.720889800285307e-05, "loss": 0.0045, "step": 31320 }, { "epoch": 5.59, "learning_rate": 4.720800641940086e-05, "loss": 0.009, "step": 31330 }, { "epoch": 5.59, "learning_rate": 4.720711483594864e-05, "loss": 0.0035, "step": 31340 }, { "epoch": 5.59, "learning_rate": 4.7206223252496434e-05, "loss": 0.0058, "step": 31350 }, { "epoch": 5.59, "learning_rate": 4.7205331669044225e-05, "loss": 0.0027, "step": 31360 }, { "epoch": 5.59, "learning_rate": 4.720444008559201e-05, "loss": 0.0076, "step": 31370 }, { "epoch": 5.6, "learning_rate": 4.720354850213981e-05, "loss": 0.0051, "step": 31380 }, { "epoch": 5.6, "learning_rate": 4.720265691868759e-05, "loss": 0.0068, "step": 31390 }, { "epoch": 5.6, "learning_rate": 4.720176533523538e-05, "loss": 0.0066, "step": 31400 }, { "epoch": 5.6, "learning_rate": 4.720087375178317e-05, "loss": 0.0053, "step": 31410 }, { "epoch": 5.6, "learning_rate": 4.719998216833096e-05, "loss": 0.0046, "step": 31420 }, { "epoch": 5.6, "learning_rate": 4.719909058487875e-05, "loss": 0.0082, "step": 31430 }, { "epoch": 5.61, "learning_rate": 4.7198199001426534e-05, "loss": 0.0079, "step": 31440 }, { "epoch": 5.61, "learning_rate": 4.7197307417974326e-05, "loss": 0.0064, "step": 31450 }, { "epoch": 5.61, "learning_rate": 4.719641583452211e-05, "loss": 0.0065, "step": 31460 }, { "epoch": 5.61, "learning_rate": 4.71955242510699e-05, "loss": 0.0056, "step": 31470 }, { "epoch": 5.61, "learning_rate": 4.7194632667617686e-05, "loss": 0.0099, "step": 31480 }, { "epoch": 5.62, "learning_rate": 4.7193741084165484e-05, "loss": 0.0049, "step": 31490 }, { "epoch": 5.62, "learning_rate": 4.719284950071327e-05, "loss": 0.0049, "step": 31500 }, { "epoch": 5.62, "learning_rate": 4.719195791726106e-05, "loss": 0.0053, "step": 31510 }, { "epoch": 5.62, "learning_rate": 4.719106633380885e-05, "loss": 0.0072, "step": 31520 }, { "epoch": 5.62, "learning_rate": 4.7190174750356635e-05, "loss": 0.0063, "step": 31530 }, { "epoch": 5.62, "learning_rate": 4.7189283166904426e-05, "loss": 0.0037, "step": 31540 }, { "epoch": 5.63, "learning_rate": 4.718839158345221e-05, "loss": 0.0048, "step": 31550 }, { "epoch": 5.63, "learning_rate": 4.71875e-05, "loss": 0.0055, "step": 31560 }, { "epoch": 5.63, "learning_rate": 4.7186608416547786e-05, "loss": 0.0089, "step": 31570 }, { "epoch": 5.63, "learning_rate": 4.718571683309558e-05, "loss": 0.0062, "step": 31580 }, { "epoch": 5.63, "learning_rate": 4.718482524964337e-05, "loss": 0.0072, "step": 31590 }, { "epoch": 5.63, "learning_rate": 4.718393366619116e-05, "loss": 0.005, "step": 31600 }, { "epoch": 5.64, "learning_rate": 4.718304208273895e-05, "loss": 0.0042, "step": 31610 }, { "epoch": 5.64, "learning_rate": 4.7182150499286736e-05, "loss": 0.0046, "step": 31620 }, { "epoch": 5.64, "learning_rate": 4.718125891583453e-05, "loss": 0.0036, "step": 31630 }, { "epoch": 5.64, "learning_rate": 4.718036733238231e-05, "loss": 0.0067, "step": 31640 }, { "epoch": 5.64, "learning_rate": 4.71794757489301e-05, "loss": 0.006, "step": 31650 }, { "epoch": 5.65, "learning_rate": 4.7178584165477894e-05, "loss": 0.0066, "step": 31660 }, { "epoch": 5.65, "learning_rate": 4.717769258202568e-05, "loss": 0.005, "step": 31670 }, { "epoch": 5.65, "learning_rate": 4.717680099857347e-05, "loss": 0.0062, "step": 31680 }, { "epoch": 5.65, "learning_rate": 4.7175909415121254e-05, "loss": 0.0047, "step": 31690 }, { "epoch": 5.65, "learning_rate": 4.7175017831669045e-05, "loss": 0.0039, "step": 31700 }, { "epoch": 5.65, "learning_rate": 4.7174126248216836e-05, "loss": 0.0054, "step": 31710 }, { "epoch": 5.66, "learning_rate": 4.717323466476463e-05, "loss": 0.0053, "step": 31720 }, { "epoch": 5.66, "learning_rate": 4.7172432239657635e-05, "loss": 0.0085, "step": 31730 }, { "epoch": 5.66, "learning_rate": 4.717154065620542e-05, "loss": 0.0042, "step": 31740 }, { "epoch": 5.66, "learning_rate": 4.717064907275322e-05, "loss": 0.007, "step": 31750 }, { "epoch": 5.66, "learning_rate": 4.7169757489301e-05, "loss": 0.0045, "step": 31760 }, { "epoch": 5.67, "learning_rate": 4.716886590584879e-05, "loss": 0.0061, "step": 31770 }, { "epoch": 5.67, "learning_rate": 4.716797432239658e-05, "loss": 0.0084, "step": 31780 }, { "epoch": 5.67, "learning_rate": 4.716708273894437e-05, "loss": 0.0038, "step": 31790 }, { "epoch": 5.67, "learning_rate": 4.7166191155492153e-05, "loss": 0.0075, "step": 31800 }, { "epoch": 5.67, "learning_rate": 4.7165299572039945e-05, "loss": 0.0052, "step": 31810 }, { "epoch": 5.67, "learning_rate": 4.7164407988587736e-05, "loss": 0.0082, "step": 31820 }, { "epoch": 5.68, "learning_rate": 4.716351640513552e-05, "loss": 0.0039, "step": 31830 }, { "epoch": 5.68, "learning_rate": 4.716262482168331e-05, "loss": 0.0069, "step": 31840 }, { "epoch": 5.68, "learning_rate": 4.7161733238231096e-05, "loss": 0.0063, "step": 31850 }, { "epoch": 5.68, "learning_rate": 4.7160841654778894e-05, "loss": 0.0072, "step": 31860 }, { "epoch": 5.68, "learning_rate": 4.715995007132668e-05, "loss": 0.0045, "step": 31870 }, { "epoch": 5.68, "learning_rate": 4.715905848787447e-05, "loss": 0.0059, "step": 31880 }, { "epoch": 5.69, "learning_rate": 4.7158166904422254e-05, "loss": 0.0038, "step": 31890 }, { "epoch": 5.69, "learning_rate": 4.7157275320970045e-05, "loss": 0.009, "step": 31900 }, { "epoch": 5.69, "learning_rate": 4.7156383737517836e-05, "loss": 0.0074, "step": 31910 }, { "epoch": 5.69, "learning_rate": 4.715549215406562e-05, "loss": 0.0048, "step": 31920 }, { "epoch": 5.69, "learning_rate": 4.715460057061341e-05, "loss": 0.006, "step": 31930 }, { "epoch": 5.7, "learning_rate": 4.7153708987161196e-05, "loss": 0.006, "step": 31940 }, { "epoch": 5.7, "learning_rate": 4.715281740370899e-05, "loss": 0.0069, "step": 31950 }, { "epoch": 5.7, "learning_rate": 4.715192582025678e-05, "loss": 0.0076, "step": 31960 }, { "epoch": 5.7, "learning_rate": 4.715103423680457e-05, "loss": 0.0042, "step": 31970 }, { "epoch": 5.7, "learning_rate": 4.715014265335236e-05, "loss": 0.003, "step": 31980 }, { "epoch": 5.7, "learning_rate": 4.7149251069900146e-05, "loss": 0.0052, "step": 31990 }, { "epoch": 5.71, "learning_rate": 4.714835948644794e-05, "loss": 0.0062, "step": 32000 }, { "epoch": 5.71, "learning_rate": 4.714746790299572e-05, "loss": 0.0051, "step": 32010 }, { "epoch": 5.71, "learning_rate": 4.714657631954351e-05, "loss": 0.0058, "step": 32020 }, { "epoch": 5.71, "learning_rate": 4.71456847360913e-05, "loss": 0.0064, "step": 32030 }, { "epoch": 5.71, "learning_rate": 4.714479315263909e-05, "loss": 0.0075, "step": 32040 }, { "epoch": 5.72, "learning_rate": 4.714390156918688e-05, "loss": 0.0052, "step": 32050 }, { "epoch": 5.72, "learning_rate": 4.7143009985734664e-05, "loss": 0.0065, "step": 32060 }, { "epoch": 5.72, "learning_rate": 4.7142118402282455e-05, "loss": 0.0062, "step": 32070 }, { "epoch": 5.72, "learning_rate": 4.7141226818830246e-05, "loss": 0.0049, "step": 32080 }, { "epoch": 5.72, "learning_rate": 4.714033523537804e-05, "loss": 0.0059, "step": 32090 }, { "epoch": 5.72, "learning_rate": 4.713944365192582e-05, "loss": 0.0059, "step": 32100 }, { "epoch": 5.73, "learning_rate": 4.713855206847361e-05, "loss": 0.0061, "step": 32110 }, { "epoch": 5.73, "learning_rate": 4.71376604850214e-05, "loss": 0.0059, "step": 32120 }, { "epoch": 5.73, "learning_rate": 4.713676890156919e-05, "loss": 0.0061, "step": 32130 }, { "epoch": 5.73, "learning_rate": 4.713587731811698e-05, "loss": 0.0048, "step": 32140 }, { "epoch": 5.73, "learning_rate": 4.7134985734664764e-05, "loss": 0.009, "step": 32150 }, { "epoch": 5.73, "learning_rate": 4.7134094151212556e-05, "loss": 0.0037, "step": 32160 }, { "epoch": 5.74, "learning_rate": 4.713320256776034e-05, "loss": 0.0059, "step": 32170 }, { "epoch": 5.74, "learning_rate": 4.713231098430813e-05, "loss": 0.0061, "step": 32180 }, { "epoch": 5.74, "learning_rate": 4.713141940085592e-05, "loss": 0.0071, "step": 32190 }, { "epoch": 5.74, "learning_rate": 4.7130527817403714e-05, "loss": 0.0077, "step": 32200 }, { "epoch": 5.74, "learning_rate": 4.7129636233951505e-05, "loss": 0.0053, "step": 32210 }, { "epoch": 5.75, "learning_rate": 4.712874465049929e-05, "loss": 0.0076, "step": 32220 }, { "epoch": 5.75, "learning_rate": 4.712785306704708e-05, "loss": 0.0082, "step": 32230 }, { "epoch": 5.75, "learning_rate": 4.7126961483594865e-05, "loss": 0.0062, "step": 32240 }, { "epoch": 5.75, "learning_rate": 4.7126069900142656e-05, "loss": 0.0041, "step": 32250 }, { "epoch": 5.75, "learning_rate": 4.712517831669044e-05, "loss": 0.0064, "step": 32260 }, { "epoch": 5.75, "learning_rate": 4.712428673323823e-05, "loss": 0.0104, "step": 32270 }, { "epoch": 5.76, "learning_rate": 4.712339514978602e-05, "loss": 0.0062, "step": 32280 }, { "epoch": 5.76, "learning_rate": 4.712250356633381e-05, "loss": 0.0057, "step": 32290 }, { "epoch": 5.76, "learning_rate": 4.7121611982881606e-05, "loss": 0.0025, "step": 32300 }, { "epoch": 5.76, "learning_rate": 4.712072039942939e-05, "loss": 0.0067, "step": 32310 }, { "epoch": 5.76, "learning_rate": 4.711982881597718e-05, "loss": 0.0057, "step": 32320 }, { "epoch": 5.76, "learning_rate": 4.7118937232524966e-05, "loss": 0.0074, "step": 32330 }, { "epoch": 5.77, "learning_rate": 4.711804564907276e-05, "loss": 0.0078, "step": 32340 }, { "epoch": 5.77, "learning_rate": 4.711715406562054e-05, "loss": 0.0063, "step": 32350 }, { "epoch": 5.77, "learning_rate": 4.711626248216833e-05, "loss": 0.0114, "step": 32360 }, { "epoch": 5.77, "learning_rate": 4.7115370898716124e-05, "loss": 0.0068, "step": 32370 }, { "epoch": 5.77, "learning_rate": 4.711447931526391e-05, "loss": 0.0079, "step": 32380 }, { "epoch": 5.78, "learning_rate": 4.71135877318117e-05, "loss": 0.0093, "step": 32390 }, { "epoch": 5.78, "learning_rate": 4.7112696148359484e-05, "loss": 0.005, "step": 32400 }, { "epoch": 5.78, "learning_rate": 4.711180456490728e-05, "loss": 0.0046, "step": 32410 }, { "epoch": 5.78, "learning_rate": 4.7110912981455066e-05, "loss": 0.0085, "step": 32420 }, { "epoch": 5.78, "learning_rate": 4.711002139800286e-05, "loss": 0.0087, "step": 32430 }, { "epoch": 5.78, "learning_rate": 4.710912981455065e-05, "loss": 0.0038, "step": 32440 }, { "epoch": 5.79, "learning_rate": 4.710823823109843e-05, "loss": 0.005, "step": 32450 }, { "epoch": 5.79, "learning_rate": 4.7107346647646224e-05, "loss": 0.0038, "step": 32460 }, { "epoch": 5.79, "learning_rate": 4.710645506419401e-05, "loss": 0.0054, "step": 32470 }, { "epoch": 5.79, "learning_rate": 4.71055634807418e-05, "loss": 0.0046, "step": 32480 }, { "epoch": 5.79, "learning_rate": 4.7104671897289584e-05, "loss": 0.0091, "step": 32490 }, { "epoch": 5.8, "learning_rate": 4.7103780313837376e-05, "loss": 0.0066, "step": 32500 }, { "epoch": 5.8, "learning_rate": 4.710288873038517e-05, "loss": 0.0044, "step": 32510 }, { "epoch": 5.8, "learning_rate": 4.710199714693296e-05, "loss": 0.0057, "step": 32520 }, { "epoch": 5.8, "learning_rate": 4.710110556348075e-05, "loss": 0.008, "step": 32530 }, { "epoch": 5.8, "learning_rate": 4.7100213980028534e-05, "loss": 0.0054, "step": 32540 }, { "epoch": 5.8, "learning_rate": 4.7099322396576325e-05, "loss": 0.0071, "step": 32550 }, { "epoch": 5.81, "learning_rate": 4.709843081312411e-05, "loss": 0.0042, "step": 32560 }, { "epoch": 5.81, "learning_rate": 4.70975392296719e-05, "loss": 0.0037, "step": 32570 }, { "epoch": 5.81, "learning_rate": 4.7096647646219685e-05, "loss": 0.0034, "step": 32580 }, { "epoch": 5.81, "learning_rate": 4.7095756062767476e-05, "loss": 0.0051, "step": 32590 }, { "epoch": 5.81, "learning_rate": 4.709486447931527e-05, "loss": 0.0086, "step": 32600 }, { "epoch": 5.81, "learning_rate": 4.709397289586305e-05, "loss": 0.0072, "step": 32610 }, { "epoch": 5.82, "learning_rate": 4.709308131241084e-05, "loss": 0.0065, "step": 32620 }, { "epoch": 5.82, "learning_rate": 4.709218972895863e-05, "loss": 0.0068, "step": 32630 }, { "epoch": 5.82, "learning_rate": 4.7091298145506425e-05, "loss": 0.0095, "step": 32640 }, { "epoch": 5.82, "learning_rate": 4.709040656205421e-05, "loss": 0.0056, "step": 32650 }, { "epoch": 5.82, "learning_rate": 4.7089514978602e-05, "loss": 0.0101, "step": 32660 }, { "epoch": 5.83, "learning_rate": 4.708862339514979e-05, "loss": 0.0063, "step": 32670 }, { "epoch": 5.83, "learning_rate": 4.708773181169758e-05, "loss": 0.0039, "step": 32680 }, { "epoch": 5.83, "learning_rate": 4.708684022824537e-05, "loss": 0.0067, "step": 32690 }, { "epoch": 5.83, "learning_rate": 4.708594864479315e-05, "loss": 0.0058, "step": 32700 }, { "epoch": 5.83, "learning_rate": 4.7085057061340943e-05, "loss": 0.0104, "step": 32710 }, { "epoch": 5.83, "learning_rate": 4.708416547788873e-05, "loss": 0.0066, "step": 32720 }, { "epoch": 5.84, "learning_rate": 4.708327389443652e-05, "loss": 0.0098, "step": 32730 }, { "epoch": 5.84, "learning_rate": 4.708238231098431e-05, "loss": 0.0072, "step": 32740 }, { "epoch": 5.84, "learning_rate": 4.70814907275321e-05, "loss": 0.0035, "step": 32750 }, { "epoch": 5.84, "learning_rate": 4.708059914407989e-05, "loss": 0.0092, "step": 32760 }, { "epoch": 5.84, "learning_rate": 4.707970756062768e-05, "loss": 0.0059, "step": 32770 }, { "epoch": 5.85, "learning_rate": 4.707881597717547e-05, "loss": 0.0061, "step": 32780 }, { "epoch": 5.85, "learning_rate": 4.707792439372325e-05, "loss": 0.007, "step": 32790 }, { "epoch": 5.85, "learning_rate": 4.7077032810271044e-05, "loss": 0.0081, "step": 32800 }, { "epoch": 5.85, "learning_rate": 4.707614122681883e-05, "loss": 0.0054, "step": 32810 }, { "epoch": 5.85, "learning_rate": 4.707524964336662e-05, "loss": 0.0053, "step": 32820 }, { "epoch": 5.85, "learning_rate": 4.707435805991441e-05, "loss": 0.0049, "step": 32830 }, { "epoch": 5.86, "learning_rate": 4.7073466476462195e-05, "loss": 0.0059, "step": 32840 }, { "epoch": 5.86, "learning_rate": 4.7072574893009987e-05, "loss": 0.0051, "step": 32850 }, { "epoch": 5.86, "learning_rate": 4.707168330955778e-05, "loss": 0.0064, "step": 32860 }, { "epoch": 5.86, "learning_rate": 4.707079172610557e-05, "loss": 0.0058, "step": 32870 }, { "epoch": 5.86, "learning_rate": 4.7069900142653353e-05, "loss": 0.0046, "step": 32880 }, { "epoch": 5.86, "learning_rate": 4.7069008559201145e-05, "loss": 0.0063, "step": 32890 }, { "epoch": 5.87, "learning_rate": 4.7068116975748936e-05, "loss": 0.0049, "step": 32900 }, { "epoch": 5.87, "learning_rate": 4.706722539229672e-05, "loss": 0.0058, "step": 32910 }, { "epoch": 5.87, "learning_rate": 4.706633380884451e-05, "loss": 0.0052, "step": 32920 }, { "epoch": 5.87, "learning_rate": 4.7065442225392296e-05, "loss": 0.0061, "step": 32930 }, { "epoch": 5.87, "learning_rate": 4.706455064194009e-05, "loss": 0.0063, "step": 32940 }, { "epoch": 5.88, "learning_rate": 4.706365905848787e-05, "loss": 0.007, "step": 32950 }, { "epoch": 5.88, "learning_rate": 4.706276747503566e-05, "loss": 0.0065, "step": 32960 }, { "epoch": 5.88, "learning_rate": 4.7061875891583454e-05, "loss": 0.0071, "step": 32970 }, { "epoch": 5.88, "learning_rate": 4.7060984308131245e-05, "loss": 0.0054, "step": 32980 }, { "epoch": 5.88, "learning_rate": 4.7060092724679036e-05, "loss": 0.0039, "step": 32990 }, { "epoch": 5.88, "learning_rate": 4.705920114122682e-05, "loss": 0.0078, "step": 33000 }, { "epoch": 5.89, "learning_rate": 4.705830955777461e-05, "loss": 0.0056, "step": 33010 }, { "epoch": 5.89, "learning_rate": 4.7057417974322396e-05, "loss": 0.0061, "step": 33020 }, { "epoch": 5.89, "learning_rate": 4.705652639087019e-05, "loss": 0.0079, "step": 33030 }, { "epoch": 5.89, "learning_rate": 4.705563480741797e-05, "loss": 0.0096, "step": 33040 }, { "epoch": 5.89, "learning_rate": 4.705474322396576e-05, "loss": 0.0059, "step": 33050 }, { "epoch": 5.9, "learning_rate": 4.7053851640513555e-05, "loss": 0.0057, "step": 33060 }, { "epoch": 5.9, "learning_rate": 4.705296005706134e-05, "loss": 0.009, "step": 33070 }, { "epoch": 5.9, "learning_rate": 4.705206847360914e-05, "loss": 0.0058, "step": 33080 }, { "epoch": 5.9, "learning_rate": 4.705117689015692e-05, "loss": 0.0033, "step": 33090 }, { "epoch": 5.9, "learning_rate": 4.705028530670471e-05, "loss": 0.005, "step": 33100 }, { "epoch": 5.9, "learning_rate": 4.70493937232525e-05, "loss": 0.0059, "step": 33110 }, { "epoch": 5.91, "learning_rate": 4.704850213980029e-05, "loss": 0.009, "step": 33120 }, { "epoch": 5.91, "learning_rate": 4.704761055634808e-05, "loss": 0.0043, "step": 33130 }, { "epoch": 5.91, "learning_rate": 4.7046718972895864e-05, "loss": 0.004, "step": 33140 }, { "epoch": 5.91, "learning_rate": 4.7045827389443655e-05, "loss": 0.0048, "step": 33150 }, { "epoch": 5.91, "learning_rate": 4.704493580599144e-05, "loss": 0.0069, "step": 33160 }, { "epoch": 5.91, "learning_rate": 4.704404422253923e-05, "loss": 0.0041, "step": 33170 }, { "epoch": 5.92, "learning_rate": 4.7043152639087015e-05, "loss": 0.0055, "step": 33180 }, { "epoch": 5.92, "learning_rate": 4.704226105563481e-05, "loss": 0.0037, "step": 33190 }, { "epoch": 5.92, "learning_rate": 4.70413694721826e-05, "loss": 0.0056, "step": 33200 }, { "epoch": 5.92, "learning_rate": 4.704047788873039e-05, "loss": 0.005, "step": 33210 }, { "epoch": 5.92, "learning_rate": 4.703958630527818e-05, "loss": 0.0062, "step": 33220 }, { "epoch": 5.93, "learning_rate": 4.7038694721825964e-05, "loss": 0.0053, "step": 33230 }, { "epoch": 5.93, "learning_rate": 4.7037803138373756e-05, "loss": 0.004, "step": 33240 }, { "epoch": 5.93, "learning_rate": 4.703691155492154e-05, "loss": 0.0051, "step": 33250 }, { "epoch": 5.93, "learning_rate": 4.703601997146933e-05, "loss": 0.005, "step": 33260 }, { "epoch": 5.93, "learning_rate": 4.7035128388017116e-05, "loss": 0.0063, "step": 33270 }, { "epoch": 5.93, "learning_rate": 4.703423680456491e-05, "loss": 0.0073, "step": 33280 }, { "epoch": 5.94, "learning_rate": 4.70333452211127e-05, "loss": 0.0044, "step": 33290 }, { "epoch": 5.94, "learning_rate": 4.703245363766049e-05, "loss": 0.007, "step": 33300 }, { "epoch": 5.94, "learning_rate": 4.703156205420828e-05, "loss": 0.0063, "step": 33310 }, { "epoch": 5.94, "learning_rate": 4.7030670470756065e-05, "loss": 0.0076, "step": 33320 }, { "epoch": 5.94, "learning_rate": 4.7029778887303856e-05, "loss": 0.0053, "step": 33330 }, { "epoch": 5.95, "learning_rate": 4.702888730385164e-05, "loss": 0.0081, "step": 33340 }, { "epoch": 5.95, "learning_rate": 4.702799572039943e-05, "loss": 0.0064, "step": 33350 }, { "epoch": 5.95, "learning_rate": 4.702710413694722e-05, "loss": 0.0075, "step": 33360 }, { "epoch": 5.95, "learning_rate": 4.702621255349501e-05, "loss": 0.0063, "step": 33370 }, { "epoch": 5.95, "learning_rate": 4.70253209700428e-05, "loss": 0.0037, "step": 33380 }, { "epoch": 5.95, "learning_rate": 4.702442938659058e-05, "loss": 0.0075, "step": 33390 }, { "epoch": 5.96, "learning_rate": 4.7023537803138374e-05, "loss": 0.0058, "step": 33400 }, { "epoch": 5.96, "learning_rate": 4.7022646219686166e-05, "loss": 0.0074, "step": 33410 }, { "epoch": 5.96, "learning_rate": 4.702175463623396e-05, "loss": 0.0062, "step": 33420 }, { "epoch": 5.96, "learning_rate": 4.702086305278174e-05, "loss": 0.0067, "step": 33430 }, { "epoch": 5.96, "learning_rate": 4.701997146932953e-05, "loss": 0.0035, "step": 33440 }, { "epoch": 5.96, "learning_rate": 4.7019079885877324e-05, "loss": 0.0079, "step": 33450 }, { "epoch": 5.97, "learning_rate": 4.701818830242511e-05, "loss": 0.0032, "step": 33460 }, { "epoch": 5.97, "learning_rate": 4.70172967189729e-05, "loss": 0.0045, "step": 33470 }, { "epoch": 5.97, "learning_rate": 4.7016405135520684e-05, "loss": 0.0086, "step": 33480 }, { "epoch": 5.97, "learning_rate": 4.7015513552068475e-05, "loss": 0.0094, "step": 33490 }, { "epoch": 5.97, "learning_rate": 4.701462196861626e-05, "loss": 0.0061, "step": 33500 }, { "epoch": 5.98, "learning_rate": 4.701373038516405e-05, "loss": 0.0071, "step": 33510 }, { "epoch": 5.98, "learning_rate": 4.701283880171184e-05, "loss": 0.0057, "step": 33520 }, { "epoch": 5.98, "learning_rate": 4.701194721825963e-05, "loss": 0.0067, "step": 33530 }, { "epoch": 5.98, "learning_rate": 4.7011055634807424e-05, "loss": 0.0056, "step": 33540 }, { "epoch": 5.98, "learning_rate": 4.701016405135521e-05, "loss": 0.0051, "step": 33550 }, { "epoch": 5.98, "learning_rate": 4.7009272467903e-05, "loss": 0.0074, "step": 33560 }, { "epoch": 5.99, "learning_rate": 4.7008380884450784e-05, "loss": 0.007, "step": 33570 }, { "epoch": 5.99, "learning_rate": 4.7007489300998575e-05, "loss": 0.0113, "step": 33580 }, { "epoch": 5.99, "learning_rate": 4.700659771754637e-05, "loss": 0.0074, "step": 33590 }, { "epoch": 5.99, "learning_rate": 4.700570613409415e-05, "loss": 0.0072, "step": 33600 }, { "epoch": 5.99, "learning_rate": 4.700481455064194e-05, "loss": 0.0038, "step": 33610 }, { "epoch": 6.0, "learning_rate": 4.700392296718973e-05, "loss": 0.0092, "step": 33620 }, { "epoch": 6.0, "learning_rate": 4.7003031383737525e-05, "loss": 0.0056, "step": 33630 }, { "epoch": 6.0, "learning_rate": 4.700213980028531e-05, "loss": 0.0045, "step": 33640 }, { "epoch": 6.0, "eval_loss": 0.013290228322148323, "eval_runtime": 197.0062, "eval_samples_per_second": 23.547, "eval_steps_per_second": 2.944, "step": 33648 }, { "epoch": 6.0, "learning_rate": 4.70012482168331e-05, "loss": 0.0063, "step": 33650 }, { "epoch": 6.0, "learning_rate": 4.7000356633380885e-05, "loss": 0.0057, "step": 33660 }, { "epoch": 6.0, "learning_rate": 4.6999465049928676e-05, "loss": 0.0058, "step": 33670 }, { "epoch": 6.01, "learning_rate": 4.699857346647647e-05, "loss": 0.0042, "step": 33680 }, { "epoch": 6.01, "learning_rate": 4.699768188302425e-05, "loss": 0.0037, "step": 33690 }, { "epoch": 6.01, "learning_rate": 4.699679029957204e-05, "loss": 0.0072, "step": 33700 }, { "epoch": 6.01, "learning_rate": 4.699589871611983e-05, "loss": 0.0061, "step": 33710 }, { "epoch": 6.01, "learning_rate": 4.699500713266762e-05, "loss": 0.0075, "step": 33720 }, { "epoch": 6.01, "learning_rate": 4.69941155492154e-05, "loss": 0.0041, "step": 33730 }, { "epoch": 6.02, "learning_rate": 4.69932239657632e-05, "loss": 0.0045, "step": 33740 }, { "epoch": 6.02, "learning_rate": 4.6992332382310985e-05, "loss": 0.0052, "step": 33750 }, { "epoch": 6.02, "learning_rate": 4.6991440798858777e-05, "loss": 0.0059, "step": 33760 }, { "epoch": 6.02, "learning_rate": 4.699054921540657e-05, "loss": 0.0047, "step": 33770 }, { "epoch": 6.02, "learning_rate": 4.698965763195435e-05, "loss": 0.0054, "step": 33780 }, { "epoch": 6.03, "learning_rate": 4.6988766048502143e-05, "loss": 0.0033, "step": 33790 }, { "epoch": 6.03, "learning_rate": 4.698787446504993e-05, "loss": 0.0055, "step": 33800 }, { "epoch": 6.03, "learning_rate": 4.698698288159772e-05, "loss": 0.0056, "step": 33810 }, { "epoch": 6.03, "learning_rate": 4.698609129814551e-05, "loss": 0.008, "step": 33820 }, { "epoch": 6.03, "learning_rate": 4.6985199714693295e-05, "loss": 0.0055, "step": 33830 }, { "epoch": 6.03, "learning_rate": 4.6984308131241086e-05, "loss": 0.0056, "step": 33840 }, { "epoch": 6.04, "learning_rate": 4.698341654778888e-05, "loss": 0.0055, "step": 33850 }, { "epoch": 6.04, "learning_rate": 4.698252496433667e-05, "loss": 0.0057, "step": 33860 }, { "epoch": 6.04, "learning_rate": 4.698163338088445e-05, "loss": 0.0032, "step": 33870 }, { "epoch": 6.04, "learning_rate": 4.6980741797432244e-05, "loss": 0.0058, "step": 33880 }, { "epoch": 6.04, "learning_rate": 4.697985021398003e-05, "loss": 0.0045, "step": 33890 }, { "epoch": 6.04, "learning_rate": 4.697895863052782e-05, "loss": 0.0043, "step": 33900 }, { "epoch": 6.05, "learning_rate": 4.697806704707561e-05, "loss": 0.0057, "step": 33910 }, { "epoch": 6.05, "learning_rate": 4.6977175463623395e-05, "loss": 0.0046, "step": 33920 }, { "epoch": 6.05, "learning_rate": 4.6976283880171187e-05, "loss": 0.0072, "step": 33930 }, { "epoch": 6.05, "learning_rate": 4.697539229671897e-05, "loss": 0.0041, "step": 33940 }, { "epoch": 6.05, "learning_rate": 4.697450071326676e-05, "loss": 0.0039, "step": 33950 }, { "epoch": 6.06, "learning_rate": 4.697360912981455e-05, "loss": 0.0046, "step": 33960 }, { "epoch": 6.06, "learning_rate": 4.6972717546362345e-05, "loss": 0.0054, "step": 33970 }, { "epoch": 6.06, "learning_rate": 4.697182596291013e-05, "loss": 0.0055, "step": 33980 }, { "epoch": 6.06, "learning_rate": 4.697093437945792e-05, "loss": 0.0043, "step": 33990 }, { "epoch": 6.06, "learning_rate": 4.697004279600571e-05, "loss": 0.0039, "step": 34000 }, { "epoch": 6.06, "learning_rate": 4.6969151212553496e-05, "loss": 0.0085, "step": 34010 }, { "epoch": 6.07, "learning_rate": 4.696825962910129e-05, "loss": 0.0075, "step": 34020 }, { "epoch": 6.07, "learning_rate": 4.696736804564907e-05, "loss": 0.0076, "step": 34030 }, { "epoch": 6.07, "learning_rate": 4.696647646219686e-05, "loss": 0.008, "step": 34040 }, { "epoch": 6.07, "learning_rate": 4.6965584878744654e-05, "loss": 0.0048, "step": 34050 }, { "epoch": 6.07, "learning_rate": 4.696469329529244e-05, "loss": 0.0058, "step": 34060 }, { "epoch": 6.08, "learning_rate": 4.696380171184023e-05, "loss": 0.0059, "step": 34070 }, { "epoch": 6.08, "learning_rate": 4.696291012838802e-05, "loss": 0.0083, "step": 34080 }, { "epoch": 6.08, "learning_rate": 4.696201854493581e-05, "loss": 0.005, "step": 34090 }, { "epoch": 6.08, "learning_rate": 4.6961126961483596e-05, "loss": 0.0063, "step": 34100 }, { "epoch": 6.08, "learning_rate": 4.696023537803139e-05, "loss": 0.0055, "step": 34110 }, { "epoch": 6.08, "learning_rate": 4.695934379457917e-05, "loss": 0.007, "step": 34120 }, { "epoch": 6.09, "learning_rate": 4.695845221112696e-05, "loss": 0.0064, "step": 34130 }, { "epoch": 6.09, "learning_rate": 4.6957560627674754e-05, "loss": 0.0053, "step": 34140 }, { "epoch": 6.09, "learning_rate": 4.695666904422254e-05, "loss": 0.0052, "step": 34150 }, { "epoch": 6.09, "learning_rate": 4.695577746077033e-05, "loss": 0.0039, "step": 34160 }, { "epoch": 6.09, "learning_rate": 4.6954885877318115e-05, "loss": 0.0057, "step": 34170 }, { "epoch": 6.09, "learning_rate": 4.695399429386591e-05, "loss": 0.0067, "step": 34180 }, { "epoch": 6.1, "learning_rate": 4.69531027104137e-05, "loss": 0.0047, "step": 34190 }, { "epoch": 6.1, "learning_rate": 4.695221112696149e-05, "loss": 0.007, "step": 34200 }, { "epoch": 6.1, "learning_rate": 4.695131954350927e-05, "loss": 0.0069, "step": 34210 }, { "epoch": 6.1, "learning_rate": 4.6950427960057064e-05, "loss": 0.007, "step": 34220 }, { "epoch": 6.1, "learning_rate": 4.6949536376604855e-05, "loss": 0.0063, "step": 34230 }, { "epoch": 6.11, "learning_rate": 4.694864479315264e-05, "loss": 0.0031, "step": 34240 }, { "epoch": 6.11, "learning_rate": 4.694775320970043e-05, "loss": 0.0052, "step": 34250 }, { "epoch": 6.11, "learning_rate": 4.6946861626248215e-05, "loss": 0.0042, "step": 34260 }, { "epoch": 6.11, "learning_rate": 4.6945970042796006e-05, "loss": 0.0052, "step": 34270 }, { "epoch": 6.11, "learning_rate": 4.69450784593438e-05, "loss": 0.005, "step": 34280 }, { "epoch": 6.11, "learning_rate": 4.694418687589159e-05, "loss": 0.0057, "step": 34290 }, { "epoch": 6.12, "learning_rate": 4.694329529243937e-05, "loss": 0.0037, "step": 34300 }, { "epoch": 6.12, "learning_rate": 4.6942403708987164e-05, "loss": 0.0055, "step": 34310 }, { "epoch": 6.12, "learning_rate": 4.6941512125534956e-05, "loss": 0.0043, "step": 34320 }, { "epoch": 6.12, "learning_rate": 4.694062054208274e-05, "loss": 0.0083, "step": 34330 }, { "epoch": 6.12, "learning_rate": 4.693972895863053e-05, "loss": 0.0058, "step": 34340 }, { "epoch": 6.13, "learning_rate": 4.6938837375178316e-05, "loss": 0.0064, "step": 34350 }, { "epoch": 6.13, "learning_rate": 4.693794579172611e-05, "loss": 0.0059, "step": 34360 }, { "epoch": 6.13, "learning_rate": 4.69370542082739e-05, "loss": 0.0045, "step": 34370 }, { "epoch": 6.13, "learning_rate": 4.693616262482168e-05, "loss": 0.006, "step": 34380 }, { "epoch": 6.13, "learning_rate": 4.6935271041369474e-05, "loss": 0.0034, "step": 34390 }, { "epoch": 6.13, "learning_rate": 4.6934379457917265e-05, "loss": 0.0032, "step": 34400 }, { "epoch": 6.14, "learning_rate": 4.6933487874465056e-05, "loss": 0.0063, "step": 34410 }, { "epoch": 6.14, "learning_rate": 4.693259629101284e-05, "loss": 0.0033, "step": 34420 }, { "epoch": 6.14, "learning_rate": 4.693170470756063e-05, "loss": 0.0028, "step": 34430 }, { "epoch": 6.14, "learning_rate": 4.6930813124108416e-05, "loss": 0.0059, "step": 34440 }, { "epoch": 6.14, "learning_rate": 4.692992154065621e-05, "loss": 0.0061, "step": 34450 }, { "epoch": 6.14, "learning_rate": 4.6929029957204e-05, "loss": 0.0052, "step": 34460 }, { "epoch": 6.15, "learning_rate": 4.692813837375178e-05, "loss": 0.0044, "step": 34470 }, { "epoch": 6.15, "learning_rate": 4.6927246790299574e-05, "loss": 0.003, "step": 34480 }, { "epoch": 6.15, "learning_rate": 4.692635520684736e-05, "loss": 0.0055, "step": 34490 }, { "epoch": 6.15, "learning_rate": 4.692546362339515e-05, "loss": 0.0068, "step": 34500 }, { "epoch": 6.15, "learning_rate": 4.692457203994294e-05, "loss": 0.0052, "step": 34510 }, { "epoch": 6.16, "learning_rate": 4.692368045649073e-05, "loss": 0.005, "step": 34520 }, { "epoch": 6.16, "learning_rate": 4.692278887303852e-05, "loss": 0.0028, "step": 34530 }, { "epoch": 6.16, "learning_rate": 4.692189728958631e-05, "loss": 0.0042, "step": 34540 }, { "epoch": 6.16, "learning_rate": 4.69210057061341e-05, "loss": 0.0046, "step": 34550 }, { "epoch": 6.16, "learning_rate": 4.6920114122681884e-05, "loss": 0.0067, "step": 34560 }, { "epoch": 6.16, "learning_rate": 4.6919222539229675e-05, "loss": 0.004, "step": 34570 }, { "epoch": 6.17, "learning_rate": 4.691833095577746e-05, "loss": 0.0046, "step": 34580 }, { "epoch": 6.17, "learning_rate": 4.691743937232525e-05, "loss": 0.0038, "step": 34590 }, { "epoch": 6.17, "learning_rate": 4.691654778887304e-05, "loss": 0.0051, "step": 34600 }, { "epoch": 6.17, "learning_rate": 4.6915656205420826e-05, "loss": 0.004, "step": 34610 }, { "epoch": 6.17, "learning_rate": 4.6914764621968624e-05, "loss": 0.0073, "step": 34620 }, { "epoch": 6.18, "learning_rate": 4.691387303851641e-05, "loss": 0.0047, "step": 34630 }, { "epoch": 6.18, "learning_rate": 4.69129814550642e-05, "loss": 0.004, "step": 34640 }, { "epoch": 6.18, "learning_rate": 4.6912089871611984e-05, "loss": 0.0063, "step": 34650 }, { "epoch": 6.18, "learning_rate": 4.6911198288159775e-05, "loss": 0.0055, "step": 34660 }, { "epoch": 6.18, "learning_rate": 4.691030670470756e-05, "loss": 0.0052, "step": 34670 }, { "epoch": 6.18, "learning_rate": 4.690941512125535e-05, "loss": 0.0062, "step": 34680 }, { "epoch": 6.19, "learning_rate": 4.690852353780314e-05, "loss": 0.0071, "step": 34690 }, { "epoch": 6.19, "learning_rate": 4.690763195435093e-05, "loss": 0.0079, "step": 34700 }, { "epoch": 6.19, "learning_rate": 4.690674037089872e-05, "loss": 0.0056, "step": 34710 }, { "epoch": 6.19, "learning_rate": 4.69058487874465e-05, "loss": 0.007, "step": 34720 }, { "epoch": 6.19, "learning_rate": 4.69049572039943e-05, "loss": 0.0071, "step": 34730 }, { "epoch": 6.19, "learning_rate": 4.6904065620542085e-05, "loss": 0.0075, "step": 34740 }, { "epoch": 6.2, "learning_rate": 4.6903174037089876e-05, "loss": 0.0043, "step": 34750 }, { "epoch": 6.2, "learning_rate": 4.690228245363766e-05, "loss": 0.0057, "step": 34760 }, { "epoch": 6.2, "learning_rate": 4.690139087018545e-05, "loss": 0.0057, "step": 34770 }, { "epoch": 6.2, "learning_rate": 4.690049928673324e-05, "loss": 0.0064, "step": 34780 }, { "epoch": 6.2, "learning_rate": 4.689960770328103e-05, "loss": 0.006, "step": 34790 }, { "epoch": 6.21, "learning_rate": 4.689871611982882e-05, "loss": 0.0059, "step": 34800 }, { "epoch": 6.21, "learning_rate": 4.68978245363766e-05, "loss": 0.0066, "step": 34810 }, { "epoch": 6.21, "learning_rate": 4.6896932952924394e-05, "loss": 0.0058, "step": 34820 }, { "epoch": 6.21, "learning_rate": 4.6896041369472185e-05, "loss": 0.0037, "step": 34830 }, { "epoch": 6.21, "learning_rate": 4.6895149786019977e-05, "loss": 0.0056, "step": 34840 }, { "epoch": 6.21, "learning_rate": 4.689425820256777e-05, "loss": 0.0049, "step": 34850 }, { "epoch": 6.22, "learning_rate": 4.689336661911555e-05, "loss": 0.0046, "step": 34860 }, { "epoch": 6.22, "learning_rate": 4.6892475035663343e-05, "loss": 0.0044, "step": 34870 }, { "epoch": 6.22, "learning_rate": 4.689158345221113e-05, "loss": 0.0077, "step": 34880 }, { "epoch": 6.22, "learning_rate": 4.689069186875892e-05, "loss": 0.0065, "step": 34890 }, { "epoch": 6.22, "learning_rate": 4.6889800285306703e-05, "loss": 0.006, "step": 34900 }, { "epoch": 6.23, "learning_rate": 4.6888908701854495e-05, "loss": 0.0049, "step": 34910 }, { "epoch": 6.23, "learning_rate": 4.6888017118402286e-05, "loss": 0.0039, "step": 34920 }, { "epoch": 6.23, "learning_rate": 4.688712553495007e-05, "loss": 0.0059, "step": 34930 }, { "epoch": 6.23, "learning_rate": 4.688623395149786e-05, "loss": 0.0037, "step": 34940 }, { "epoch": 6.23, "learning_rate": 4.688534236804565e-05, "loss": 0.0058, "step": 34950 }, { "epoch": 6.23, "learning_rate": 4.6884450784593444e-05, "loss": 0.0063, "step": 34960 }, { "epoch": 6.24, "learning_rate": 4.688355920114123e-05, "loss": 0.0062, "step": 34970 }, { "epoch": 6.24, "learning_rate": 4.688266761768902e-05, "loss": 0.0068, "step": 34980 }, { "epoch": 6.24, "learning_rate": 4.6881776034236804e-05, "loss": 0.0067, "step": 34990 }, { "epoch": 6.24, "learning_rate": 4.6880884450784595e-05, "loss": 0.0051, "step": 35000 }, { "epoch": 6.24, "learning_rate": 4.6879992867332386e-05, "loss": 0.0064, "step": 35010 }, { "epoch": 6.24, "learning_rate": 4.687910128388017e-05, "loss": 0.0038, "step": 35020 }, { "epoch": 6.25, "learning_rate": 4.687820970042796e-05, "loss": 0.0045, "step": 35030 }, { "epoch": 6.25, "learning_rate": 4.6877318116975747e-05, "loss": 0.0063, "step": 35040 }, { "epoch": 6.25, "learning_rate": 4.687642653352354e-05, "loss": 0.0028, "step": 35050 }, { "epoch": 6.25, "learning_rate": 4.687553495007133e-05, "loss": 0.0051, "step": 35060 }, { "epoch": 6.25, "learning_rate": 4.687464336661912e-05, "loss": 0.006, "step": 35070 }, { "epoch": 6.26, "learning_rate": 4.687375178316691e-05, "loss": 0.0044, "step": 35080 }, { "epoch": 6.26, "learning_rate": 4.6872860199714696e-05, "loss": 0.0065, "step": 35090 }, { "epoch": 6.26, "learning_rate": 4.687196861626249e-05, "loss": 0.0046, "step": 35100 }, { "epoch": 6.26, "learning_rate": 4.687107703281027e-05, "loss": 0.0042, "step": 35110 }, { "epoch": 6.26, "learning_rate": 4.687018544935806e-05, "loss": 0.0079, "step": 35120 }, { "epoch": 6.26, "learning_rate": 4.686929386590585e-05, "loss": 0.0061, "step": 35130 }, { "epoch": 6.27, "learning_rate": 4.686840228245364e-05, "loss": 0.0056, "step": 35140 }, { "epoch": 6.27, "learning_rate": 4.686751069900143e-05, "loss": 0.0051, "step": 35150 }, { "epoch": 6.27, "learning_rate": 4.6866619115549214e-05, "loss": 0.0084, "step": 35160 }, { "epoch": 6.27, "learning_rate": 4.686572753209701e-05, "loss": 0.005, "step": 35170 }, { "epoch": 6.27, "learning_rate": 4.6864835948644796e-05, "loss": 0.0062, "step": 35180 }, { "epoch": 6.27, "learning_rate": 4.686394436519259e-05, "loss": 0.0071, "step": 35190 }, { "epoch": 6.28, "learning_rate": 4.686305278174037e-05, "loss": 0.0052, "step": 35200 }, { "epoch": 6.28, "learning_rate": 4.686216119828816e-05, "loss": 0.0071, "step": 35210 }, { "epoch": 6.28, "learning_rate": 4.686126961483595e-05, "loss": 0.0059, "step": 35220 }, { "epoch": 6.28, "learning_rate": 4.686037803138374e-05, "loss": 0.006, "step": 35230 }, { "epoch": 6.28, "learning_rate": 4.685948644793153e-05, "loss": 0.0037, "step": 35240 }, { "epoch": 6.29, "learning_rate": 4.6858594864479315e-05, "loss": 0.0058, "step": 35250 }, { "epoch": 6.29, "learning_rate": 4.6857703281027106e-05, "loss": 0.0051, "step": 35260 }, { "epoch": 6.29, "learning_rate": 4.685681169757489e-05, "loss": 0.0043, "step": 35270 }, { "epoch": 6.29, "learning_rate": 4.685592011412269e-05, "loss": 0.0064, "step": 35280 }, { "epoch": 6.29, "learning_rate": 4.685502853067047e-05, "loss": 0.0048, "step": 35290 }, { "epoch": 6.29, "learning_rate": 4.6854136947218264e-05, "loss": 0.0033, "step": 35300 }, { "epoch": 6.3, "learning_rate": 4.6853245363766055e-05, "loss": 0.0082, "step": 35310 }, { "epoch": 6.3, "learning_rate": 4.685235378031384e-05, "loss": 0.0058, "step": 35320 }, { "epoch": 6.3, "learning_rate": 4.685146219686163e-05, "loss": 0.0057, "step": 35330 }, { "epoch": 6.3, "learning_rate": 4.6850570613409415e-05, "loss": 0.0051, "step": 35340 }, { "epoch": 6.3, "learning_rate": 4.6849679029957206e-05, "loss": 0.0052, "step": 35350 }, { "epoch": 6.31, "learning_rate": 4.684878744650499e-05, "loss": 0.0071, "step": 35360 }, { "epoch": 6.31, "learning_rate": 4.684789586305278e-05, "loss": 0.0038, "step": 35370 }, { "epoch": 6.31, "learning_rate": 4.684700427960057e-05, "loss": 0.0083, "step": 35380 }, { "epoch": 6.31, "learning_rate": 4.6846112696148364e-05, "loss": 0.0055, "step": 35390 }, { "epoch": 6.31, "learning_rate": 4.6845221112696156e-05, "loss": 0.0055, "step": 35400 }, { "epoch": 6.31, "learning_rate": 4.684432952924394e-05, "loss": 0.0047, "step": 35410 }, { "epoch": 6.32, "learning_rate": 4.684343794579173e-05, "loss": 0.0033, "step": 35420 }, { "epoch": 6.32, "learning_rate": 4.6842546362339516e-05, "loss": 0.0049, "step": 35430 }, { "epoch": 6.32, "learning_rate": 4.684165477888731e-05, "loss": 0.005, "step": 35440 }, { "epoch": 6.32, "learning_rate": 4.684076319543509e-05, "loss": 0.0054, "step": 35450 }, { "epoch": 6.32, "learning_rate": 4.683987161198288e-05, "loss": 0.0049, "step": 35460 }, { "epoch": 6.32, "learning_rate": 4.6838980028530674e-05, "loss": 0.0057, "step": 35470 }, { "epoch": 6.33, "learning_rate": 4.683808844507846e-05, "loss": 0.0095, "step": 35480 }, { "epoch": 6.33, "learning_rate": 4.683719686162625e-05, "loss": 0.0056, "step": 35490 }, { "epoch": 6.33, "learning_rate": 4.683630527817404e-05, "loss": 0.0049, "step": 35500 }, { "epoch": 6.33, "learning_rate": 4.683541369472183e-05, "loss": 0.0052, "step": 35510 }, { "epoch": 6.33, "learning_rate": 4.6834522111269616e-05, "loss": 0.0058, "step": 35520 }, { "epoch": 6.34, "learning_rate": 4.683363052781741e-05, "loss": 0.0048, "step": 35530 }, { "epoch": 6.34, "learning_rate": 4.68327389443652e-05, "loss": 0.0038, "step": 35540 }, { "epoch": 6.34, "learning_rate": 4.683184736091298e-05, "loss": 0.0057, "step": 35550 }, { "epoch": 6.34, "learning_rate": 4.6830955777460774e-05, "loss": 0.0082, "step": 35560 }, { "epoch": 6.34, "learning_rate": 4.683006419400856e-05, "loss": 0.0039, "step": 35570 }, { "epoch": 6.34, "learning_rate": 4.682917261055635e-05, "loss": 0.0049, "step": 35580 }, { "epoch": 6.35, "learning_rate": 4.6828281027104134e-05, "loss": 0.0051, "step": 35590 }, { "epoch": 6.35, "learning_rate": 4.6827389443651926e-05, "loss": 0.006, "step": 35600 }, { "epoch": 6.35, "learning_rate": 4.682649786019972e-05, "loss": 0.0048, "step": 35610 }, { "epoch": 6.35, "learning_rate": 4.682560627674751e-05, "loss": 0.0092, "step": 35620 }, { "epoch": 6.35, "learning_rate": 4.68247146932953e-05, "loss": 0.0085, "step": 35630 }, { "epoch": 6.36, "learning_rate": 4.6823823109843084e-05, "loss": 0.0064, "step": 35640 }, { "epoch": 6.36, "learning_rate": 4.6822931526390875e-05, "loss": 0.0033, "step": 35650 }, { "epoch": 6.36, "learning_rate": 4.682203994293866e-05, "loss": 0.004, "step": 35660 }, { "epoch": 6.36, "learning_rate": 4.682114835948645e-05, "loss": 0.0059, "step": 35670 }, { "epoch": 6.36, "learning_rate": 4.6820256776034235e-05, "loss": 0.0067, "step": 35680 }, { "epoch": 6.36, "learning_rate": 4.6819365192582026e-05, "loss": 0.0049, "step": 35690 }, { "epoch": 6.37, "learning_rate": 4.681847360912982e-05, "loss": 0.0046, "step": 35700 }, { "epoch": 6.37, "learning_rate": 4.68175820256776e-05, "loss": 0.004, "step": 35710 }, { "epoch": 6.37, "learning_rate": 4.68166904422254e-05, "loss": 0.0041, "step": 35720 }, { "epoch": 6.37, "learning_rate": 4.6815798858773184e-05, "loss": 0.0057, "step": 35730 }, { "epoch": 6.37, "learning_rate": 4.6814907275320975e-05, "loss": 0.0046, "step": 35740 }, { "epoch": 6.37, "learning_rate": 4.681401569186876e-05, "loss": 0.0046, "step": 35750 }, { "epoch": 6.38, "learning_rate": 4.681312410841655e-05, "loss": 0.0054, "step": 35760 }, { "epoch": 6.38, "learning_rate": 4.681223252496434e-05, "loss": 0.0048, "step": 35770 }, { "epoch": 6.38, "learning_rate": 4.681134094151213e-05, "loss": 0.0052, "step": 35780 }, { "epoch": 6.38, "learning_rate": 4.681044935805992e-05, "loss": 0.0055, "step": 35790 }, { "epoch": 6.38, "learning_rate": 4.68095577746077e-05, "loss": 0.0061, "step": 35800 }, { "epoch": 6.39, "learning_rate": 4.6808666191155494e-05, "loss": 0.0042, "step": 35810 }, { "epoch": 6.39, "learning_rate": 4.680777460770328e-05, "loss": 0.0063, "step": 35820 }, { "epoch": 6.39, "learning_rate": 4.6806883024251076e-05, "loss": 0.0053, "step": 35830 }, { "epoch": 6.39, "learning_rate": 4.680599144079886e-05, "loss": 0.0034, "step": 35840 }, { "epoch": 6.39, "learning_rate": 4.680518901569187e-05, "loss": 0.006, "step": 35850 }, { "epoch": 6.39, "learning_rate": 4.680429743223966e-05, "loss": 0.0061, "step": 35860 }, { "epoch": 6.4, "learning_rate": 4.6803405848787444e-05, "loss": 0.0049, "step": 35870 }, { "epoch": 6.4, "learning_rate": 4.680251426533524e-05, "loss": 0.004, "step": 35880 }, { "epoch": 6.4, "learning_rate": 4.6801622681883026e-05, "loss": 0.0054, "step": 35890 }, { "epoch": 6.4, "learning_rate": 4.680073109843082e-05, "loss": 0.005, "step": 35900 }, { "epoch": 6.4, "learning_rate": 4.67998395149786e-05, "loss": 0.0054, "step": 35910 }, { "epoch": 6.41, "learning_rate": 4.679894793152639e-05, "loss": 0.007, "step": 35920 }, { "epoch": 6.41, "learning_rate": 4.6798056348074185e-05, "loss": 0.0067, "step": 35930 }, { "epoch": 6.41, "learning_rate": 4.679716476462197e-05, "loss": 0.005, "step": 35940 }, { "epoch": 6.41, "learning_rate": 4.6796362339514984e-05, "loss": 0.0084, "step": 35950 }, { "epoch": 6.41, "learning_rate": 4.679547075606277e-05, "loss": 0.0054, "step": 35960 }, { "epoch": 6.41, "learning_rate": 4.679457917261056e-05, "loss": 0.0048, "step": 35970 }, { "epoch": 6.42, "learning_rate": 4.6793687589158344e-05, "loss": 0.0057, "step": 35980 }, { "epoch": 6.42, "learning_rate": 4.6792796005706135e-05, "loss": 0.0044, "step": 35990 }, { "epoch": 6.42, "learning_rate": 4.6791904422253926e-05, "loss": 0.0069, "step": 36000 }, { "epoch": 6.42, "learning_rate": 4.679101283880171e-05, "loss": 0.0036, "step": 36010 }, { "epoch": 6.42, "learning_rate": 4.67901212553495e-05, "loss": 0.0052, "step": 36020 }, { "epoch": 6.42, "learning_rate": 4.678922967189729e-05, "loss": 0.0061, "step": 36030 }, { "epoch": 6.43, "learning_rate": 4.6788338088445084e-05, "loss": 0.0039, "step": 36040 }, { "epoch": 6.43, "learning_rate": 4.678744650499287e-05, "loss": 0.004, "step": 36050 }, { "epoch": 6.43, "learning_rate": 4.678655492154066e-05, "loss": 0.0043, "step": 36060 }, { "epoch": 6.43, "learning_rate": 4.678566333808845e-05, "loss": 0.0089, "step": 36070 }, { "epoch": 6.43, "learning_rate": 4.6784771754636236e-05, "loss": 0.0045, "step": 36080 }, { "epoch": 6.44, "learning_rate": 4.678388017118403e-05, "loss": 0.0063, "step": 36090 }, { "epoch": 6.44, "learning_rate": 4.678298858773181e-05, "loss": 0.0067, "step": 36100 }, { "epoch": 6.44, "learning_rate": 4.67820970042796e-05, "loss": 0.0036, "step": 36110 }, { "epoch": 6.44, "learning_rate": 4.678120542082739e-05, "loss": 0.0065, "step": 36120 }, { "epoch": 6.44, "learning_rate": 4.678031383737518e-05, "loss": 0.0043, "step": 36130 }, { "epoch": 6.44, "learning_rate": 4.677942225392297e-05, "loss": 0.0066, "step": 36140 }, { "epoch": 6.45, "learning_rate": 4.677853067047076e-05, "loss": 0.0037, "step": 36150 }, { "epoch": 6.45, "learning_rate": 4.677763908701855e-05, "loss": 0.0076, "step": 36160 }, { "epoch": 6.45, "learning_rate": 4.6776747503566336e-05, "loss": 0.0053, "step": 36170 }, { "epoch": 6.45, "learning_rate": 4.677585592011413e-05, "loss": 0.0068, "step": 36180 }, { "epoch": 6.45, "learning_rate": 4.677496433666191e-05, "loss": 0.008, "step": 36190 }, { "epoch": 6.46, "learning_rate": 4.67740727532097e-05, "loss": 0.004, "step": 36200 }, { "epoch": 6.46, "learning_rate": 4.677318116975749e-05, "loss": 0.009, "step": 36210 }, { "epoch": 6.46, "learning_rate": 4.677228958630528e-05, "loss": 0.0063, "step": 36220 }, { "epoch": 6.46, "learning_rate": 4.677139800285307e-05, "loss": 0.0062, "step": 36230 }, { "epoch": 6.46, "learning_rate": 4.6770506419400854e-05, "loss": 0.0047, "step": 36240 }, { "epoch": 6.46, "learning_rate": 4.676961483594865e-05, "loss": 0.0076, "step": 36250 }, { "epoch": 6.47, "learning_rate": 4.676872325249644e-05, "loss": 0.0079, "step": 36260 }, { "epoch": 6.47, "learning_rate": 4.676783166904423e-05, "loss": 0.0059, "step": 36270 }, { "epoch": 6.47, "learning_rate": 4.676694008559201e-05, "loss": 0.005, "step": 36280 }, { "epoch": 6.47, "learning_rate": 4.6766048502139804e-05, "loss": 0.0064, "step": 36290 }, { "epoch": 6.47, "learning_rate": 4.6765156918687595e-05, "loss": 0.0057, "step": 36300 }, { "epoch": 6.47, "learning_rate": 4.676426533523538e-05, "loss": 0.0054, "step": 36310 }, { "epoch": 6.48, "learning_rate": 4.676337375178317e-05, "loss": 0.006, "step": 36320 }, { "epoch": 6.48, "learning_rate": 4.6762482168330955e-05, "loss": 0.0043, "step": 36330 }, { "epoch": 6.48, "learning_rate": 4.6761590584878746e-05, "loss": 0.0057, "step": 36340 }, { "epoch": 6.48, "learning_rate": 4.676069900142653e-05, "loss": 0.0069, "step": 36350 }, { "epoch": 6.48, "learning_rate": 4.675980741797433e-05, "loss": 0.0056, "step": 36360 }, { "epoch": 6.49, "learning_rate": 4.675891583452211e-05, "loss": 0.0037, "step": 36370 }, { "epoch": 6.49, "learning_rate": 4.6758024251069904e-05, "loss": 0.0058, "step": 36380 }, { "epoch": 6.49, "learning_rate": 4.6757132667617695e-05, "loss": 0.0041, "step": 36390 }, { "epoch": 6.49, "learning_rate": 4.675624108416548e-05, "loss": 0.0052, "step": 36400 }, { "epoch": 6.49, "learning_rate": 4.675534950071327e-05, "loss": 0.0034, "step": 36410 }, { "epoch": 6.49, "learning_rate": 4.6754457917261055e-05, "loss": 0.0042, "step": 36420 }, { "epoch": 6.5, "learning_rate": 4.675356633380885e-05, "loss": 0.0068, "step": 36430 }, { "epoch": 6.5, "learning_rate": 4.675267475035663e-05, "loss": 0.0063, "step": 36440 }, { "epoch": 6.5, "learning_rate": 4.675178316690442e-05, "loss": 0.0076, "step": 36450 }, { "epoch": 6.5, "learning_rate": 4.6750891583452214e-05, "loss": 0.0067, "step": 36460 }, { "epoch": 6.5, "learning_rate": 4.6750000000000005e-05, "loss": 0.0061, "step": 36470 }, { "epoch": 6.5, "learning_rate": 4.6749108416547796e-05, "loss": 0.0064, "step": 36480 }, { "epoch": 6.51, "learning_rate": 4.674821683309558e-05, "loss": 0.0078, "step": 36490 }, { "epoch": 6.51, "learning_rate": 4.674732524964337e-05, "loss": 0.0065, "step": 36500 }, { "epoch": 6.51, "learning_rate": 4.6746433666191156e-05, "loss": 0.0057, "step": 36510 }, { "epoch": 6.51, "learning_rate": 4.674554208273895e-05, "loss": 0.0057, "step": 36520 }, { "epoch": 6.51, "learning_rate": 4.674465049928674e-05, "loss": 0.0062, "step": 36530 }, { "epoch": 6.52, "learning_rate": 4.674375891583452e-05, "loss": 0.0066, "step": 36540 }, { "epoch": 6.52, "learning_rate": 4.6742867332382314e-05, "loss": 0.0035, "step": 36550 }, { "epoch": 6.52, "learning_rate": 4.67419757489301e-05, "loss": 0.0072, "step": 36560 }, { "epoch": 6.52, "learning_rate": 4.674108416547789e-05, "loss": 0.0033, "step": 36570 }, { "epoch": 6.52, "learning_rate": 4.674019258202568e-05, "loss": 0.0067, "step": 36580 }, { "epoch": 6.52, "learning_rate": 4.673930099857347e-05, "loss": 0.0036, "step": 36590 }, { "epoch": 6.53, "learning_rate": 4.6738409415121257e-05, "loss": 0.0041, "step": 36600 }, { "epoch": 6.53, "learning_rate": 4.673751783166905e-05, "loss": 0.0071, "step": 36610 }, { "epoch": 6.53, "learning_rate": 4.673662624821684e-05, "loss": 0.0067, "step": 36620 }, { "epoch": 6.53, "learning_rate": 4.6735734664764623e-05, "loss": 0.0064, "step": 36630 }, { "epoch": 6.53, "learning_rate": 4.6734843081312415e-05, "loss": 0.0045, "step": 36640 }, { "epoch": 6.54, "learning_rate": 4.67339514978602e-05, "loss": 0.0079, "step": 36650 }, { "epoch": 6.54, "learning_rate": 4.673305991440799e-05, "loss": 0.0046, "step": 36660 }, { "epoch": 6.54, "learning_rate": 4.6732168330955775e-05, "loss": 0.0056, "step": 36670 }, { "epoch": 6.54, "learning_rate": 4.6731276747503566e-05, "loss": 0.005, "step": 36680 }, { "epoch": 6.54, "learning_rate": 4.673038516405136e-05, "loss": 0.0062, "step": 36690 }, { "epoch": 6.54, "learning_rate": 4.672949358059915e-05, "loss": 0.0045, "step": 36700 }, { "epoch": 6.55, "learning_rate": 4.672860199714694e-05, "loss": 0.0091, "step": 36710 }, { "epoch": 6.55, "learning_rate": 4.6727710413694724e-05, "loss": 0.0058, "step": 36720 }, { "epoch": 6.55, "learning_rate": 4.6726818830242515e-05, "loss": 0.0048, "step": 36730 }, { "epoch": 6.55, "learning_rate": 4.67259272467903e-05, "loss": 0.0069, "step": 36740 }, { "epoch": 6.55, "learning_rate": 4.672503566333809e-05, "loss": 0.0074, "step": 36750 }, { "epoch": 6.55, "learning_rate": 4.672414407988588e-05, "loss": 0.0038, "step": 36760 }, { "epoch": 6.56, "learning_rate": 4.6723252496433666e-05, "loss": 0.0076, "step": 36770 }, { "epoch": 6.56, "learning_rate": 4.672236091298146e-05, "loss": 0.0049, "step": 36780 }, { "epoch": 6.56, "learning_rate": 4.672146932952924e-05, "loss": 0.0042, "step": 36790 }, { "epoch": 6.56, "learning_rate": 4.672057774607704e-05, "loss": 0.0043, "step": 36800 }, { "epoch": 6.56, "learning_rate": 4.6719686162624825e-05, "loss": 0.0054, "step": 36810 }, { "epoch": 6.57, "learning_rate": 4.6718794579172616e-05, "loss": 0.0021, "step": 36820 }, { "epoch": 6.57, "learning_rate": 4.67179029957204e-05, "loss": 0.0037, "step": 36830 }, { "epoch": 6.57, "learning_rate": 4.671701141226819e-05, "loss": 0.0058, "step": 36840 }, { "epoch": 6.57, "learning_rate": 4.671611982881598e-05, "loss": 0.0055, "step": 36850 }, { "epoch": 6.57, "learning_rate": 4.671522824536377e-05, "loss": 0.0041, "step": 36860 }, { "epoch": 6.57, "learning_rate": 4.671433666191156e-05, "loss": 0.0039, "step": 36870 }, { "epoch": 6.58, "learning_rate": 4.671344507845934e-05, "loss": 0.0055, "step": 36880 }, { "epoch": 6.58, "learning_rate": 4.6712553495007134e-05, "loss": 0.0063, "step": 36890 }, { "epoch": 6.58, "learning_rate": 4.671166191155492e-05, "loss": 0.0032, "step": 36900 }, { "epoch": 6.58, "learning_rate": 4.6710770328102716e-05, "loss": 0.0043, "step": 36910 }, { "epoch": 6.58, "learning_rate": 4.67098787446505e-05, "loss": 0.0098, "step": 36920 }, { "epoch": 6.59, "learning_rate": 4.670898716119829e-05, "loss": 0.0067, "step": 36930 }, { "epoch": 6.59, "learning_rate": 4.670809557774608e-05, "loss": 0.0074, "step": 36940 }, { "epoch": 6.59, "learning_rate": 4.670720399429387e-05, "loss": 0.0079, "step": 36950 }, { "epoch": 6.59, "learning_rate": 4.670631241084166e-05, "loss": 0.0076, "step": 36960 }, { "epoch": 6.59, "learning_rate": 4.670542082738944e-05, "loss": 0.0054, "step": 36970 }, { "epoch": 6.59, "learning_rate": 4.6704529243937234e-05, "loss": 0.005, "step": 36980 }, { "epoch": 6.6, "learning_rate": 4.6703637660485026e-05, "loss": 0.0034, "step": 36990 }, { "epoch": 6.6, "learning_rate": 4.670274607703281e-05, "loss": 0.0062, "step": 37000 }, { "epoch": 6.6, "learning_rate": 4.67018544935806e-05, "loss": 0.0053, "step": 37010 }, { "epoch": 6.6, "learning_rate": 4.670096291012839e-05, "loss": 0.0044, "step": 37020 }, { "epoch": 6.6, "learning_rate": 4.6700071326676184e-05, "loss": 0.0061, "step": 37030 }, { "epoch": 6.6, "learning_rate": 4.669917974322397e-05, "loss": 0.006, "step": 37040 }, { "epoch": 6.61, "learning_rate": 4.669828815977176e-05, "loss": 0.0039, "step": 37050 }, { "epoch": 6.61, "learning_rate": 4.6697396576319544e-05, "loss": 0.0047, "step": 37060 }, { "epoch": 6.61, "learning_rate": 4.6696504992867335e-05, "loss": 0.006, "step": 37070 }, { "epoch": 6.61, "learning_rate": 4.6695613409415126e-05, "loss": 0.0052, "step": 37080 }, { "epoch": 6.61, "learning_rate": 4.669472182596291e-05, "loss": 0.0088, "step": 37090 }, { "epoch": 6.62, "learning_rate": 4.66938302425107e-05, "loss": 0.0049, "step": 37100 }, { "epoch": 6.62, "learning_rate": 4.6692938659058486e-05, "loss": 0.007, "step": 37110 }, { "epoch": 6.62, "learning_rate": 4.669204707560628e-05, "loss": 0.0036, "step": 37120 }, { "epoch": 6.62, "learning_rate": 4.669115549215407e-05, "loss": 0.0071, "step": 37130 }, { "epoch": 6.62, "learning_rate": 4.669026390870186e-05, "loss": 0.0089, "step": 37140 }, { "epoch": 6.62, "learning_rate": 4.6689372325249644e-05, "loss": 0.0056, "step": 37150 }, { "epoch": 6.63, "learning_rate": 4.6688480741797436e-05, "loss": 0.0071, "step": 37160 }, { "epoch": 6.63, "learning_rate": 4.668758915834523e-05, "loss": 0.0056, "step": 37170 }, { "epoch": 6.63, "learning_rate": 4.668669757489301e-05, "loss": 0.0068, "step": 37180 }, { "epoch": 6.63, "learning_rate": 4.66858059914408e-05, "loss": 0.0048, "step": 37190 }, { "epoch": 6.63, "learning_rate": 4.668491440798859e-05, "loss": 0.0042, "step": 37200 }, { "epoch": 6.64, "learning_rate": 4.668402282453638e-05, "loss": 0.0063, "step": 37210 }, { "epoch": 6.64, "learning_rate": 4.668313124108417e-05, "loss": 0.0064, "step": 37220 }, { "epoch": 6.64, "learning_rate": 4.6682239657631954e-05, "loss": 0.0045, "step": 37230 }, { "epoch": 6.64, "learning_rate": 4.6681348074179745e-05, "loss": 0.0063, "step": 37240 }, { "epoch": 6.64, "learning_rate": 4.6680456490727536e-05, "loss": 0.0037, "step": 37250 }, { "epoch": 6.64, "learning_rate": 4.667956490727533e-05, "loss": 0.0048, "step": 37260 }, { "epoch": 6.65, "learning_rate": 4.667867332382311e-05, "loss": 0.0064, "step": 37270 }, { "epoch": 6.65, "learning_rate": 4.66777817403709e-05, "loss": 0.0042, "step": 37280 }, { "epoch": 6.65, "learning_rate": 4.667689015691869e-05, "loss": 0.0032, "step": 37290 }, { "epoch": 6.65, "learning_rate": 4.667599857346648e-05, "loss": 0.008, "step": 37300 }, { "epoch": 6.65, "learning_rate": 4.667510699001427e-05, "loss": 0.0067, "step": 37310 }, { "epoch": 6.65, "learning_rate": 4.6674215406562054e-05, "loss": 0.0078, "step": 37320 }, { "epoch": 6.66, "learning_rate": 4.6673323823109845e-05, "loss": 0.0056, "step": 37330 }, { "epoch": 6.66, "learning_rate": 4.667243223965763e-05, "loss": 0.0038, "step": 37340 }, { "epoch": 6.66, "learning_rate": 4.667154065620543e-05, "loss": 0.0082, "step": 37350 }, { "epoch": 6.66, "learning_rate": 4.667064907275321e-05, "loss": 0.0071, "step": 37360 }, { "epoch": 6.66, "learning_rate": 4.6669757489301004e-05, "loss": 0.0095, "step": 37370 }, { "epoch": 6.67, "learning_rate": 4.666886590584879e-05, "loss": 0.0059, "step": 37380 }, { "epoch": 6.67, "learning_rate": 4.666797432239658e-05, "loss": 0.0086, "step": 37390 }, { "epoch": 6.67, "learning_rate": 4.666708273894437e-05, "loss": 0.0033, "step": 37400 }, { "epoch": 6.67, "learning_rate": 4.6666191155492155e-05, "loss": 0.0032, "step": 37410 }, { "epoch": 6.67, "learning_rate": 4.6665299572039946e-05, "loss": 0.0048, "step": 37420 }, { "epoch": 6.67, "learning_rate": 4.666440798858773e-05, "loss": 0.0055, "step": 37430 }, { "epoch": 6.68, "learning_rate": 4.666351640513552e-05, "loss": 0.0055, "step": 37440 }, { "epoch": 6.68, "learning_rate": 4.6662624821683306e-05, "loss": 0.0034, "step": 37450 }, { "epoch": 6.68, "learning_rate": 4.6661733238231104e-05, "loss": 0.0031, "step": 37460 }, { "epoch": 6.68, "learning_rate": 4.666084165477889e-05, "loss": 0.0065, "step": 37470 }, { "epoch": 6.68, "learning_rate": 4.665995007132668e-05, "loss": 0.0059, "step": 37480 }, { "epoch": 6.69, "learning_rate": 4.665905848787447e-05, "loss": 0.0057, "step": 37490 }, { "epoch": 6.69, "learning_rate": 4.6658166904422255e-05, "loss": 0.0048, "step": 37500 }, { "epoch": 6.69, "learning_rate": 4.6657275320970047e-05, "loss": 0.0079, "step": 37510 }, { "epoch": 6.69, "learning_rate": 4.665638373751783e-05, "loss": 0.0063, "step": 37520 }, { "epoch": 6.69, "learning_rate": 4.665549215406562e-05, "loss": 0.0081, "step": 37530 }, { "epoch": 6.69, "learning_rate": 4.6654600570613413e-05, "loss": 0.0038, "step": 37540 }, { "epoch": 6.7, "learning_rate": 4.66537089871612e-05, "loss": 0.005, "step": 37550 }, { "epoch": 6.7, "learning_rate": 4.665281740370899e-05, "loss": 0.0085, "step": 37560 }, { "epoch": 6.7, "learning_rate": 4.665192582025678e-05, "loss": 0.0038, "step": 37570 }, { "epoch": 6.7, "learning_rate": 4.665112339514979e-05, "loss": 0.0089, "step": 37580 }, { "epoch": 6.7, "learning_rate": 4.665023181169757e-05, "loss": 0.0041, "step": 37590 }, { "epoch": 6.7, "learning_rate": 4.6649340228245364e-05, "loss": 0.0052, "step": 37600 }, { "epoch": 6.71, "learning_rate": 4.6648448644793155e-05, "loss": 0.0075, "step": 37610 }, { "epoch": 6.71, "learning_rate": 4.6647557061340946e-05, "loss": 0.0065, "step": 37620 }, { "epoch": 6.71, "learning_rate": 4.664666547788874e-05, "loss": 0.0048, "step": 37630 }, { "epoch": 6.71, "learning_rate": 4.664577389443652e-05, "loss": 0.006, "step": 37640 }, { "epoch": 6.71, "learning_rate": 4.664488231098431e-05, "loss": 0.007, "step": 37650 }, { "epoch": 6.72, "learning_rate": 4.66439907275321e-05, "loss": 0.0054, "step": 37660 }, { "epoch": 6.72, "learning_rate": 4.664309914407989e-05, "loss": 0.0027, "step": 37670 }, { "epoch": 6.72, "learning_rate": 4.664220756062767e-05, "loss": 0.0056, "step": 37680 }, { "epoch": 6.72, "learning_rate": 4.6641315977175465e-05, "loss": 0.0062, "step": 37690 }, { "epoch": 6.72, "learning_rate": 4.6640424393723256e-05, "loss": 0.0052, "step": 37700 }, { "epoch": 6.72, "learning_rate": 4.663953281027104e-05, "loss": 0.0067, "step": 37710 }, { "epoch": 6.73, "learning_rate": 4.663864122681883e-05, "loss": 0.0031, "step": 37720 }, { "epoch": 6.73, "learning_rate": 4.663774964336662e-05, "loss": 0.0063, "step": 37730 }, { "epoch": 6.73, "learning_rate": 4.6636858059914414e-05, "loss": 0.0051, "step": 37740 }, { "epoch": 6.73, "learning_rate": 4.66359664764622e-05, "loss": 0.0061, "step": 37750 }, { "epoch": 6.73, "learning_rate": 4.663507489300999e-05, "loss": 0.0049, "step": 37760 }, { "epoch": 6.74, "learning_rate": 4.663418330955778e-05, "loss": 0.004, "step": 37770 }, { "epoch": 6.74, "learning_rate": 4.6633291726105565e-05, "loss": 0.0078, "step": 37780 }, { "epoch": 6.74, "learning_rate": 4.6632400142653356e-05, "loss": 0.0049, "step": 37790 }, { "epoch": 6.74, "learning_rate": 4.663150855920114e-05, "loss": 0.0049, "step": 37800 }, { "epoch": 6.74, "learning_rate": 4.663061697574893e-05, "loss": 0.0095, "step": 37810 }, { "epoch": 6.74, "learning_rate": 4.6629725392296716e-05, "loss": 0.0054, "step": 37820 }, { "epoch": 6.75, "learning_rate": 4.662883380884451e-05, "loss": 0.0029, "step": 37830 }, { "epoch": 6.75, "learning_rate": 4.66279422253923e-05, "loss": 0.0046, "step": 37840 }, { "epoch": 6.75, "learning_rate": 4.662705064194009e-05, "loss": 0.0047, "step": 37850 }, { "epoch": 6.75, "learning_rate": 4.662615905848788e-05, "loss": 0.0034, "step": 37860 }, { "epoch": 6.75, "learning_rate": 4.6625267475035666e-05, "loss": 0.0083, "step": 37870 }, { "epoch": 6.75, "learning_rate": 4.662437589158346e-05, "loss": 0.0047, "step": 37880 }, { "epoch": 6.76, "learning_rate": 4.662348430813124e-05, "loss": 0.0049, "step": 37890 }, { "epoch": 6.76, "learning_rate": 4.662259272467903e-05, "loss": 0.0034, "step": 37900 }, { "epoch": 6.76, "learning_rate": 4.662170114122682e-05, "loss": 0.0067, "step": 37910 }, { "epoch": 6.76, "learning_rate": 4.662080955777461e-05, "loss": 0.006, "step": 37920 }, { "epoch": 6.76, "learning_rate": 4.66199179743224e-05, "loss": 0.0056, "step": 37930 }, { "epoch": 6.77, "learning_rate": 4.6619026390870184e-05, "loss": 0.0093, "step": 37940 }, { "epoch": 6.77, "learning_rate": 4.661813480741798e-05, "loss": 0.0051, "step": 37950 }, { "epoch": 6.77, "learning_rate": 4.6617243223965766e-05, "loss": 0.0067, "step": 37960 }, { "epoch": 6.77, "learning_rate": 4.661635164051356e-05, "loss": 0.006, "step": 37970 }, { "epoch": 6.77, "learning_rate": 4.661546005706134e-05, "loss": 0.0045, "step": 37980 }, { "epoch": 6.77, "learning_rate": 4.661456847360913e-05, "loss": 0.0048, "step": 37990 }, { "epoch": 6.78, "learning_rate": 4.6613676890156924e-05, "loss": 0.0059, "step": 38000 }, { "epoch": 6.78, "learning_rate": 4.661278530670471e-05, "loss": 0.0071, "step": 38010 }, { "epoch": 6.78, "learning_rate": 4.66118937232525e-05, "loss": 0.0051, "step": 38020 }, { "epoch": 6.78, "learning_rate": 4.6611002139800284e-05, "loss": 0.0039, "step": 38030 }, { "epoch": 6.78, "learning_rate": 4.6610110556348076e-05, "loss": 0.0056, "step": 38040 }, { "epoch": 6.78, "learning_rate": 4.660921897289586e-05, "loss": 0.0047, "step": 38050 }, { "epoch": 6.79, "learning_rate": 4.660832738944366e-05, "loss": 0.0063, "step": 38060 }, { "epoch": 6.79, "learning_rate": 4.660743580599144e-05, "loss": 0.004, "step": 38070 }, { "epoch": 6.79, "learning_rate": 4.6606544222539234e-05, "loss": 0.0038, "step": 38080 }, { "epoch": 6.79, "learning_rate": 4.6605652639087025e-05, "loss": 0.0067, "step": 38090 }, { "epoch": 6.79, "learning_rate": 4.660476105563481e-05, "loss": 0.0043, "step": 38100 }, { "epoch": 6.8, "learning_rate": 4.66038694721826e-05, "loss": 0.0045, "step": 38110 }, { "epoch": 6.8, "learning_rate": 4.6602977888730385e-05, "loss": 0.0048, "step": 38120 }, { "epoch": 6.8, "learning_rate": 4.6602086305278176e-05, "loss": 0.0047, "step": 38130 }, { "epoch": 6.8, "learning_rate": 4.660119472182596e-05, "loss": 0.0037, "step": 38140 }, { "epoch": 6.8, "learning_rate": 4.660030313837375e-05, "loss": 0.0055, "step": 38150 }, { "epoch": 6.8, "learning_rate": 4.659941155492154e-05, "loss": 0.0066, "step": 38160 }, { "epoch": 6.81, "learning_rate": 4.6598519971469334e-05, "loss": 0.0068, "step": 38170 }, { "epoch": 6.81, "learning_rate": 4.6597628388017125e-05, "loss": 0.0065, "step": 38180 }, { "epoch": 6.81, "learning_rate": 4.659673680456491e-05, "loss": 0.0049, "step": 38190 }, { "epoch": 6.81, "learning_rate": 4.65958452211127e-05, "loss": 0.0045, "step": 38200 }, { "epoch": 6.81, "learning_rate": 4.6594953637660485e-05, "loss": 0.0087, "step": 38210 }, { "epoch": 6.82, "learning_rate": 4.659406205420828e-05, "loss": 0.0045, "step": 38220 }, { "epoch": 6.82, "learning_rate": 4.659317047075607e-05, "loss": 0.0046, "step": 38230 }, { "epoch": 6.82, "learning_rate": 4.659227888730385e-05, "loss": 0.0062, "step": 38240 }, { "epoch": 6.82, "learning_rate": 4.6591387303851644e-05, "loss": 0.0071, "step": 38250 }, { "epoch": 6.82, "learning_rate": 4.659049572039943e-05, "loss": 0.0065, "step": 38260 }, { "epoch": 6.82, "learning_rate": 4.658960413694722e-05, "loss": 0.0046, "step": 38270 }, { "epoch": 6.83, "learning_rate": 4.658871255349501e-05, "loss": 0.0044, "step": 38280 }, { "epoch": 6.83, "learning_rate": 4.65878209700428e-05, "loss": 0.0047, "step": 38290 }, { "epoch": 6.83, "learning_rate": 4.6586929386590586e-05, "loss": 0.0079, "step": 38300 }, { "epoch": 6.83, "learning_rate": 4.658603780313838e-05, "loss": 0.0025, "step": 38310 }, { "epoch": 6.83, "learning_rate": 4.658514621968617e-05, "loss": 0.0034, "step": 38320 }, { "epoch": 6.83, "learning_rate": 4.658425463623395e-05, "loss": 0.0055, "step": 38330 }, { "epoch": 6.84, "learning_rate": 4.6583363052781744e-05, "loss": 0.0052, "step": 38340 }, { "epoch": 6.84, "learning_rate": 4.658247146932953e-05, "loss": 0.0026, "step": 38350 }, { "epoch": 6.84, "learning_rate": 4.658157988587732e-05, "loss": 0.007, "step": 38360 }, { "epoch": 6.84, "learning_rate": 4.6580688302425104e-05, "loss": 0.0046, "step": 38370 }, { "epoch": 6.84, "learning_rate": 4.6579796718972895e-05, "loss": 0.0072, "step": 38380 }, { "epoch": 6.85, "learning_rate": 4.657890513552069e-05, "loss": 0.0067, "step": 38390 }, { "epoch": 6.85, "learning_rate": 4.657801355206848e-05, "loss": 0.0058, "step": 38400 }, { "epoch": 6.85, "learning_rate": 4.657712196861627e-05, "loss": 0.0057, "step": 38410 }, { "epoch": 6.85, "learning_rate": 4.6576230385164053e-05, "loss": 0.0044, "step": 38420 }, { "epoch": 6.85, "learning_rate": 4.6575338801711845e-05, "loss": 0.0066, "step": 38430 }, { "epoch": 6.85, "learning_rate": 4.657444721825963e-05, "loss": 0.0036, "step": 38440 }, { "epoch": 6.86, "learning_rate": 4.657355563480742e-05, "loss": 0.0061, "step": 38450 }, { "epoch": 6.86, "learning_rate": 4.657266405135521e-05, "loss": 0.0058, "step": 38460 }, { "epoch": 6.86, "learning_rate": 4.6571772467902996e-05, "loss": 0.0086, "step": 38470 }, { "epoch": 6.86, "learning_rate": 4.657088088445079e-05, "loss": 0.0063, "step": 38480 }, { "epoch": 6.86, "learning_rate": 4.656998930099857e-05, "loss": 0.0033, "step": 38490 }, { "epoch": 6.87, "learning_rate": 4.656909771754637e-05, "loss": 0.0076, "step": 38500 }, { "epoch": 6.87, "learning_rate": 4.6568206134094154e-05, "loss": 0.007, "step": 38510 }, { "epoch": 6.87, "learning_rate": 4.6567314550641945e-05, "loss": 0.005, "step": 38520 }, { "epoch": 6.87, "learning_rate": 4.656642296718973e-05, "loss": 0.0046, "step": 38530 }, { "epoch": 6.87, "learning_rate": 4.656553138373752e-05, "loss": 0.0061, "step": 38540 }, { "epoch": 6.87, "learning_rate": 4.656463980028531e-05, "loss": 0.0046, "step": 38550 }, { "epoch": 6.88, "learning_rate": 4.6563748216833097e-05, "loss": 0.0061, "step": 38560 }, { "epoch": 6.88, "learning_rate": 4.656285663338089e-05, "loss": 0.0077, "step": 38570 }, { "epoch": 6.88, "learning_rate": 4.656196504992867e-05, "loss": 0.0046, "step": 38580 }, { "epoch": 6.88, "learning_rate": 4.656107346647646e-05, "loss": 0.0062, "step": 38590 }, { "epoch": 6.88, "learning_rate": 4.656018188302425e-05, "loss": 0.0029, "step": 38600 }, { "epoch": 6.88, "learning_rate": 4.6559290299572046e-05, "loss": 0.0071, "step": 38610 }, { "epoch": 6.89, "learning_rate": 4.655839871611983e-05, "loss": 0.0056, "step": 38620 }, { "epoch": 6.89, "learning_rate": 4.655750713266762e-05, "loss": 0.0064, "step": 38630 }, { "epoch": 6.89, "learning_rate": 4.655661554921541e-05, "loss": 0.0049, "step": 38640 }, { "epoch": 6.89, "learning_rate": 4.65557239657632e-05, "loss": 0.0072, "step": 38650 }, { "epoch": 6.89, "learning_rate": 4.655483238231099e-05, "loss": 0.0055, "step": 38660 }, { "epoch": 6.9, "learning_rate": 4.655394079885877e-05, "loss": 0.0033, "step": 38670 }, { "epoch": 6.9, "learning_rate": 4.6553049215406564e-05, "loss": 0.0067, "step": 38680 }, { "epoch": 6.9, "learning_rate": 4.655215763195435e-05, "loss": 0.0061, "step": 38690 }, { "epoch": 6.9, "learning_rate": 4.655126604850214e-05, "loss": 0.0066, "step": 38700 }, { "epoch": 6.9, "learning_rate": 4.655037446504993e-05, "loss": 0.0063, "step": 38710 }, { "epoch": 6.9, "learning_rate": 4.654948288159772e-05, "loss": 0.0114, "step": 38720 }, { "epoch": 6.91, "learning_rate": 4.654859129814551e-05, "loss": 0.0054, "step": 38730 }, { "epoch": 6.91, "learning_rate": 4.65476997146933e-05, "loss": 0.0044, "step": 38740 }, { "epoch": 6.91, "learning_rate": 4.654680813124109e-05, "loss": 0.0037, "step": 38750 }, { "epoch": 6.91, "learning_rate": 4.654591654778887e-05, "loss": 0.0054, "step": 38760 }, { "epoch": 6.91, "learning_rate": 4.6545024964336665e-05, "loss": 0.0074, "step": 38770 }, { "epoch": 6.92, "learning_rate": 4.6544133380884456e-05, "loss": 0.0034, "step": 38780 }, { "epoch": 6.92, "learning_rate": 4.654324179743224e-05, "loss": 0.0099, "step": 38790 }, { "epoch": 6.92, "learning_rate": 4.654235021398003e-05, "loss": 0.0067, "step": 38800 }, { "epoch": 6.92, "learning_rate": 4.6541458630527816e-05, "loss": 0.0034, "step": 38810 }, { "epoch": 6.92, "learning_rate": 4.654056704707561e-05, "loss": 0.0044, "step": 38820 }, { "epoch": 6.92, "learning_rate": 4.65396754636234e-05, "loss": 0.0031, "step": 38830 }, { "epoch": 6.93, "learning_rate": 4.653878388017119e-05, "loss": 0.0052, "step": 38840 }, { "epoch": 6.93, "learning_rate": 4.6537892296718974e-05, "loss": 0.0062, "step": 38850 }, { "epoch": 6.93, "learning_rate": 4.6537000713266765e-05, "loss": 0.0043, "step": 38860 }, { "epoch": 6.93, "learning_rate": 4.6536109129814556e-05, "loss": 0.006, "step": 38870 }, { "epoch": 6.93, "learning_rate": 4.653521754636234e-05, "loss": 0.0082, "step": 38880 }, { "epoch": 6.93, "learning_rate": 4.653432596291013e-05, "loss": 0.0068, "step": 38890 }, { "epoch": 6.94, "learning_rate": 4.6533434379457916e-05, "loss": 0.0031, "step": 38900 }, { "epoch": 6.94, "learning_rate": 4.653254279600571e-05, "loss": 0.0036, "step": 38910 }, { "epoch": 6.94, "learning_rate": 4.653165121255349e-05, "loss": 0.0049, "step": 38920 }, { "epoch": 6.94, "learning_rate": 4.653075962910128e-05, "loss": 0.0051, "step": 38930 }, { "epoch": 6.94, "learning_rate": 4.6529868045649074e-05, "loss": 0.0076, "step": 38940 }, { "epoch": 6.95, "learning_rate": 4.6528976462196866e-05, "loss": 0.0056, "step": 38950 }, { "epoch": 6.95, "learning_rate": 4.652808487874466e-05, "loss": 0.0051, "step": 38960 }, { "epoch": 6.95, "learning_rate": 4.652719329529244e-05, "loss": 0.0089, "step": 38970 }, { "epoch": 6.95, "learning_rate": 4.652630171184023e-05, "loss": 0.0051, "step": 38980 }, { "epoch": 6.95, "learning_rate": 4.652541012838802e-05, "loss": 0.0079, "step": 38990 }, { "epoch": 6.95, "learning_rate": 4.652451854493581e-05, "loss": 0.0056, "step": 39000 }, { "epoch": 6.96, "learning_rate": 4.65236269614836e-05, "loss": 0.0064, "step": 39010 }, { "epoch": 6.96, "learning_rate": 4.6522735378031384e-05, "loss": 0.005, "step": 39020 }, { "epoch": 6.96, "learning_rate": 4.6521843794579175e-05, "loss": 0.0048, "step": 39030 }, { "epoch": 6.96, "learning_rate": 4.652095221112696e-05, "loss": 0.0047, "step": 39040 }, { "epoch": 6.96, "learning_rate": 4.652006062767476e-05, "loss": 0.0069, "step": 39050 }, { "epoch": 6.97, "learning_rate": 4.651916904422254e-05, "loss": 0.0048, "step": 39060 }, { "epoch": 6.97, "learning_rate": 4.651827746077033e-05, "loss": 0.0067, "step": 39070 }, { "epoch": 6.97, "learning_rate": 4.651738587731812e-05, "loss": 0.0048, "step": 39080 }, { "epoch": 6.97, "learning_rate": 4.651649429386591e-05, "loss": 0.0043, "step": 39090 }, { "epoch": 6.97, "learning_rate": 4.65156027104137e-05, "loss": 0.002, "step": 39100 }, { "epoch": 6.97, "learning_rate": 4.6514711126961484e-05, "loss": 0.0037, "step": 39110 }, { "epoch": 6.98, "learning_rate": 4.6513819543509276e-05, "loss": 0.0055, "step": 39120 }, { "epoch": 6.98, "learning_rate": 4.651292796005706e-05, "loss": 0.0067, "step": 39130 }, { "epoch": 6.98, "learning_rate": 4.651203637660485e-05, "loss": 0.0064, "step": 39140 }, { "epoch": 6.98, "learning_rate": 4.6511144793152636e-05, "loss": 0.008, "step": 39150 }, { "epoch": 6.98, "learning_rate": 4.6510253209700434e-05, "loss": 0.0053, "step": 39160 }, { "epoch": 6.98, "learning_rate": 4.650936162624822e-05, "loss": 0.0068, "step": 39170 }, { "epoch": 6.99, "learning_rate": 4.650847004279601e-05, "loss": 0.0049, "step": 39180 }, { "epoch": 6.99, "learning_rate": 4.65075784593438e-05, "loss": 0.0046, "step": 39190 }, { "epoch": 6.99, "learning_rate": 4.6506686875891585e-05, "loss": 0.0033, "step": 39200 }, { "epoch": 6.99, "learning_rate": 4.6505795292439376e-05, "loss": 0.0051, "step": 39210 }, { "epoch": 6.99, "learning_rate": 4.650490370898716e-05, "loss": 0.0065, "step": 39220 }, { "epoch": 7.0, "learning_rate": 4.650401212553495e-05, "loss": 0.0043, "step": 39230 }, { "epoch": 7.0, "learning_rate": 4.650312054208274e-05, "loss": 0.0047, "step": 39240 }, { "epoch": 7.0, "learning_rate": 4.650222895863053e-05, "loss": 0.0038, "step": 39250 }, { "epoch": 7.0, "eval_loss": 0.01523965410888195, "eval_runtime": 196.0516, "eval_samples_per_second": 23.662, "eval_steps_per_second": 2.958, "step": 39256 }, { "epoch": 7.0, "learning_rate": 4.650133737517832e-05, "loss": 0.0069, "step": 39260 }, { "epoch": 7.0, "learning_rate": 4.650044579172611e-05, "loss": 0.0038, "step": 39270 }, { "epoch": 7.0, "learning_rate": 4.64995542082739e-05, "loss": 0.0074, "step": 39280 }, { "epoch": 7.01, "learning_rate": 4.6498662624821685e-05, "loss": 0.004, "step": 39290 }, { "epoch": 7.01, "learning_rate": 4.649777104136948e-05, "loss": 0.0042, "step": 39300 }, { "epoch": 7.01, "learning_rate": 4.649687945791726e-05, "loss": 0.0032, "step": 39310 }, { "epoch": 7.01, "learning_rate": 4.649598787446505e-05, "loss": 0.0056, "step": 39320 }, { "epoch": 7.01, "learning_rate": 4.6495096291012844e-05, "loss": 0.0032, "step": 39330 }, { "epoch": 7.01, "learning_rate": 4.649420470756063e-05, "loss": 0.0055, "step": 39340 }, { "epoch": 7.02, "learning_rate": 4.649331312410842e-05, "loss": 0.0061, "step": 39350 }, { "epoch": 7.02, "learning_rate": 4.6492421540656204e-05, "loss": 0.0031, "step": 39360 }, { "epoch": 7.02, "learning_rate": 4.6491529957203995e-05, "loss": 0.0048, "step": 39370 }, { "epoch": 7.02, "learning_rate": 4.6490638373751786e-05, "loss": 0.0055, "step": 39380 }, { "epoch": 7.02, "learning_rate": 4.648974679029958e-05, "loss": 0.0074, "step": 39390 }, { "epoch": 7.03, "learning_rate": 4.648885520684736e-05, "loss": 0.0031, "step": 39400 }, { "epoch": 7.03, "learning_rate": 4.648796362339515e-05, "loss": 0.004, "step": 39410 }, { "epoch": 7.03, "learning_rate": 4.6487072039942944e-05, "loss": 0.0052, "step": 39420 }, { "epoch": 7.03, "learning_rate": 4.648618045649073e-05, "loss": 0.0044, "step": 39430 }, { "epoch": 7.03, "learning_rate": 4.648528887303852e-05, "loss": 0.004, "step": 39440 }, { "epoch": 7.03, "learning_rate": 4.6484397289586304e-05, "loss": 0.0048, "step": 39450 }, { "epoch": 7.04, "learning_rate": 4.6483505706134095e-05, "loss": 0.0037, "step": 39460 }, { "epoch": 7.04, "learning_rate": 4.6482614122681887e-05, "loss": 0.0049, "step": 39470 }, { "epoch": 7.04, "learning_rate": 4.648172253922967e-05, "loss": 0.0039, "step": 39480 }, { "epoch": 7.04, "learning_rate": 4.648083095577747e-05, "loss": 0.0034, "step": 39490 }, { "epoch": 7.04, "learning_rate": 4.6479939372325253e-05, "loss": 0.0078, "step": 39500 }, { "epoch": 7.05, "learning_rate": 4.6479047788873045e-05, "loss": 0.0086, "step": 39510 }, { "epoch": 7.05, "learning_rate": 4.647815620542083e-05, "loss": 0.0025, "step": 39520 }, { "epoch": 7.05, "learning_rate": 4.647726462196862e-05, "loss": 0.004, "step": 39530 }, { "epoch": 7.05, "learning_rate": 4.6476373038516405e-05, "loss": 0.005, "step": 39540 }, { "epoch": 7.05, "learning_rate": 4.6475481455064196e-05, "loss": 0.0058, "step": 39550 }, { "epoch": 7.05, "learning_rate": 4.647458987161199e-05, "loss": 0.0053, "step": 39560 }, { "epoch": 7.06, "learning_rate": 4.647369828815977e-05, "loss": 0.0046, "step": 39570 }, { "epoch": 7.06, "learning_rate": 4.647280670470756e-05, "loss": 0.0062, "step": 39580 }, { "epoch": 7.06, "learning_rate": 4.647191512125535e-05, "loss": 0.0057, "step": 39590 }, { "epoch": 7.06, "learning_rate": 4.6471023537803145e-05, "loss": 0.0064, "step": 39600 }, { "epoch": 7.06, "learning_rate": 4.647013195435093e-05, "loss": 0.0079, "step": 39610 }, { "epoch": 7.06, "learning_rate": 4.646924037089872e-05, "loss": 0.0038, "step": 39620 }, { "epoch": 7.07, "learning_rate": 4.6468348787446505e-05, "loss": 0.0045, "step": 39630 }, { "epoch": 7.07, "learning_rate": 4.6467457203994296e-05, "loss": 0.0049, "step": 39640 }, { "epoch": 7.07, "learning_rate": 4.646656562054209e-05, "loss": 0.0042, "step": 39650 }, { "epoch": 7.07, "learning_rate": 4.646567403708987e-05, "loss": 0.0046, "step": 39660 }, { "epoch": 7.07, "learning_rate": 4.646478245363766e-05, "loss": 0.0061, "step": 39670 }, { "epoch": 7.08, "learning_rate": 4.646389087018545e-05, "loss": 0.0054, "step": 39680 }, { "epoch": 7.08, "learning_rate": 4.646299928673324e-05, "loss": 0.0059, "step": 39690 }, { "epoch": 7.08, "learning_rate": 4.646210770328103e-05, "loss": 0.0058, "step": 39700 }, { "epoch": 7.08, "learning_rate": 4.646121611982882e-05, "loss": 0.0056, "step": 39710 }, { "epoch": 7.08, "learning_rate": 4.646032453637661e-05, "loss": 0.009, "step": 39720 }, { "epoch": 7.08, "learning_rate": 4.64594329529244e-05, "loss": 0.0049, "step": 39730 }, { "epoch": 7.09, "learning_rate": 4.645854136947219e-05, "loss": 0.0064, "step": 39740 }, { "epoch": 7.09, "learning_rate": 4.645764978601997e-05, "loss": 0.0052, "step": 39750 }, { "epoch": 7.09, "learning_rate": 4.6456758202567764e-05, "loss": 0.0039, "step": 39760 }, { "epoch": 7.09, "learning_rate": 4.645586661911555e-05, "loss": 0.0065, "step": 39770 }, { "epoch": 7.09, "learning_rate": 4.645497503566334e-05, "loss": 0.0037, "step": 39780 }, { "epoch": 7.1, "learning_rate": 4.645408345221113e-05, "loss": 0.0033, "step": 39790 }, { "epoch": 7.1, "learning_rate": 4.6453191868758915e-05, "loss": 0.0055, "step": 39800 }, { "epoch": 7.1, "learning_rate": 4.6452300285306706e-05, "loss": 0.0057, "step": 39810 }, { "epoch": 7.1, "learning_rate": 4.64514087018545e-05, "loss": 0.004, "step": 39820 }, { "epoch": 7.1, "learning_rate": 4.645051711840229e-05, "loss": 0.0047, "step": 39830 }, { "epoch": 7.1, "learning_rate": 4.644962553495007e-05, "loss": 0.0039, "step": 39840 }, { "epoch": 7.11, "learning_rate": 4.6448733951497864e-05, "loss": 0.005, "step": 39850 }, { "epoch": 7.11, "learning_rate": 4.644784236804565e-05, "loss": 0.0039, "step": 39860 }, { "epoch": 7.11, "learning_rate": 4.644695078459344e-05, "loss": 0.0046, "step": 39870 }, { "epoch": 7.11, "learning_rate": 4.644605920114123e-05, "loss": 0.0073, "step": 39880 }, { "epoch": 7.11, "learning_rate": 4.6445167617689016e-05, "loss": 0.0042, "step": 39890 }, { "epoch": 7.11, "learning_rate": 4.644427603423681e-05, "loss": 0.0042, "step": 39900 }, { "epoch": 7.12, "learning_rate": 4.644338445078459e-05, "loss": 0.0059, "step": 39910 }, { "epoch": 7.12, "learning_rate": 4.644249286733238e-05, "loss": 0.005, "step": 39920 }, { "epoch": 7.12, "learning_rate": 4.6441601283880174e-05, "loss": 0.0051, "step": 39930 }, { "epoch": 7.12, "learning_rate": 4.6440709700427965e-05, "loss": 0.0048, "step": 39940 }, { "epoch": 7.12, "learning_rate": 4.6439818116975756e-05, "loss": 0.0039, "step": 39950 }, { "epoch": 7.13, "learning_rate": 4.643892653352354e-05, "loss": 0.0055, "step": 39960 }, { "epoch": 7.13, "learning_rate": 4.643803495007133e-05, "loss": 0.0031, "step": 39970 }, { "epoch": 7.13, "learning_rate": 4.6437143366619116e-05, "loss": 0.0045, "step": 39980 }, { "epoch": 7.13, "learning_rate": 4.643625178316691e-05, "loss": 0.0041, "step": 39990 }, { "epoch": 7.13, "learning_rate": 4.643536019971469e-05, "loss": 0.0061, "step": 40000 }, { "epoch": 7.13, "learning_rate": 4.643446861626248e-05, "loss": 0.0039, "step": 40010 }, { "epoch": 7.14, "learning_rate": 4.6433577032810274e-05, "loss": 0.0037, "step": 40020 }, { "epoch": 7.14, "learning_rate": 4.643268544935806e-05, "loss": 0.0056, "step": 40030 }, { "epoch": 7.14, "learning_rate": 4.643179386590586e-05, "loss": 0.003, "step": 40040 }, { "epoch": 7.14, "learning_rate": 4.643090228245364e-05, "loss": 0.0021, "step": 40050 }, { "epoch": 7.14, "learning_rate": 4.643001069900143e-05, "loss": 0.0035, "step": 40060 }, { "epoch": 7.15, "learning_rate": 4.642911911554922e-05, "loss": 0.0064, "step": 40070 }, { "epoch": 7.15, "learning_rate": 4.642822753209701e-05, "loss": 0.0027, "step": 40080 }, { "epoch": 7.15, "learning_rate": 4.642733594864479e-05, "loss": 0.0056, "step": 40090 }, { "epoch": 7.15, "learning_rate": 4.6426444365192584e-05, "loss": 0.0038, "step": 40100 }, { "epoch": 7.15, "learning_rate": 4.6425552781740375e-05, "loss": 0.0041, "step": 40110 }, { "epoch": 7.15, "learning_rate": 4.642466119828816e-05, "loss": 0.0036, "step": 40120 }, { "epoch": 7.16, "learning_rate": 4.642376961483595e-05, "loss": 0.0029, "step": 40130 }, { "epoch": 7.16, "learning_rate": 4.6422878031383735e-05, "loss": 0.0048, "step": 40140 }, { "epoch": 7.16, "learning_rate": 4.6421986447931526e-05, "loss": 0.0042, "step": 40150 }, { "epoch": 7.16, "learning_rate": 4.642109486447932e-05, "loss": 0.0062, "step": 40160 }, { "epoch": 7.16, "learning_rate": 4.642020328102711e-05, "loss": 0.005, "step": 40170 }, { "epoch": 7.16, "learning_rate": 4.64193116975749e-05, "loss": 0.0052, "step": 40180 }, { "epoch": 7.17, "learning_rate": 4.6418420114122684e-05, "loss": 0.0064, "step": 40190 }, { "epoch": 7.17, "learning_rate": 4.6417528530670475e-05, "loss": 0.0048, "step": 40200 }, { "epoch": 7.17, "learning_rate": 4.641663694721826e-05, "loss": 0.005, "step": 40210 }, { "epoch": 7.17, "learning_rate": 4.641574536376605e-05, "loss": 0.006, "step": 40220 }, { "epoch": 7.17, "learning_rate": 4.6414853780313836e-05, "loss": 0.0049, "step": 40230 }, { "epoch": 7.18, "learning_rate": 4.641396219686163e-05, "loss": 0.0042, "step": 40240 }, { "epoch": 7.18, "learning_rate": 4.641307061340942e-05, "loss": 0.0053, "step": 40250 }, { "epoch": 7.18, "learning_rate": 4.64121790299572e-05, "loss": 0.0073, "step": 40260 }, { "epoch": 7.18, "learning_rate": 4.6411287446505e-05, "loss": 0.0049, "step": 40270 }, { "epoch": 7.18, "learning_rate": 4.6410395863052785e-05, "loss": 0.0054, "step": 40280 }, { "epoch": 7.18, "learning_rate": 4.6409504279600576e-05, "loss": 0.0048, "step": 40290 }, { "epoch": 7.19, "learning_rate": 4.640861269614836e-05, "loss": 0.0073, "step": 40300 }, { "epoch": 7.19, "learning_rate": 4.640772111269615e-05, "loss": 0.0042, "step": 40310 }, { "epoch": 7.19, "learning_rate": 4.6406829529243936e-05, "loss": 0.0063, "step": 40320 }, { "epoch": 7.19, "learning_rate": 4.640593794579173e-05, "loss": 0.003, "step": 40330 }, { "epoch": 7.19, "learning_rate": 4.640504636233952e-05, "loss": 0.0098, "step": 40340 }, { "epoch": 7.2, "learning_rate": 4.64041547788873e-05, "loss": 0.0052, "step": 40350 }, { "epoch": 7.2, "learning_rate": 4.6403263195435094e-05, "loss": 0.0058, "step": 40360 }, { "epoch": 7.2, "learning_rate": 4.640237161198288e-05, "loss": 0.0047, "step": 40370 }, { "epoch": 7.2, "learning_rate": 4.640148002853068e-05, "loss": 0.0043, "step": 40380 }, { "epoch": 7.2, "learning_rate": 4.640058844507846e-05, "loss": 0.0044, "step": 40390 }, { "epoch": 7.2, "learning_rate": 4.639969686162625e-05, "loss": 0.0075, "step": 40400 }, { "epoch": 7.21, "learning_rate": 4.6398805278174043e-05, "loss": 0.0029, "step": 40410 }, { "epoch": 7.21, "learning_rate": 4.639791369472183e-05, "loss": 0.006, "step": 40420 }, { "epoch": 7.21, "learning_rate": 4.639702211126962e-05, "loss": 0.0051, "step": 40430 }, { "epoch": 7.21, "learning_rate": 4.6396130527817404e-05, "loss": 0.0037, "step": 40440 }, { "epoch": 7.21, "learning_rate": 4.6395238944365195e-05, "loss": 0.004, "step": 40450 }, { "epoch": 7.21, "learning_rate": 4.639434736091298e-05, "loss": 0.0066, "step": 40460 }, { "epoch": 7.22, "learning_rate": 4.639345577746077e-05, "loss": 0.0048, "step": 40470 }, { "epoch": 7.22, "learning_rate": 4.639256419400856e-05, "loss": 0.0054, "step": 40480 }, { "epoch": 7.22, "learning_rate": 4.639167261055635e-05, "loss": 0.0054, "step": 40490 }, { "epoch": 7.22, "learning_rate": 4.6390781027104144e-05, "loss": 0.0054, "step": 40500 }, { "epoch": 7.22, "learning_rate": 4.638988944365193e-05, "loss": 0.0066, "step": 40510 }, { "epoch": 7.23, "learning_rate": 4.638899786019972e-05, "loss": 0.0056, "step": 40520 }, { "epoch": 7.23, "learning_rate": 4.6388106276747504e-05, "loss": 0.0032, "step": 40530 }, { "epoch": 7.23, "learning_rate": 4.6387214693295295e-05, "loss": 0.0054, "step": 40540 }, { "epoch": 7.23, "learning_rate": 4.638632310984308e-05, "loss": 0.0041, "step": 40550 }, { "epoch": 7.23, "learning_rate": 4.638543152639087e-05, "loss": 0.0049, "step": 40560 }, { "epoch": 7.23, "learning_rate": 4.638453994293866e-05, "loss": 0.0044, "step": 40570 }, { "epoch": 7.24, "learning_rate": 4.6383648359486447e-05, "loss": 0.0053, "step": 40580 }, { "epoch": 7.24, "learning_rate": 4.638275677603424e-05, "loss": 0.0052, "step": 40590 }, { "epoch": 7.24, "learning_rate": 4.638186519258203e-05, "loss": 0.0053, "step": 40600 }, { "epoch": 7.24, "learning_rate": 4.638097360912982e-05, "loss": 0.0044, "step": 40610 }, { "epoch": 7.24, "learning_rate": 4.6380082025677605e-05, "loss": 0.0039, "step": 40620 }, { "epoch": 7.25, "learning_rate": 4.6379190442225396e-05, "loss": 0.0065, "step": 40630 }, { "epoch": 7.25, "learning_rate": 4.637829885877318e-05, "loss": 0.0045, "step": 40640 }, { "epoch": 7.25, "learning_rate": 4.637740727532097e-05, "loss": 0.0076, "step": 40650 }, { "epoch": 7.25, "learning_rate": 4.637651569186876e-05, "loss": 0.0091, "step": 40660 }, { "epoch": 7.25, "learning_rate": 4.637562410841655e-05, "loss": 0.0062, "step": 40670 }, { "epoch": 7.25, "learning_rate": 4.637473252496434e-05, "loss": 0.0043, "step": 40680 }, { "epoch": 7.26, "learning_rate": 4.637384094151212e-05, "loss": 0.0064, "step": 40690 }, { "epoch": 7.26, "learning_rate": 4.6372949358059914e-05, "loss": 0.0028, "step": 40700 }, { "epoch": 7.26, "learning_rate": 4.6372057774607705e-05, "loss": 0.0058, "step": 40710 }, { "epoch": 7.26, "learning_rate": 4.6371166191155496e-05, "loss": 0.0054, "step": 40720 }, { "epoch": 7.26, "learning_rate": 4.637027460770329e-05, "loss": 0.0053, "step": 40730 }, { "epoch": 7.26, "learning_rate": 4.636938302425107e-05, "loss": 0.0063, "step": 40740 }, { "epoch": 7.27, "learning_rate": 4.636849144079886e-05, "loss": 0.0033, "step": 40750 }, { "epoch": 7.27, "learning_rate": 4.636759985734665e-05, "loss": 0.0055, "step": 40760 }, { "epoch": 7.27, "learning_rate": 4.636670827389444e-05, "loss": 0.0043, "step": 40770 }, { "epoch": 7.27, "learning_rate": 4.636581669044222e-05, "loss": 0.0035, "step": 40780 }, { "epoch": 7.27, "learning_rate": 4.6364925106990015e-05, "loss": 0.0034, "step": 40790 }, { "epoch": 7.28, "learning_rate": 4.6364033523537806e-05, "loss": 0.0081, "step": 40800 }, { "epoch": 7.28, "learning_rate": 4.636314194008559e-05, "loss": 0.0039, "step": 40810 }, { "epoch": 7.28, "learning_rate": 4.636225035663339e-05, "loss": 0.0043, "step": 40820 }, { "epoch": 7.28, "learning_rate": 4.636135877318117e-05, "loss": 0.0051, "step": 40830 }, { "epoch": 7.28, "learning_rate": 4.6360467189728964e-05, "loss": 0.0058, "step": 40840 }, { "epoch": 7.28, "learning_rate": 4.635957560627675e-05, "loss": 0.0065, "step": 40850 }, { "epoch": 7.29, "learning_rate": 4.635868402282454e-05, "loss": 0.004, "step": 40860 }, { "epoch": 7.29, "learning_rate": 4.6357792439372324e-05, "loss": 0.0052, "step": 40870 }, { "epoch": 7.29, "learning_rate": 4.6356900855920115e-05, "loss": 0.0083, "step": 40880 }, { "epoch": 7.29, "learning_rate": 4.6356009272467906e-05, "loss": 0.0061, "step": 40890 }, { "epoch": 7.29, "learning_rate": 4.635511768901569e-05, "loss": 0.0061, "step": 40900 }, { "epoch": 7.29, "learning_rate": 4.635422610556348e-05, "loss": 0.0043, "step": 40910 }, { "epoch": 7.3, "learning_rate": 4.6353334522111266e-05, "loss": 0.005, "step": 40920 }, { "epoch": 7.3, "learning_rate": 4.6352442938659064e-05, "loss": 0.0034, "step": 40930 }, { "epoch": 7.3, "learning_rate": 4.635155135520685e-05, "loss": 0.0044, "step": 40940 }, { "epoch": 7.3, "learning_rate": 4.635065977175464e-05, "loss": 0.0059, "step": 40950 }, { "epoch": 7.3, "learning_rate": 4.634976818830243e-05, "loss": 0.0033, "step": 40960 }, { "epoch": 7.31, "learning_rate": 4.6348876604850216e-05, "loss": 0.0048, "step": 40970 }, { "epoch": 7.31, "learning_rate": 4.634798502139801e-05, "loss": 0.0027, "step": 40980 }, { "epoch": 7.31, "learning_rate": 4.634709343794579e-05, "loss": 0.0038, "step": 40990 }, { "epoch": 7.31, "learning_rate": 4.634620185449358e-05, "loss": 0.0057, "step": 41000 }, { "epoch": 7.31, "learning_rate": 4.634531027104137e-05, "loss": 0.0048, "step": 41010 }, { "epoch": 7.31, "learning_rate": 4.634441868758916e-05, "loss": 0.0041, "step": 41020 }, { "epoch": 7.32, "learning_rate": 4.634352710413695e-05, "loss": 0.003, "step": 41030 }, { "epoch": 7.32, "learning_rate": 4.634263552068474e-05, "loss": 0.0044, "step": 41040 }, { "epoch": 7.32, "learning_rate": 4.634174393723253e-05, "loss": 0.0059, "step": 41050 }, { "epoch": 7.32, "learning_rate": 4.6340852353780316e-05, "loss": 0.0045, "step": 41060 }, { "epoch": 7.32, "learning_rate": 4.633996077032811e-05, "loss": 0.0035, "step": 41070 }, { "epoch": 7.33, "learning_rate": 4.633906918687589e-05, "loss": 0.0041, "step": 41080 }, { "epoch": 7.33, "learning_rate": 4.633817760342368e-05, "loss": 0.0075, "step": 41090 }, { "epoch": 7.33, "learning_rate": 4.633728601997147e-05, "loss": 0.0059, "step": 41100 }, { "epoch": 7.33, "learning_rate": 4.633639443651926e-05, "loss": 0.0064, "step": 41110 }, { "epoch": 7.33, "learning_rate": 4.633550285306705e-05, "loss": 0.0048, "step": 41120 }, { "epoch": 7.33, "learning_rate": 4.6334611269614834e-05, "loss": 0.0031, "step": 41130 }, { "epoch": 7.34, "learning_rate": 4.6333719686162626e-05, "loss": 0.0055, "step": 41140 }, { "epoch": 7.34, "learning_rate": 4.633282810271042e-05, "loss": 0.0044, "step": 41150 }, { "epoch": 7.34, "learning_rate": 4.633193651925821e-05, "loss": 0.007, "step": 41160 }, { "epoch": 7.34, "learning_rate": 4.633104493580599e-05, "loss": 0.0044, "step": 41170 }, { "epoch": 7.34, "learning_rate": 4.6330153352353784e-05, "loss": 0.0066, "step": 41180 }, { "epoch": 7.34, "learning_rate": 4.6329261768901575e-05, "loss": 0.0059, "step": 41190 }, { "epoch": 7.35, "learning_rate": 4.632837018544936e-05, "loss": 0.0038, "step": 41200 }, { "epoch": 7.35, "learning_rate": 4.632747860199715e-05, "loss": 0.0035, "step": 41210 }, { "epoch": 7.35, "learning_rate": 4.6326587018544935e-05, "loss": 0.0052, "step": 41220 }, { "epoch": 7.35, "learning_rate": 4.6325695435092726e-05, "loss": 0.0062, "step": 41230 }, { "epoch": 7.35, "learning_rate": 4.632480385164051e-05, "loss": 0.0039, "step": 41240 }, { "epoch": 7.36, "learning_rate": 4.63239122681883e-05, "loss": 0.0044, "step": 41250 }, { "epoch": 7.36, "learning_rate": 4.632302068473609e-05, "loss": 0.0044, "step": 41260 }, { "epoch": 7.36, "learning_rate": 4.6322129101283884e-05, "loss": 0.0054, "step": 41270 }, { "epoch": 7.36, "learning_rate": 4.6321237517831675e-05, "loss": 0.0031, "step": 41280 }, { "epoch": 7.36, "learning_rate": 4.632034593437946e-05, "loss": 0.0059, "step": 41290 }, { "epoch": 7.36, "learning_rate": 4.631945435092725e-05, "loss": 0.0066, "step": 41300 }, { "epoch": 7.37, "learning_rate": 4.6318562767475036e-05, "loss": 0.0034, "step": 41310 }, { "epoch": 7.37, "learning_rate": 4.631767118402283e-05, "loss": 0.0031, "step": 41320 }, { "epoch": 7.37, "learning_rate": 4.631677960057061e-05, "loss": 0.0043, "step": 41330 }, { "epoch": 7.37, "learning_rate": 4.63158880171184e-05, "loss": 0.0027, "step": 41340 }, { "epoch": 7.37, "learning_rate": 4.6314996433666194e-05, "loss": 0.004, "step": 41350 }, { "epoch": 7.38, "learning_rate": 4.631410485021398e-05, "loss": 0.0051, "step": 41360 }, { "epoch": 7.38, "learning_rate": 4.6313213266761776e-05, "loss": 0.0019, "step": 41370 }, { "epoch": 7.38, "learning_rate": 4.631232168330956e-05, "loss": 0.0042, "step": 41380 }, { "epoch": 7.38, "learning_rate": 4.631143009985735e-05, "loss": 0.0046, "step": 41390 }, { "epoch": 7.38, "learning_rate": 4.6310538516405136e-05, "loss": 0.0064, "step": 41400 }, { "epoch": 7.38, "learning_rate": 4.630964693295293e-05, "loss": 0.0041, "step": 41410 }, { "epoch": 7.39, "learning_rate": 4.630875534950072e-05, "loss": 0.0047, "step": 41420 }, { "epoch": 7.39, "learning_rate": 4.63078637660485e-05, "loss": 0.0028, "step": 41430 }, { "epoch": 7.39, "learning_rate": 4.6306972182596294e-05, "loss": 0.0091, "step": 41440 }, { "epoch": 7.39, "learning_rate": 4.630608059914408e-05, "loss": 0.0029, "step": 41450 }, { "epoch": 7.39, "learning_rate": 4.630518901569187e-05, "loss": 0.0064, "step": 41460 }, { "epoch": 7.39, "learning_rate": 4.6304297432239654e-05, "loss": 0.0044, "step": 41470 }, { "epoch": 7.4, "learning_rate": 4.630340584878745e-05, "loss": 0.0041, "step": 41480 }, { "epoch": 7.4, "learning_rate": 4.630251426533524e-05, "loss": 0.009, "step": 41490 }, { "epoch": 7.4, "learning_rate": 4.630162268188303e-05, "loss": 0.0064, "step": 41500 }, { "epoch": 7.4, "learning_rate": 4.630073109843082e-05, "loss": 0.0063, "step": 41510 }, { "epoch": 7.4, "learning_rate": 4.6299839514978604e-05, "loss": 0.0064, "step": 41520 }, { "epoch": 7.41, "learning_rate": 4.6298947931526395e-05, "loss": 0.0054, "step": 41530 }, { "epoch": 7.41, "learning_rate": 4.629805634807418e-05, "loss": 0.0048, "step": 41540 }, { "epoch": 7.41, "learning_rate": 4.629716476462197e-05, "loss": 0.0049, "step": 41550 }, { "epoch": 7.41, "learning_rate": 4.6296273181169755e-05, "loss": 0.0044, "step": 41560 }, { "epoch": 7.41, "learning_rate": 4.6295381597717546e-05, "loss": 0.0037, "step": 41570 }, { "epoch": 7.41, "learning_rate": 4.629449001426534e-05, "loss": 0.0071, "step": 41580 }, { "epoch": 7.42, "learning_rate": 4.629359843081313e-05, "loss": 0.006, "step": 41590 }, { "epoch": 7.42, "learning_rate": 4.629270684736092e-05, "loss": 0.0058, "step": 41600 }, { "epoch": 7.42, "learning_rate": 4.6291815263908704e-05, "loss": 0.003, "step": 41610 }, { "epoch": 7.42, "learning_rate": 4.6290923680456495e-05, "loss": 0.004, "step": 41620 }, { "epoch": 7.42, "learning_rate": 4.629003209700428e-05, "loss": 0.0035, "step": 41630 }, { "epoch": 7.43, "learning_rate": 4.628914051355207e-05, "loss": 0.0085, "step": 41640 }, { "epoch": 7.43, "learning_rate": 4.628824893009986e-05, "loss": 0.0067, "step": 41650 }, { "epoch": 7.43, "learning_rate": 4.6287357346647647e-05, "loss": 0.0028, "step": 41660 }, { "epoch": 7.43, "learning_rate": 4.628646576319544e-05, "loss": 0.0037, "step": 41670 }, { "epoch": 7.43, "learning_rate": 4.628557417974322e-05, "loss": 0.0025, "step": 41680 }, { "epoch": 7.43, "learning_rate": 4.6284682596291013e-05, "loss": 0.0059, "step": 41690 }, { "epoch": 7.44, "learning_rate": 4.6283791012838805e-05, "loss": 0.0084, "step": 41700 }, { "epoch": 7.44, "learning_rate": 4.6282899429386596e-05, "loss": 0.0057, "step": 41710 }, { "epoch": 7.44, "learning_rate": 4.628200784593438e-05, "loss": 0.0054, "step": 41720 }, { "epoch": 7.44, "learning_rate": 4.628111626248217e-05, "loss": 0.0056, "step": 41730 }, { "epoch": 7.44, "learning_rate": 4.628022467902996e-05, "loss": 0.0056, "step": 41740 }, { "epoch": 7.44, "learning_rate": 4.627933309557775e-05, "loss": 0.0073, "step": 41750 }, { "epoch": 7.45, "learning_rate": 4.627844151212554e-05, "loss": 0.0053, "step": 41760 }, { "epoch": 7.45, "learning_rate": 4.627754992867332e-05, "loss": 0.0072, "step": 41770 }, { "epoch": 7.45, "learning_rate": 4.6276658345221114e-05, "loss": 0.0065, "step": 41780 }, { "epoch": 7.45, "learning_rate": 4.62757667617689e-05, "loss": 0.0039, "step": 41790 }, { "epoch": 7.45, "learning_rate": 4.627487517831669e-05, "loss": 0.0032, "step": 41800 }, { "epoch": 7.46, "learning_rate": 4.627398359486448e-05, "loss": 0.0034, "step": 41810 }, { "epoch": 7.46, "learning_rate": 4.627309201141227e-05, "loss": 0.0048, "step": 41820 }, { "epoch": 7.46, "learning_rate": 4.627220042796006e-05, "loss": 0.0051, "step": 41830 }, { "epoch": 7.46, "learning_rate": 4.627130884450785e-05, "loss": 0.0047, "step": 41840 }, { "epoch": 7.46, "learning_rate": 4.627041726105564e-05, "loss": 0.0043, "step": 41850 }, { "epoch": 7.46, "learning_rate": 4.626952567760342e-05, "loss": 0.0073, "step": 41860 }, { "epoch": 7.47, "learning_rate": 4.6268634094151215e-05, "loss": 0.0026, "step": 41870 }, { "epoch": 7.47, "learning_rate": 4.6267742510699006e-05, "loss": 0.0063, "step": 41880 }, { "epoch": 7.47, "learning_rate": 4.626685092724679e-05, "loss": 0.0048, "step": 41890 }, { "epoch": 7.47, "learning_rate": 4.626595934379458e-05, "loss": 0.0066, "step": 41900 }, { "epoch": 7.47, "learning_rate": 4.6265067760342366e-05, "loss": 0.0049, "step": 41910 }, { "epoch": 7.48, "learning_rate": 4.6264176176890164e-05, "loss": 0.0078, "step": 41920 }, { "epoch": 7.48, "learning_rate": 4.626328459343795e-05, "loss": 0.0037, "step": 41930 }, { "epoch": 7.48, "learning_rate": 4.626239300998574e-05, "loss": 0.0072, "step": 41940 }, { "epoch": 7.48, "learning_rate": 4.6261501426533524e-05, "loss": 0.0039, "step": 41950 }, { "epoch": 7.48, "learning_rate": 4.6260609843081315e-05, "loss": 0.0064, "step": 41960 }, { "epoch": 7.48, "learning_rate": 4.6259718259629106e-05, "loss": 0.0053, "step": 41970 }, { "epoch": 7.49, "learning_rate": 4.625882667617689e-05, "loss": 0.0052, "step": 41980 }, { "epoch": 7.49, "learning_rate": 4.625793509272468e-05, "loss": 0.0034, "step": 41990 }, { "epoch": 7.49, "learning_rate": 4.6257043509272466e-05, "loss": 0.0057, "step": 42000 }, { "epoch": 7.49, "learning_rate": 4.625615192582026e-05, "loss": 0.0062, "step": 42010 }, { "epoch": 7.49, "learning_rate": 4.625526034236804e-05, "loss": 0.0067, "step": 42020 }, { "epoch": 7.49, "learning_rate": 4.625436875891584e-05, "loss": 0.0094, "step": 42030 }, { "epoch": 7.5, "learning_rate": 4.6253477175463624e-05, "loss": 0.0042, "step": 42040 }, { "epoch": 7.5, "learning_rate": 4.6252585592011416e-05, "loss": 0.0041, "step": 42050 }, { "epoch": 7.5, "learning_rate": 4.625169400855921e-05, "loss": 0.0045, "step": 42060 }, { "epoch": 7.5, "learning_rate": 4.625080242510699e-05, "loss": 0.0063, "step": 42070 }, { "epoch": 7.5, "learning_rate": 4.624991084165478e-05, "loss": 0.0068, "step": 42080 }, { "epoch": 7.51, "learning_rate": 4.624901925820257e-05, "loss": 0.0059, "step": 42090 }, { "epoch": 7.51, "learning_rate": 4.624812767475036e-05, "loss": 0.0025, "step": 42100 }, { "epoch": 7.51, "learning_rate": 4.624723609129815e-05, "loss": 0.0048, "step": 42110 }, { "epoch": 7.51, "learning_rate": 4.6246344507845934e-05, "loss": 0.0081, "step": 42120 }, { "epoch": 7.51, "learning_rate": 4.6245452924393725e-05, "loss": 0.005, "step": 42130 }, { "epoch": 7.51, "learning_rate": 4.6244561340941516e-05, "loss": 0.0046, "step": 42140 }, { "epoch": 7.52, "learning_rate": 4.624366975748931e-05, "loss": 0.0057, "step": 42150 }, { "epoch": 7.52, "learning_rate": 4.624277817403709e-05, "loss": 0.0036, "step": 42160 }, { "epoch": 7.52, "learning_rate": 4.624188659058488e-05, "loss": 0.0048, "step": 42170 }, { "epoch": 7.52, "learning_rate": 4.624099500713267e-05, "loss": 0.0034, "step": 42180 }, { "epoch": 7.52, "learning_rate": 4.624010342368046e-05, "loss": 0.005, "step": 42190 }, { "epoch": 7.52, "learning_rate": 4.623921184022825e-05, "loss": 0.0054, "step": 42200 }, { "epoch": 7.53, "learning_rate": 4.6238320256776034e-05, "loss": 0.0041, "step": 42210 }, { "epoch": 7.53, "learning_rate": 4.6237428673323826e-05, "loss": 0.005, "step": 42220 }, { "epoch": 7.53, "learning_rate": 4.623653708987161e-05, "loss": 0.0027, "step": 42230 }, { "epoch": 7.53, "learning_rate": 4.62356455064194e-05, "loss": 0.005, "step": 42240 }, { "epoch": 7.53, "learning_rate": 4.623475392296719e-05, "loss": 0.0062, "step": 42250 }, { "epoch": 7.54, "learning_rate": 4.6233862339514984e-05, "loss": 0.0058, "step": 42260 }, { "epoch": 7.54, "learning_rate": 4.623297075606277e-05, "loss": 0.0058, "step": 42270 }, { "epoch": 7.54, "learning_rate": 4.623207917261056e-05, "loss": 0.0045, "step": 42280 }, { "epoch": 7.54, "learning_rate": 4.623118758915835e-05, "loss": 0.0043, "step": 42290 }, { "epoch": 7.54, "learning_rate": 4.6230296005706135e-05, "loss": 0.0045, "step": 42300 }, { "epoch": 7.54, "learning_rate": 4.6229404422253926e-05, "loss": 0.0051, "step": 42310 }, { "epoch": 7.55, "learning_rate": 4.622851283880171e-05, "loss": 0.0038, "step": 42320 }, { "epoch": 7.55, "learning_rate": 4.62276212553495e-05, "loss": 0.008, "step": 42330 }, { "epoch": 7.55, "learning_rate": 4.622672967189729e-05, "loss": 0.0086, "step": 42340 }, { "epoch": 7.55, "learning_rate": 4.622583808844508e-05, "loss": 0.0042, "step": 42350 }, { "epoch": 7.55, "learning_rate": 4.6224946504992875e-05, "loss": 0.0032, "step": 42360 }, { "epoch": 7.56, "learning_rate": 4.622405492154066e-05, "loss": 0.0072, "step": 42370 }, { "epoch": 7.56, "learning_rate": 4.622316333808845e-05, "loss": 0.006, "step": 42380 }, { "epoch": 7.56, "learning_rate": 4.6222271754636235e-05, "loss": 0.0081, "step": 42390 }, { "epoch": 7.56, "learning_rate": 4.622138017118403e-05, "loss": 0.0029, "step": 42400 }, { "epoch": 7.56, "learning_rate": 4.622048858773181e-05, "loss": 0.005, "step": 42410 }, { "epoch": 7.56, "learning_rate": 4.6219686162624826e-05, "loss": 0.007, "step": 42420 }, { "epoch": 7.57, "learning_rate": 4.621879457917262e-05, "loss": 0.0035, "step": 42430 }, { "epoch": 7.57, "learning_rate": 4.62179029957204e-05, "loss": 0.004, "step": 42440 }, { "epoch": 7.57, "learning_rate": 4.621701141226819e-05, "loss": 0.0048, "step": 42450 }, { "epoch": 7.57, "learning_rate": 4.621611982881598e-05, "loss": 0.0052, "step": 42460 }, { "epoch": 7.57, "learning_rate": 4.621522824536377e-05, "loss": 0.0074, "step": 42470 }, { "epoch": 7.57, "learning_rate": 4.621433666191155e-05, "loss": 0.0026, "step": 42480 }, { "epoch": 7.58, "learning_rate": 4.6213445078459344e-05, "loss": 0.0057, "step": 42490 }, { "epoch": 7.58, "learning_rate": 4.6212553495007135e-05, "loss": 0.0039, "step": 42500 }, { "epoch": 7.58, "learning_rate": 4.6211661911554926e-05, "loss": 0.0079, "step": 42510 }, { "epoch": 7.58, "learning_rate": 4.621077032810272e-05, "loss": 0.008, "step": 42520 }, { "epoch": 7.58, "learning_rate": 4.62098787446505e-05, "loss": 0.0048, "step": 42530 }, { "epoch": 7.59, "learning_rate": 4.620898716119829e-05, "loss": 0.0053, "step": 42540 }, { "epoch": 7.59, "learning_rate": 4.620809557774608e-05, "loss": 0.0079, "step": 42550 }, { "epoch": 7.59, "learning_rate": 4.620720399429387e-05, "loss": 0.004, "step": 42560 }, { "epoch": 7.59, "learning_rate": 4.6206312410841653e-05, "loss": 0.0059, "step": 42570 }, { "epoch": 7.59, "learning_rate": 4.6205420827389445e-05, "loss": 0.0032, "step": 42580 }, { "epoch": 7.59, "learning_rate": 4.6204529243937236e-05, "loss": 0.0054, "step": 42590 }, { "epoch": 7.6, "learning_rate": 4.620363766048502e-05, "loss": 0.0044, "step": 42600 }, { "epoch": 7.6, "learning_rate": 4.620274607703281e-05, "loss": 0.0096, "step": 42610 }, { "epoch": 7.6, "learning_rate": 4.6201854493580596e-05, "loss": 0.006, "step": 42620 }, { "epoch": 7.6, "learning_rate": 4.6200962910128394e-05, "loss": 0.0056, "step": 42630 }, { "epoch": 7.6, "learning_rate": 4.620007132667618e-05, "loss": 0.0062, "step": 42640 }, { "epoch": 7.61, "learning_rate": 4.619917974322397e-05, "loss": 0.0048, "step": 42650 }, { "epoch": 7.61, "learning_rate": 4.619828815977176e-05, "loss": 0.0056, "step": 42660 }, { "epoch": 7.61, "learning_rate": 4.6197396576319545e-05, "loss": 0.0093, "step": 42670 }, { "epoch": 7.61, "learning_rate": 4.6196504992867336e-05, "loss": 0.0073, "step": 42680 }, { "epoch": 7.61, "learning_rate": 4.619561340941512e-05, "loss": 0.0064, "step": 42690 }, { "epoch": 7.61, "learning_rate": 4.619472182596291e-05, "loss": 0.003, "step": 42700 }, { "epoch": 7.62, "learning_rate": 4.6193830242510696e-05, "loss": 0.0069, "step": 42710 }, { "epoch": 7.62, "learning_rate": 4.619293865905849e-05, "loss": 0.0044, "step": 42720 }, { "epoch": 7.62, "learning_rate": 4.619204707560628e-05, "loss": 0.0053, "step": 42730 }, { "epoch": 7.62, "learning_rate": 4.619115549215407e-05, "loss": 0.0065, "step": 42740 }, { "epoch": 7.62, "learning_rate": 4.619026390870186e-05, "loss": 0.0064, "step": 42750 }, { "epoch": 7.62, "learning_rate": 4.6189372325249646e-05, "loss": 0.003, "step": 42760 }, { "epoch": 7.63, "learning_rate": 4.618848074179744e-05, "loss": 0.0038, "step": 42770 }, { "epoch": 7.63, "learning_rate": 4.618758915834522e-05, "loss": 0.007, "step": 42780 }, { "epoch": 7.63, "learning_rate": 4.618669757489301e-05, "loss": 0.0057, "step": 42790 }, { "epoch": 7.63, "learning_rate": 4.61858059914408e-05, "loss": 0.0032, "step": 42800 }, { "epoch": 7.63, "learning_rate": 4.618491440798859e-05, "loss": 0.0033, "step": 42810 }, { "epoch": 7.64, "learning_rate": 4.618402282453638e-05, "loss": 0.0033, "step": 42820 }, { "epoch": 7.64, "learning_rate": 4.6183131241084164e-05, "loss": 0.0082, "step": 42830 }, { "epoch": 7.64, "learning_rate": 4.6182239657631955e-05, "loss": 0.0054, "step": 42840 }, { "epoch": 7.64, "learning_rate": 4.6181348074179746e-05, "loss": 0.0052, "step": 42850 }, { "epoch": 7.64, "learning_rate": 4.618045649072754e-05, "loss": 0.0043, "step": 42860 }, { "epoch": 7.64, "learning_rate": 4.617956490727532e-05, "loss": 0.0058, "step": 42870 }, { "epoch": 7.65, "learning_rate": 4.617867332382311e-05, "loss": 0.0064, "step": 42880 }, { "epoch": 7.65, "learning_rate": 4.6177781740370904e-05, "loss": 0.0058, "step": 42890 }, { "epoch": 7.65, "learning_rate": 4.617689015691869e-05, "loss": 0.0049, "step": 42900 }, { "epoch": 7.65, "learning_rate": 4.617599857346648e-05, "loss": 0.0063, "step": 42910 }, { "epoch": 7.65, "learning_rate": 4.6175106990014264e-05, "loss": 0.0051, "step": 42920 }, { "epoch": 7.66, "learning_rate": 4.6174215406562056e-05, "loss": 0.0076, "step": 42930 }, { "epoch": 7.66, "learning_rate": 4.617332382310984e-05, "loss": 0.0051, "step": 42940 }, { "epoch": 7.66, "learning_rate": 4.617243223965763e-05, "loss": 0.0061, "step": 42950 }, { "epoch": 7.66, "learning_rate": 4.617154065620542e-05, "loss": 0.0067, "step": 42960 }, { "epoch": 7.66, "learning_rate": 4.6170649072753214e-05, "loss": 0.0053, "step": 42970 }, { "epoch": 7.66, "learning_rate": 4.6169757489301005e-05, "loss": 0.0042, "step": 42980 }, { "epoch": 7.67, "learning_rate": 4.616886590584879e-05, "loss": 0.0044, "step": 42990 }, { "epoch": 7.67, "learning_rate": 4.616797432239658e-05, "loss": 0.0049, "step": 43000 }, { "epoch": 7.67, "learning_rate": 4.6167082738944365e-05, "loss": 0.0037, "step": 43010 }, { "epoch": 7.67, "learning_rate": 4.6166191155492156e-05, "loss": 0.0063, "step": 43020 }, { "epoch": 7.67, "learning_rate": 4.616529957203994e-05, "loss": 0.0046, "step": 43030 }, { "epoch": 7.67, "learning_rate": 4.616440798858773e-05, "loss": 0.0047, "step": 43040 }, { "epoch": 7.68, "learning_rate": 4.616351640513552e-05, "loss": 0.0051, "step": 43050 }, { "epoch": 7.68, "learning_rate": 4.616262482168331e-05, "loss": 0.0054, "step": 43060 }, { "epoch": 7.68, "learning_rate": 4.6161733238231106e-05, "loss": 0.006, "step": 43070 }, { "epoch": 7.68, "learning_rate": 4.616084165477889e-05, "loss": 0.0028, "step": 43080 }, { "epoch": 7.68, "learning_rate": 4.615995007132668e-05, "loss": 0.0051, "step": 43090 }, { "epoch": 7.69, "learning_rate": 4.6159058487874466e-05, "loss": 0.0038, "step": 43100 }, { "epoch": 7.69, "learning_rate": 4.615816690442226e-05, "loss": 0.0045, "step": 43110 }, { "epoch": 7.69, "learning_rate": 4.615727532097005e-05, "loss": 0.0082, "step": 43120 }, { "epoch": 7.69, "learning_rate": 4.615638373751783e-05, "loss": 0.0061, "step": 43130 }, { "epoch": 7.69, "learning_rate": 4.6155492154065624e-05, "loss": 0.0029, "step": 43140 }, { "epoch": 7.69, "learning_rate": 4.615460057061341e-05, "loss": 0.0042, "step": 43150 }, { "epoch": 7.7, "learning_rate": 4.61537089871612e-05, "loss": 0.0048, "step": 43160 }, { "epoch": 7.7, "learning_rate": 4.6152817403708984e-05, "loss": 0.0047, "step": 43170 }, { "epoch": 7.7, "learning_rate": 4.615192582025678e-05, "loss": 0.0052, "step": 43180 }, { "epoch": 7.7, "learning_rate": 4.6151034236804566e-05, "loss": 0.0047, "step": 43190 }, { "epoch": 7.7, "learning_rate": 4.615014265335236e-05, "loss": 0.0069, "step": 43200 }, { "epoch": 7.71, "learning_rate": 4.614925106990015e-05, "loss": 0.0048, "step": 43210 }, { "epoch": 7.71, "learning_rate": 4.614835948644793e-05, "loss": 0.0072, "step": 43220 }, { "epoch": 7.71, "learning_rate": 4.6147467902995724e-05, "loss": 0.0062, "step": 43230 }, { "epoch": 7.71, "learning_rate": 4.614657631954351e-05, "loss": 0.0062, "step": 43240 }, { "epoch": 7.71, "learning_rate": 4.61456847360913e-05, "loss": 0.0083, "step": 43250 }, { "epoch": 7.71, "learning_rate": 4.6144793152639084e-05, "loss": 0.0058, "step": 43260 }, { "epoch": 7.72, "learning_rate": 4.6143901569186876e-05, "loss": 0.0084, "step": 43270 }, { "epoch": 7.72, "learning_rate": 4.614300998573467e-05, "loss": 0.0055, "step": 43280 }, { "epoch": 7.72, "learning_rate": 4.614211840228246e-05, "loss": 0.0048, "step": 43290 }, { "epoch": 7.72, "learning_rate": 4.614122681883025e-05, "loss": 0.0035, "step": 43300 }, { "epoch": 7.72, "learning_rate": 4.6140335235378034e-05, "loss": 0.0031, "step": 43310 }, { "epoch": 7.72, "learning_rate": 4.6139443651925825e-05, "loss": 0.0041, "step": 43320 }, { "epoch": 7.73, "learning_rate": 4.613855206847361e-05, "loss": 0.0076, "step": 43330 }, { "epoch": 7.73, "learning_rate": 4.61376604850214e-05, "loss": 0.0054, "step": 43340 }, { "epoch": 7.73, "learning_rate": 4.613676890156919e-05, "loss": 0.0042, "step": 43350 }, { "epoch": 7.73, "learning_rate": 4.6135877318116976e-05, "loss": 0.0074, "step": 43360 }, { "epoch": 7.73, "learning_rate": 4.613498573466477e-05, "loss": 0.0046, "step": 43370 }, { "epoch": 7.74, "learning_rate": 4.613409415121255e-05, "loss": 0.0082, "step": 43380 }, { "epoch": 7.74, "learning_rate": 4.613320256776034e-05, "loss": 0.0059, "step": 43390 }, { "epoch": 7.74, "learning_rate": 4.6132310984308134e-05, "loss": 0.0045, "step": 43400 }, { "epoch": 7.74, "learning_rate": 4.6131419400855925e-05, "loss": 0.0029, "step": 43410 }, { "epoch": 7.74, "learning_rate": 4.613052781740371e-05, "loss": 0.006, "step": 43420 }, { "epoch": 7.74, "learning_rate": 4.61296362339515e-05, "loss": 0.0058, "step": 43430 }, { "epoch": 7.75, "learning_rate": 4.612874465049929e-05, "loss": 0.0088, "step": 43440 }, { "epoch": 7.75, "learning_rate": 4.612785306704708e-05, "loss": 0.0083, "step": 43450 }, { "epoch": 7.75, "learning_rate": 4.612696148359487e-05, "loss": 0.0054, "step": 43460 }, { "epoch": 7.75, "learning_rate": 4.612606990014265e-05, "loss": 0.0051, "step": 43470 }, { "epoch": 7.75, "learning_rate": 4.6125178316690443e-05, "loss": 0.0037, "step": 43480 }, { "epoch": 7.75, "learning_rate": 4.612428673323823e-05, "loss": 0.0046, "step": 43490 }, { "epoch": 7.76, "learning_rate": 4.612339514978602e-05, "loss": 0.0095, "step": 43500 }, { "epoch": 7.76, "learning_rate": 4.612250356633381e-05, "loss": 0.0045, "step": 43510 }, { "epoch": 7.76, "learning_rate": 4.61216119828816e-05, "loss": 0.0062, "step": 43520 }, { "epoch": 7.76, "learning_rate": 4.612072039942939e-05, "loss": 0.0056, "step": 43530 }, { "epoch": 7.76, "learning_rate": 4.611982881597718e-05, "loss": 0.0075, "step": 43540 }, { "epoch": 7.77, "learning_rate": 4.611893723252497e-05, "loss": 0.0056, "step": 43550 }, { "epoch": 7.77, "learning_rate": 4.611804564907275e-05, "loss": 0.0057, "step": 43560 }, { "epoch": 7.77, "learning_rate": 4.6117154065620544e-05, "loss": 0.0039, "step": 43570 }, { "epoch": 7.77, "learning_rate": 4.6116262482168335e-05, "loss": 0.0064, "step": 43580 }, { "epoch": 7.77, "learning_rate": 4.611537089871612e-05, "loss": 0.0036, "step": 43590 }, { "epoch": 7.77, "learning_rate": 4.611447931526391e-05, "loss": 0.0059, "step": 43600 }, { "epoch": 7.78, "learning_rate": 4.6113587731811695e-05, "loss": 0.0065, "step": 43610 }, { "epoch": 7.78, "learning_rate": 4.611269614835949e-05, "loss": 0.0032, "step": 43620 }, { "epoch": 7.78, "learning_rate": 4.611180456490728e-05, "loss": 0.0036, "step": 43630 }, { "epoch": 7.78, "learning_rate": 4.611091298145507e-05, "loss": 0.0047, "step": 43640 }, { "epoch": 7.78, "learning_rate": 4.6110021398002853e-05, "loss": 0.0032, "step": 43650 }, { "epoch": 7.79, "learning_rate": 4.6109129814550645e-05, "loss": 0.006, "step": 43660 }, { "epoch": 7.79, "learning_rate": 4.6108238231098436e-05, "loss": 0.0036, "step": 43670 }, { "epoch": 7.79, "learning_rate": 4.610734664764622e-05, "loss": 0.0035, "step": 43680 }, { "epoch": 7.79, "learning_rate": 4.610645506419401e-05, "loss": 0.0049, "step": 43690 }, { "epoch": 7.79, "learning_rate": 4.6105563480741796e-05, "loss": 0.0041, "step": 43700 }, { "epoch": 7.79, "learning_rate": 4.610467189728959e-05, "loss": 0.006, "step": 43710 }, { "epoch": 7.8, "learning_rate": 4.610378031383737e-05, "loss": 0.0045, "step": 43720 }, { "epoch": 7.8, "learning_rate": 4.610288873038517e-05, "loss": 0.0039, "step": 43730 }, { "epoch": 7.8, "learning_rate": 4.6101997146932954e-05, "loss": 0.005, "step": 43740 }, { "epoch": 7.8, "learning_rate": 4.6101105563480745e-05, "loss": 0.0072, "step": 43750 }, { "epoch": 7.8, "learning_rate": 4.6100213980028536e-05, "loss": 0.0039, "step": 43760 }, { "epoch": 7.8, "learning_rate": 4.609932239657632e-05, "loss": 0.0034, "step": 43770 }, { "epoch": 7.81, "learning_rate": 4.609843081312411e-05, "loss": 0.0058, "step": 43780 }, { "epoch": 7.81, "learning_rate": 4.6097539229671896e-05, "loss": 0.0033, "step": 43790 }, { "epoch": 7.81, "learning_rate": 4.609664764621969e-05, "loss": 0.0092, "step": 43800 }, { "epoch": 7.81, "learning_rate": 4.609575606276748e-05, "loss": 0.0034, "step": 43810 }, { "epoch": 7.81, "learning_rate": 4.609486447931526e-05, "loss": 0.0056, "step": 43820 }, { "epoch": 7.82, "learning_rate": 4.6093972895863055e-05, "loss": 0.0049, "step": 43830 }, { "epoch": 7.82, "learning_rate": 4.6093081312410846e-05, "loss": 0.0081, "step": 43840 }, { "epoch": 7.82, "learning_rate": 4.609218972895864e-05, "loss": 0.0046, "step": 43850 }, { "epoch": 7.82, "learning_rate": 4.609129814550642e-05, "loss": 0.0073, "step": 43860 }, { "epoch": 7.82, "learning_rate": 4.609040656205421e-05, "loss": 0.0043, "step": 43870 }, { "epoch": 7.82, "learning_rate": 4.6089514978602e-05, "loss": 0.0041, "step": 43880 }, { "epoch": 7.83, "learning_rate": 4.608862339514979e-05, "loss": 0.0053, "step": 43890 }, { "epoch": 7.83, "learning_rate": 4.608773181169758e-05, "loss": 0.0063, "step": 43900 }, { "epoch": 7.83, "learning_rate": 4.6086840228245364e-05, "loss": 0.0037, "step": 43910 }, { "epoch": 7.83, "learning_rate": 4.6085948644793155e-05, "loss": 0.0047, "step": 43920 }, { "epoch": 7.83, "learning_rate": 4.608505706134094e-05, "loss": 0.003, "step": 43930 }, { "epoch": 7.84, "learning_rate": 4.608416547788873e-05, "loss": 0.0059, "step": 43940 }, { "epoch": 7.84, "learning_rate": 4.608327389443652e-05, "loss": 0.0048, "step": 43950 }, { "epoch": 7.84, "learning_rate": 4.608238231098431e-05, "loss": 0.0086, "step": 43960 }, { "epoch": 7.84, "learning_rate": 4.60814907275321e-05, "loss": 0.0094, "step": 43970 }, { "epoch": 7.84, "learning_rate": 4.608059914407989e-05, "loss": 0.0072, "step": 43980 }, { "epoch": 7.84, "learning_rate": 4.607970756062768e-05, "loss": 0.0061, "step": 43990 }, { "epoch": 7.85, "learning_rate": 4.6078815977175464e-05, "loss": 0.0047, "step": 44000 }, { "epoch": 7.85, "learning_rate": 4.6077924393723256e-05, "loss": 0.0047, "step": 44010 }, { "epoch": 7.85, "learning_rate": 4.607703281027104e-05, "loss": 0.0057, "step": 44020 }, { "epoch": 7.85, "learning_rate": 4.607614122681883e-05, "loss": 0.0027, "step": 44030 }, { "epoch": 7.85, "learning_rate": 4.607524964336662e-05, "loss": 0.0051, "step": 44040 }, { "epoch": 7.85, "learning_rate": 4.607435805991441e-05, "loss": 0.0044, "step": 44050 }, { "epoch": 7.86, "learning_rate": 4.60734664764622e-05, "loss": 0.0057, "step": 44060 }, { "epoch": 7.86, "learning_rate": 4.607257489300999e-05, "loss": 0.0063, "step": 44070 }, { "epoch": 7.86, "learning_rate": 4.607168330955778e-05, "loss": 0.0031, "step": 44080 }, { "epoch": 7.86, "learning_rate": 4.6070791726105565e-05, "loss": 0.0063, "step": 44090 }, { "epoch": 7.86, "learning_rate": 4.6069900142653356e-05, "loss": 0.0035, "step": 44100 }, { "epoch": 7.87, "learning_rate": 4.606900855920114e-05, "loss": 0.0083, "step": 44110 }, { "epoch": 7.87, "learning_rate": 4.606811697574893e-05, "loss": 0.0062, "step": 44120 }, { "epoch": 7.87, "learning_rate": 4.606722539229672e-05, "loss": 0.0068, "step": 44130 }, { "epoch": 7.87, "learning_rate": 4.606633380884451e-05, "loss": 0.0085, "step": 44140 }, { "epoch": 7.87, "learning_rate": 4.60654422253923e-05, "loss": 0.0057, "step": 44150 }, { "epoch": 7.87, "learning_rate": 4.606455064194008e-05, "loss": 0.0055, "step": 44160 }, { "epoch": 7.88, "learning_rate": 4.606365905848788e-05, "loss": 0.0036, "step": 44170 }, { "epoch": 7.88, "learning_rate": 4.6062767475035666e-05, "loss": 0.0063, "step": 44180 }, { "epoch": 7.88, "learning_rate": 4.606187589158346e-05, "loss": 0.0069, "step": 44190 }, { "epoch": 7.88, "learning_rate": 4.606098430813124e-05, "loss": 0.0057, "step": 44200 }, { "epoch": 7.88, "learning_rate": 4.606009272467903e-05, "loss": 0.005, "step": 44210 }, { "epoch": 7.89, "learning_rate": 4.6059201141226824e-05, "loss": 0.0052, "step": 44220 }, { "epoch": 7.89, "learning_rate": 4.605830955777461e-05, "loss": 0.0042, "step": 44230 }, { "epoch": 7.89, "learning_rate": 4.60574179743224e-05, "loss": 0.0035, "step": 44240 }, { "epoch": 7.89, "learning_rate": 4.6056526390870184e-05, "loss": 0.0043, "step": 44250 }, { "epoch": 7.89, "learning_rate": 4.6055634807417975e-05, "loss": 0.0033, "step": 44260 }, { "epoch": 7.89, "learning_rate": 4.6054743223965766e-05, "loss": 0.0042, "step": 44270 }, { "epoch": 7.9, "learning_rate": 4.605385164051356e-05, "loss": 0.0045, "step": 44280 }, { "epoch": 7.9, "learning_rate": 4.605296005706134e-05, "loss": 0.0047, "step": 44290 }, { "epoch": 7.9, "learning_rate": 4.605206847360913e-05, "loss": 0.0051, "step": 44300 }, { "epoch": 7.9, "learning_rate": 4.6051176890156924e-05, "loss": 0.0037, "step": 44310 }, { "epoch": 7.9, "learning_rate": 4.605028530670471e-05, "loss": 0.0086, "step": 44320 }, { "epoch": 7.9, "learning_rate": 4.60493937232525e-05, "loss": 0.0043, "step": 44330 }, { "epoch": 7.91, "learning_rate": 4.6048502139800284e-05, "loss": 0.0046, "step": 44340 }, { "epoch": 7.91, "learning_rate": 4.6047610556348075e-05, "loss": 0.0047, "step": 44350 }, { "epoch": 7.91, "learning_rate": 4.604671897289587e-05, "loss": 0.0052, "step": 44360 }, { "epoch": 7.91, "learning_rate": 4.604582738944365e-05, "loss": 0.0028, "step": 44370 }, { "epoch": 7.91, "learning_rate": 4.604493580599144e-05, "loss": 0.0048, "step": 44380 }, { "epoch": 7.92, "learning_rate": 4.6044044222539234e-05, "loss": 0.0076, "step": 44390 }, { "epoch": 7.92, "learning_rate": 4.6043152639087025e-05, "loss": 0.0051, "step": 44400 }, { "epoch": 7.92, "learning_rate": 4.604226105563481e-05, "loss": 0.0088, "step": 44410 }, { "epoch": 7.92, "learning_rate": 4.60413694721826e-05, "loss": 0.0032, "step": 44420 }, { "epoch": 7.92, "learning_rate": 4.6040477888730385e-05, "loss": 0.0031, "step": 44430 }, { "epoch": 7.92, "learning_rate": 4.6039586305278176e-05, "loss": 0.0035, "step": 44440 }, { "epoch": 7.93, "learning_rate": 4.603869472182597e-05, "loss": 0.0056, "step": 44450 }, { "epoch": 7.93, "learning_rate": 4.603780313837375e-05, "loss": 0.005, "step": 44460 }, { "epoch": 7.93, "learning_rate": 4.603691155492154e-05, "loss": 0.0054, "step": 44470 }, { "epoch": 7.93, "learning_rate": 4.603601997146933e-05, "loss": 0.0088, "step": 44480 }, { "epoch": 7.93, "learning_rate": 4.603512838801712e-05, "loss": 0.004, "step": 44490 }, { "epoch": 7.94, "learning_rate": 4.603423680456491e-05, "loss": 0.0062, "step": 44500 }, { "epoch": 7.94, "learning_rate": 4.60333452211127e-05, "loss": 0.0057, "step": 44510 }, { "epoch": 7.94, "learning_rate": 4.6032453637660485e-05, "loss": 0.0055, "step": 44520 }, { "epoch": 7.94, "learning_rate": 4.6031562054208277e-05, "loss": 0.0083, "step": 44530 }, { "epoch": 7.94, "learning_rate": 4.603067047075607e-05, "loss": 0.0047, "step": 44540 }, { "epoch": 7.94, "learning_rate": 4.602977888730385e-05, "loss": 0.0049, "step": 44550 }, { "epoch": 7.95, "learning_rate": 4.6028887303851643e-05, "loss": 0.0039, "step": 44560 }, { "epoch": 7.95, "learning_rate": 4.602799572039943e-05, "loss": 0.0032, "step": 44570 }, { "epoch": 7.95, "learning_rate": 4.602710413694722e-05, "loss": 0.0075, "step": 44580 }, { "epoch": 7.95, "learning_rate": 4.602621255349501e-05, "loss": 0.004, "step": 44590 }, { "epoch": 7.95, "learning_rate": 4.6025320970042795e-05, "loss": 0.0043, "step": 44600 }, { "epoch": 7.95, "learning_rate": 4.602442938659059e-05, "loss": 0.0052, "step": 44610 }, { "epoch": 7.96, "learning_rate": 4.602353780313838e-05, "loss": 0.0066, "step": 44620 }, { "epoch": 7.96, "learning_rate": 4.602264621968617e-05, "loss": 0.0031, "step": 44630 }, { "epoch": 7.96, "learning_rate": 4.602175463623395e-05, "loss": 0.0059, "step": 44640 }, { "epoch": 7.96, "learning_rate": 4.6020863052781744e-05, "loss": 0.0038, "step": 44650 }, { "epoch": 7.96, "learning_rate": 4.601997146932953e-05, "loss": 0.0062, "step": 44660 }, { "epoch": 7.97, "learning_rate": 4.601907988587732e-05, "loss": 0.0056, "step": 44670 }, { "epoch": 7.97, "learning_rate": 4.601818830242511e-05, "loss": 0.0042, "step": 44680 }, { "epoch": 7.97, "learning_rate": 4.6017296718972895e-05, "loss": 0.0066, "step": 44690 }, { "epoch": 7.97, "learning_rate": 4.6016405135520686e-05, "loss": 0.0029, "step": 44700 }, { "epoch": 7.97, "learning_rate": 4.601551355206847e-05, "loss": 0.0051, "step": 44710 }, { "epoch": 7.97, "learning_rate": 4.601462196861627e-05, "loss": 0.0019, "step": 44720 }, { "epoch": 7.98, "learning_rate": 4.601373038516405e-05, "loss": 0.007, "step": 44730 }, { "epoch": 7.98, "learning_rate": 4.6012838801711845e-05, "loss": 0.0073, "step": 44740 }, { "epoch": 7.98, "learning_rate": 4.601194721825963e-05, "loss": 0.0069, "step": 44750 }, { "epoch": 7.98, "learning_rate": 4.601105563480742e-05, "loss": 0.0059, "step": 44760 }, { "epoch": 7.98, "learning_rate": 4.601016405135521e-05, "loss": 0.0049, "step": 44770 }, { "epoch": 7.99, "learning_rate": 4.6009272467902996e-05, "loss": 0.0036, "step": 44780 }, { "epoch": 7.99, "learning_rate": 4.600838088445079e-05, "loss": 0.0041, "step": 44790 }, { "epoch": 7.99, "learning_rate": 4.600748930099857e-05, "loss": 0.0044, "step": 44800 }, { "epoch": 7.99, "learning_rate": 4.600659771754636e-05, "loss": 0.0031, "step": 44810 }, { "epoch": 7.99, "learning_rate": 4.6005706134094154e-05, "loss": 0.0047, "step": 44820 }, { "epoch": 7.99, "learning_rate": 4.6004814550641945e-05, "loss": 0.006, "step": 44830 }, { "epoch": 8.0, "learning_rate": 4.6003922967189736e-05, "loss": 0.0049, "step": 44840 }, { "epoch": 8.0, "learning_rate": 4.600303138373752e-05, "loss": 0.0045, "step": 44850 }, { "epoch": 8.0, "learning_rate": 4.600213980028531e-05, "loss": 0.0065, "step": 44860 }, { "epoch": 8.0, "eval_loss": 0.0144475307315588, "eval_runtime": 195.9591, "eval_samples_per_second": 23.673, "eval_steps_per_second": 2.96, "step": 44864 }, { "epoch": 8.0, "learning_rate": 4.6001248216833096e-05, "loss": 0.002, "step": 44870 }, { "epoch": 8.0, "learning_rate": 4.600035663338089e-05, "loss": 0.0058, "step": 44880 }, { "epoch": 8.0, "learning_rate": 4.599946504992867e-05, "loss": 0.0038, "step": 44890 }, { "epoch": 8.01, "learning_rate": 4.599857346647646e-05, "loss": 0.0027, "step": 44900 }, { "epoch": 8.01, "learning_rate": 4.5997681883024254e-05, "loss": 0.004, "step": 44910 }, { "epoch": 8.01, "learning_rate": 4.599679029957204e-05, "loss": 0.0034, "step": 44920 }, { "epoch": 8.01, "learning_rate": 4.599589871611983e-05, "loss": 0.0058, "step": 44930 }, { "epoch": 8.01, "learning_rate": 4.599500713266762e-05, "loss": 0.0039, "step": 44940 }, { "epoch": 8.02, "learning_rate": 4.599411554921541e-05, "loss": 0.0061, "step": 44950 }, { "epoch": 8.02, "learning_rate": 4.59932239657632e-05, "loss": 0.0041, "step": 44960 }, { "epoch": 8.02, "learning_rate": 4.599233238231099e-05, "loss": 0.0044, "step": 44970 }, { "epoch": 8.02, "learning_rate": 4.599144079885877e-05, "loss": 0.0033, "step": 44980 }, { "epoch": 8.02, "learning_rate": 4.5990549215406564e-05, "loss": 0.0061, "step": 44990 }, { "epoch": 8.02, "learning_rate": 4.5989657631954355e-05, "loss": 0.0021, "step": 45000 }, { "epoch": 8.03, "learning_rate": 4.598876604850214e-05, "loss": 0.0048, "step": 45010 }, { "epoch": 8.03, "learning_rate": 4.598787446504993e-05, "loss": 0.0042, "step": 45020 }, { "epoch": 8.03, "learning_rate": 4.5986982881597715e-05, "loss": 0.0042, "step": 45030 }, { "epoch": 8.03, "learning_rate": 4.5986091298145506e-05, "loss": 0.0062, "step": 45040 }, { "epoch": 8.03, "learning_rate": 4.59851997146933e-05, "loss": 0.0044, "step": 45050 }, { "epoch": 8.03, "learning_rate": 4.598430813124109e-05, "loss": 0.0053, "step": 45060 }, { "epoch": 8.04, "learning_rate": 4.598341654778888e-05, "loss": 0.0038, "step": 45070 }, { "epoch": 8.04, "learning_rate": 4.5982524964336664e-05, "loss": 0.0046, "step": 45080 }, { "epoch": 8.04, "learning_rate": 4.5981633380884456e-05, "loss": 0.0048, "step": 45090 }, { "epoch": 8.04, "learning_rate": 4.598074179743224e-05, "loss": 0.0022, "step": 45100 }, { "epoch": 8.04, "learning_rate": 4.597985021398003e-05, "loss": 0.0067, "step": 45110 }, { "epoch": 8.05, "learning_rate": 4.5978958630527816e-05, "loss": 0.0032, "step": 45120 }, { "epoch": 8.05, "learning_rate": 4.597806704707561e-05, "loss": 0.0043, "step": 45130 }, { "epoch": 8.05, "learning_rate": 4.59771754636234e-05, "loss": 0.0027, "step": 45140 }, { "epoch": 8.05, "learning_rate": 4.597628388017118e-05, "loss": 0.0053, "step": 45150 }, { "epoch": 8.05, "learning_rate": 4.597539229671898e-05, "loss": 0.0048, "step": 45160 }, { "epoch": 8.05, "learning_rate": 4.5974500713266765e-05, "loss": 0.0025, "step": 45170 }, { "epoch": 8.06, "learning_rate": 4.5973609129814556e-05, "loss": 0.0044, "step": 45180 }, { "epoch": 8.06, "learning_rate": 4.597271754636234e-05, "loss": 0.0066, "step": 45190 }, { "epoch": 8.06, "learning_rate": 4.597182596291013e-05, "loss": 0.004, "step": 45200 }, { "epoch": 8.06, "learning_rate": 4.5970934379457916e-05, "loss": 0.004, "step": 45210 }, { "epoch": 8.06, "learning_rate": 4.597004279600571e-05, "loss": 0.0049, "step": 45220 }, { "epoch": 8.07, "learning_rate": 4.59691512125535e-05, "loss": 0.0038, "step": 45230 }, { "epoch": 8.07, "learning_rate": 4.596825962910128e-05, "loss": 0.0036, "step": 45240 }, { "epoch": 8.07, "learning_rate": 4.5967368045649074e-05, "loss": 0.004, "step": 45250 }, { "epoch": 8.07, "learning_rate": 4.596647646219686e-05, "loss": 0.0053, "step": 45260 }, { "epoch": 8.07, "learning_rate": 4.596558487874466e-05, "loss": 0.0057, "step": 45270 }, { "epoch": 8.07, "learning_rate": 4.596469329529244e-05, "loss": 0.0072, "step": 45280 }, { "epoch": 8.08, "learning_rate": 4.596380171184023e-05, "loss": 0.0051, "step": 45290 }, { "epoch": 8.08, "learning_rate": 4.5962910128388024e-05, "loss": 0.0055, "step": 45300 }, { "epoch": 8.08, "learning_rate": 4.596201854493581e-05, "loss": 0.0058, "step": 45310 }, { "epoch": 8.08, "learning_rate": 4.59611269614836e-05, "loss": 0.0041, "step": 45320 }, { "epoch": 8.08, "learning_rate": 4.5960235378031384e-05, "loss": 0.005, "step": 45330 }, { "epoch": 8.08, "learning_rate": 4.5959343794579175e-05, "loss": 0.0077, "step": 45340 }, { "epoch": 8.09, "learning_rate": 4.595845221112696e-05, "loss": 0.0052, "step": 45350 }, { "epoch": 8.09, "learning_rate": 4.595756062767475e-05, "loss": 0.0046, "step": 45360 }, { "epoch": 8.09, "learning_rate": 4.595666904422254e-05, "loss": 0.0035, "step": 45370 }, { "epoch": 8.09, "learning_rate": 4.595577746077033e-05, "loss": 0.0036, "step": 45380 }, { "epoch": 8.09, "learning_rate": 4.5954885877318124e-05, "loss": 0.0019, "step": 45390 }, { "epoch": 8.1, "learning_rate": 4.595399429386591e-05, "loss": 0.0071, "step": 45400 }, { "epoch": 8.1, "learning_rate": 4.59531027104137e-05, "loss": 0.0056, "step": 45410 }, { "epoch": 8.1, "learning_rate": 4.5952211126961484e-05, "loss": 0.0065, "step": 45420 }, { "epoch": 8.1, "learning_rate": 4.5951319543509275e-05, "loss": 0.0043, "step": 45430 }, { "epoch": 8.1, "learning_rate": 4.595042796005706e-05, "loss": 0.0061, "step": 45440 }, { "epoch": 8.1, "learning_rate": 4.594953637660485e-05, "loss": 0.0068, "step": 45450 }, { "epoch": 8.11, "learning_rate": 4.594864479315264e-05, "loss": 0.0072, "step": 45460 }, { "epoch": 8.11, "learning_rate": 4.594775320970043e-05, "loss": 0.0037, "step": 45470 }, { "epoch": 8.11, "learning_rate": 4.594686162624822e-05, "loss": 0.0032, "step": 45480 }, { "epoch": 8.11, "learning_rate": 4.594597004279601e-05, "loss": 0.005, "step": 45490 }, { "epoch": 8.11, "learning_rate": 4.59450784593438e-05, "loss": 0.0047, "step": 45500 }, { "epoch": 8.12, "learning_rate": 4.5944186875891585e-05, "loss": 0.0044, "step": 45510 }, { "epoch": 8.12, "learning_rate": 4.5943295292439376e-05, "loss": 0.0054, "step": 45520 }, { "epoch": 8.12, "learning_rate": 4.594240370898717e-05, "loss": 0.0055, "step": 45530 }, { "epoch": 8.12, "learning_rate": 4.594151212553495e-05, "loss": 0.0038, "step": 45540 }, { "epoch": 8.12, "learning_rate": 4.594062054208274e-05, "loss": 0.0044, "step": 45550 }, { "epoch": 8.12, "learning_rate": 4.593972895863053e-05, "loss": 0.0051, "step": 45560 }, { "epoch": 8.13, "learning_rate": 4.593883737517832e-05, "loss": 0.0063, "step": 45570 }, { "epoch": 8.13, "learning_rate": 4.59379457917261e-05, "loss": 0.0035, "step": 45580 }, { "epoch": 8.13, "learning_rate": 4.5937054208273894e-05, "loss": 0.0036, "step": 45590 }, { "epoch": 8.13, "learning_rate": 4.5936162624821685e-05, "loss": 0.0066, "step": 45600 }, { "epoch": 8.13, "learning_rate": 4.5935271041369477e-05, "loss": 0.0036, "step": 45610 }, { "epoch": 8.13, "learning_rate": 4.593437945791727e-05, "loss": 0.006, "step": 45620 }, { "epoch": 8.14, "learning_rate": 4.593348787446505e-05, "loss": 0.0039, "step": 45630 }, { "epoch": 8.14, "learning_rate": 4.5932596291012843e-05, "loss": 0.0032, "step": 45640 }, { "epoch": 8.14, "learning_rate": 4.593170470756063e-05, "loss": 0.0064, "step": 45650 }, { "epoch": 8.14, "learning_rate": 4.593081312410842e-05, "loss": 0.0041, "step": 45660 }, { "epoch": 8.14, "learning_rate": 4.5929921540656203e-05, "loss": 0.0037, "step": 45670 }, { "epoch": 8.15, "learning_rate": 4.5929029957203995e-05, "loss": 0.005, "step": 45680 }, { "epoch": 8.15, "learning_rate": 4.5928138373751786e-05, "loss": 0.0033, "step": 45690 }, { "epoch": 8.15, "learning_rate": 4.592724679029957e-05, "loss": 0.003, "step": 45700 }, { "epoch": 8.15, "learning_rate": 4.592635520684737e-05, "loss": 0.0036, "step": 45710 }, { "epoch": 8.15, "learning_rate": 4.592546362339515e-05, "loss": 0.0047, "step": 45720 }, { "epoch": 8.15, "learning_rate": 4.5924572039942944e-05, "loss": 0.0045, "step": 45730 }, { "epoch": 8.16, "learning_rate": 4.592368045649073e-05, "loss": 0.0024, "step": 45740 }, { "epoch": 8.16, "learning_rate": 4.592278887303852e-05, "loss": 0.0028, "step": 45750 }, { "epoch": 8.16, "learning_rate": 4.592189728958631e-05, "loss": 0.0049, "step": 45760 }, { "epoch": 8.16, "learning_rate": 4.5921005706134095e-05, "loss": 0.0068, "step": 45770 }, { "epoch": 8.16, "learning_rate": 4.592020328102711e-05, "loss": 0.0056, "step": 45780 }, { "epoch": 8.17, "learning_rate": 4.5919311697574894e-05, "loss": 0.0036, "step": 45790 }, { "epoch": 8.17, "learning_rate": 4.5918420114122686e-05, "loss": 0.0072, "step": 45800 }, { "epoch": 8.17, "learning_rate": 4.591752853067047e-05, "loss": 0.008, "step": 45810 }, { "epoch": 8.17, "learning_rate": 4.591663694721826e-05, "loss": 0.005, "step": 45820 }, { "epoch": 8.17, "learning_rate": 4.591574536376605e-05, "loss": 0.0058, "step": 45830 }, { "epoch": 8.17, "learning_rate": 4.591485378031384e-05, "loss": 0.0039, "step": 45840 }, { "epoch": 8.18, "learning_rate": 4.591396219686163e-05, "loss": 0.0085, "step": 45850 }, { "epoch": 8.18, "learning_rate": 4.591307061340941e-05, "loss": 0.0046, "step": 45860 }, { "epoch": 8.18, "learning_rate": 4.591217902995721e-05, "loss": 0.004, "step": 45870 }, { "epoch": 8.18, "learning_rate": 4.5911287446504995e-05, "loss": 0.0047, "step": 45880 }, { "epoch": 8.18, "learning_rate": 4.5910395863052786e-05, "loss": 0.0039, "step": 45890 }, { "epoch": 8.18, "learning_rate": 4.590950427960057e-05, "loss": 0.0028, "step": 45900 }, { "epoch": 8.19, "learning_rate": 4.590861269614836e-05, "loss": 0.0038, "step": 45910 }, { "epoch": 8.19, "learning_rate": 4.590772111269615e-05, "loss": 0.0054, "step": 45920 }, { "epoch": 8.19, "learning_rate": 4.590682952924394e-05, "loss": 0.0079, "step": 45930 }, { "epoch": 8.19, "learning_rate": 4.590593794579173e-05, "loss": 0.0081, "step": 45940 }, { "epoch": 8.19, "learning_rate": 4.590504636233951e-05, "loss": 0.0038, "step": 45950 }, { "epoch": 8.2, "learning_rate": 4.5904154778887304e-05, "loss": 0.0076, "step": 45960 }, { "epoch": 8.2, "learning_rate": 4.5903263195435096e-05, "loss": 0.0047, "step": 45970 }, { "epoch": 8.2, "learning_rate": 4.590237161198289e-05, "loss": 0.0048, "step": 45980 }, { "epoch": 8.2, "learning_rate": 4.590148002853067e-05, "loss": 0.0047, "step": 45990 }, { "epoch": 8.2, "learning_rate": 4.590058844507846e-05, "loss": 0.0087, "step": 46000 }, { "epoch": 8.2, "learning_rate": 4.5899696861626254e-05, "loss": 0.0034, "step": 46010 }, { "epoch": 8.21, "learning_rate": 4.589880527817404e-05, "loss": 0.0051, "step": 46020 }, { "epoch": 8.21, "learning_rate": 4.589791369472183e-05, "loss": 0.0051, "step": 46030 }, { "epoch": 8.21, "learning_rate": 4.5897022111269614e-05, "loss": 0.0082, "step": 46040 }, { "epoch": 8.21, "learning_rate": 4.5896130527817405e-05, "loss": 0.0041, "step": 46050 }, { "epoch": 8.21, "learning_rate": 4.5895238944365196e-05, "loss": 0.0035, "step": 46060 }, { "epoch": 8.22, "learning_rate": 4.589434736091298e-05, "loss": 0.0034, "step": 46070 }, { "epoch": 8.22, "learning_rate": 4.589345577746077e-05, "loss": 0.0049, "step": 46080 }, { "epoch": 8.22, "learning_rate": 4.589256419400856e-05, "loss": 0.0028, "step": 46090 }, { "epoch": 8.22, "learning_rate": 4.5891672610556354e-05, "loss": 0.003, "step": 46100 }, { "epoch": 8.22, "learning_rate": 4.589078102710414e-05, "loss": 0.0051, "step": 46110 }, { "epoch": 8.22, "learning_rate": 4.588988944365193e-05, "loss": 0.0045, "step": 46120 }, { "epoch": 8.23, "learning_rate": 4.5888997860199714e-05, "loss": 0.0106, "step": 46130 }, { "epoch": 8.23, "learning_rate": 4.5888106276747506e-05, "loss": 0.005, "step": 46140 }, { "epoch": 8.23, "learning_rate": 4.58872146932953e-05, "loss": 0.0071, "step": 46150 }, { "epoch": 8.23, "learning_rate": 4.588632310984308e-05, "loss": 0.0061, "step": 46160 }, { "epoch": 8.23, "learning_rate": 4.588543152639087e-05, "loss": 0.005, "step": 46170 }, { "epoch": 8.23, "learning_rate": 4.588453994293866e-05, "loss": 0.0062, "step": 46180 }, { "epoch": 8.24, "learning_rate": 4.588364835948645e-05, "loss": 0.0038, "step": 46190 }, { "epoch": 8.24, "learning_rate": 4.588275677603424e-05, "loss": 0.0043, "step": 46200 }, { "epoch": 8.24, "learning_rate": 4.588186519258203e-05, "loss": 0.0029, "step": 46210 }, { "epoch": 8.24, "learning_rate": 4.5880973609129815e-05, "loss": 0.0063, "step": 46220 }, { "epoch": 8.24, "learning_rate": 4.5880082025677606e-05, "loss": 0.0028, "step": 46230 }, { "epoch": 8.25, "learning_rate": 4.58791904422254e-05, "loss": 0.0038, "step": 46240 }, { "epoch": 8.25, "learning_rate": 4.587829885877318e-05, "loss": 0.0039, "step": 46250 }, { "epoch": 8.25, "learning_rate": 4.587740727532097e-05, "loss": 0.0027, "step": 46260 }, { "epoch": 8.25, "learning_rate": 4.587651569186876e-05, "loss": 0.0026, "step": 46270 }, { "epoch": 8.25, "learning_rate": 4.587562410841655e-05, "loss": 0.0047, "step": 46280 }, { "epoch": 8.25, "learning_rate": 4.587473252496434e-05, "loss": 0.0038, "step": 46290 }, { "epoch": 8.26, "learning_rate": 4.5873840941512124e-05, "loss": 0.0051, "step": 46300 }, { "epoch": 8.26, "learning_rate": 4.587294935805992e-05, "loss": 0.0049, "step": 46310 }, { "epoch": 8.26, "learning_rate": 4.587205777460771e-05, "loss": 0.0036, "step": 46320 }, { "epoch": 8.26, "learning_rate": 4.58711661911555e-05, "loss": 0.0058, "step": 46330 }, { "epoch": 8.26, "learning_rate": 4.587027460770328e-05, "loss": 0.0038, "step": 46340 }, { "epoch": 8.26, "learning_rate": 4.5869383024251073e-05, "loss": 0.003, "step": 46350 }, { "epoch": 8.27, "learning_rate": 4.586849144079886e-05, "loss": 0.0034, "step": 46360 }, { "epoch": 8.27, "learning_rate": 4.586759985734665e-05, "loss": 0.0081, "step": 46370 }, { "epoch": 8.27, "learning_rate": 4.586670827389444e-05, "loss": 0.0038, "step": 46380 }, { "epoch": 8.27, "learning_rate": 4.5865816690442225e-05, "loss": 0.0074, "step": 46390 }, { "epoch": 8.27, "learning_rate": 4.5864925106990016e-05, "loss": 0.0023, "step": 46400 }, { "epoch": 8.28, "learning_rate": 4.58640335235378e-05, "loss": 0.0041, "step": 46410 }, { "epoch": 8.28, "learning_rate": 4.58631419400856e-05, "loss": 0.0043, "step": 46420 }, { "epoch": 8.28, "learning_rate": 4.586225035663338e-05, "loss": 0.0034, "step": 46430 }, { "epoch": 8.28, "learning_rate": 4.5861358773181174e-05, "loss": 0.0047, "step": 46440 }, { "epoch": 8.28, "learning_rate": 4.586046718972896e-05, "loss": 0.0031, "step": 46450 }, { "epoch": 8.28, "learning_rate": 4.585957560627675e-05, "loss": 0.0047, "step": 46460 }, { "epoch": 8.29, "learning_rate": 4.585868402282454e-05, "loss": 0.0065, "step": 46470 }, { "epoch": 8.29, "learning_rate": 4.5857792439372325e-05, "loss": 0.003, "step": 46480 }, { "epoch": 8.29, "learning_rate": 4.5856900855920117e-05, "loss": 0.0059, "step": 46490 }, { "epoch": 8.29, "learning_rate": 4.58560092724679e-05, "loss": 0.0025, "step": 46500 }, { "epoch": 8.29, "learning_rate": 4.585511768901569e-05, "loss": 0.0038, "step": 46510 }, { "epoch": 8.3, "learning_rate": 4.5854226105563483e-05, "loss": 0.009, "step": 46520 }, { "epoch": 8.3, "learning_rate": 4.5853334522111275e-05, "loss": 0.005, "step": 46530 }, { "epoch": 8.3, "learning_rate": 4.5852442938659066e-05, "loss": 0.0028, "step": 46540 }, { "epoch": 8.3, "learning_rate": 4.585155135520685e-05, "loss": 0.0067, "step": 46550 }, { "epoch": 8.3, "learning_rate": 4.585065977175464e-05, "loss": 0.0065, "step": 46560 }, { "epoch": 8.3, "learning_rate": 4.5849768188302426e-05, "loss": 0.0051, "step": 46570 }, { "epoch": 8.31, "learning_rate": 4.584887660485022e-05, "loss": 0.0048, "step": 46580 }, { "epoch": 8.31, "learning_rate": 4.5847985021398e-05, "loss": 0.0032, "step": 46590 }, { "epoch": 8.31, "learning_rate": 4.584709343794579e-05, "loss": 0.0025, "step": 46600 }, { "epoch": 8.31, "learning_rate": 4.5846201854493584e-05, "loss": 0.0053, "step": 46610 }, { "epoch": 8.31, "learning_rate": 4.584531027104137e-05, "loss": 0.0041, "step": 46620 }, { "epoch": 8.31, "learning_rate": 4.584441868758916e-05, "loss": 0.0071, "step": 46630 }, { "epoch": 8.32, "learning_rate": 4.584352710413695e-05, "loss": 0.0022, "step": 46640 }, { "epoch": 8.32, "learning_rate": 4.584263552068474e-05, "loss": 0.0025, "step": 46650 }, { "epoch": 8.32, "learning_rate": 4.5841743937232526e-05, "loss": 0.0065, "step": 46660 }, { "epoch": 8.32, "learning_rate": 4.584085235378032e-05, "loss": 0.0051, "step": 46670 }, { "epoch": 8.32, "learning_rate": 4.58399607703281e-05, "loss": 0.0053, "step": 46680 }, { "epoch": 8.33, "learning_rate": 4.583906918687589e-05, "loss": 0.0041, "step": 46690 }, { "epoch": 8.33, "learning_rate": 4.5838177603423685e-05, "loss": 0.0043, "step": 46700 }, { "epoch": 8.33, "learning_rate": 4.583728601997147e-05, "loss": 0.0039, "step": 46710 }, { "epoch": 8.33, "learning_rate": 4.583639443651926e-05, "loss": 0.005, "step": 46720 }, { "epoch": 8.33, "learning_rate": 4.5835502853067045e-05, "loss": 0.0036, "step": 46730 }, { "epoch": 8.33, "learning_rate": 4.5834611269614836e-05, "loss": 0.0075, "step": 46740 }, { "epoch": 8.34, "learning_rate": 4.583371968616263e-05, "loss": 0.0032, "step": 46750 }, { "epoch": 8.34, "learning_rate": 4.583282810271042e-05, "loss": 0.0031, "step": 46760 }, { "epoch": 8.34, "learning_rate": 4.583193651925821e-05, "loss": 0.0072, "step": 46770 }, { "epoch": 8.34, "learning_rate": 4.5831044935805994e-05, "loss": 0.0039, "step": 46780 }, { "epoch": 8.34, "learning_rate": 4.5830153352353785e-05, "loss": 0.0052, "step": 46790 }, { "epoch": 8.35, "learning_rate": 4.582926176890157e-05, "loss": 0.0048, "step": 46800 }, { "epoch": 8.35, "learning_rate": 4.582837018544936e-05, "loss": 0.0036, "step": 46810 }, { "epoch": 8.35, "learning_rate": 4.5827478601997145e-05, "loss": 0.0058, "step": 46820 }, { "epoch": 8.35, "learning_rate": 4.5826587018544936e-05, "loss": 0.0038, "step": 46830 }, { "epoch": 8.35, "learning_rate": 4.582569543509273e-05, "loss": 0.0039, "step": 46840 }, { "epoch": 8.35, "learning_rate": 4.582480385164051e-05, "loss": 0.0051, "step": 46850 }, { "epoch": 8.36, "learning_rate": 4.582391226818831e-05, "loss": 0.0038, "step": 46860 }, { "epoch": 8.36, "learning_rate": 4.5823020684736094e-05, "loss": 0.0032, "step": 46870 }, { "epoch": 8.36, "learning_rate": 4.5822129101283886e-05, "loss": 0.0031, "step": 46880 }, { "epoch": 8.36, "learning_rate": 4.582123751783167e-05, "loss": 0.0068, "step": 46890 }, { "epoch": 8.36, "learning_rate": 4.582034593437946e-05, "loss": 0.0032, "step": 46900 }, { "epoch": 8.36, "learning_rate": 4.5819454350927246e-05, "loss": 0.0074, "step": 46910 }, { "epoch": 8.37, "learning_rate": 4.581856276747504e-05, "loss": 0.0049, "step": 46920 }, { "epoch": 8.37, "learning_rate": 4.581767118402283e-05, "loss": 0.0035, "step": 46930 }, { "epoch": 8.37, "learning_rate": 4.581677960057061e-05, "loss": 0.0074, "step": 46940 }, { "epoch": 8.37, "learning_rate": 4.5815888017118404e-05, "loss": 0.0062, "step": 46950 }, { "epoch": 8.37, "learning_rate": 4.581499643366619e-05, "loss": 0.0074, "step": 46960 }, { "epoch": 8.38, "learning_rate": 4.5814104850213986e-05, "loss": 0.0038, "step": 46970 }, { "epoch": 8.38, "learning_rate": 4.581321326676177e-05, "loss": 0.0033, "step": 46980 }, { "epoch": 8.38, "learning_rate": 4.581232168330956e-05, "loss": 0.0085, "step": 46990 }, { "epoch": 8.38, "learning_rate": 4.581143009985735e-05, "loss": 0.0056, "step": 47000 }, { "epoch": 8.38, "learning_rate": 4.581053851640514e-05, "loss": 0.0056, "step": 47010 }, { "epoch": 8.38, "learning_rate": 4.580964693295293e-05, "loss": 0.0076, "step": 47020 }, { "epoch": 8.39, "learning_rate": 4.580875534950071e-05, "loss": 0.0059, "step": 47030 }, { "epoch": 8.39, "learning_rate": 4.5807863766048504e-05, "loss": 0.0037, "step": 47040 }, { "epoch": 8.39, "learning_rate": 4.580697218259629e-05, "loss": 0.0041, "step": 47050 }, { "epoch": 8.39, "learning_rate": 4.580608059914408e-05, "loss": 0.0028, "step": 47060 }, { "epoch": 8.39, "learning_rate": 4.580518901569187e-05, "loss": 0.0054, "step": 47070 }, { "epoch": 8.4, "learning_rate": 4.580429743223966e-05, "loss": 0.006, "step": 47080 }, { "epoch": 8.4, "learning_rate": 4.5803405848787454e-05, "loss": 0.0048, "step": 47090 }, { "epoch": 8.4, "learning_rate": 4.580251426533524e-05, "loss": 0.0045, "step": 47100 }, { "epoch": 8.4, "learning_rate": 4.580162268188303e-05, "loss": 0.0065, "step": 47110 }, { "epoch": 8.4, "learning_rate": 4.5800731098430814e-05, "loss": 0.0043, "step": 47120 }, { "epoch": 8.4, "learning_rate": 4.5799839514978605e-05, "loss": 0.0075, "step": 47130 }, { "epoch": 8.41, "learning_rate": 4.579894793152639e-05, "loss": 0.004, "step": 47140 }, { "epoch": 8.41, "learning_rate": 4.579805634807418e-05, "loss": 0.0081, "step": 47150 }, { "epoch": 8.41, "learning_rate": 4.579716476462197e-05, "loss": 0.0059, "step": 47160 }, { "epoch": 8.41, "learning_rate": 4.5796273181169756e-05, "loss": 0.005, "step": 47170 }, { "epoch": 8.41, "learning_rate": 4.579538159771755e-05, "loss": 0.0036, "step": 47180 }, { "epoch": 8.41, "learning_rate": 4.579449001426534e-05, "loss": 0.011, "step": 47190 }, { "epoch": 8.42, "learning_rate": 4.579359843081313e-05, "loss": 0.0071, "step": 47200 }, { "epoch": 8.42, "learning_rate": 4.5792706847360914e-05, "loss": 0.006, "step": 47210 }, { "epoch": 8.42, "learning_rate": 4.5791815263908705e-05, "loss": 0.0034, "step": 47220 }, { "epoch": 8.42, "learning_rate": 4.57909236804565e-05, "loss": 0.0038, "step": 47230 }, { "epoch": 8.42, "learning_rate": 4.579003209700428e-05, "loss": 0.0036, "step": 47240 }, { "epoch": 8.43, "learning_rate": 4.578914051355207e-05, "loss": 0.0043, "step": 47250 }, { "epoch": 8.43, "learning_rate": 4.578824893009986e-05, "loss": 0.0039, "step": 47260 }, { "epoch": 8.43, "learning_rate": 4.578735734664765e-05, "loss": 0.0031, "step": 47270 }, { "epoch": 8.43, "learning_rate": 4.578646576319543e-05, "loss": 0.0078, "step": 47280 }, { "epoch": 8.43, "learning_rate": 4.5785574179743224e-05, "loss": 0.0051, "step": 47290 }, { "epoch": 8.43, "learning_rate": 4.5784682596291015e-05, "loss": 0.0065, "step": 47300 }, { "epoch": 8.44, "learning_rate": 4.5783791012838806e-05, "loss": 0.0064, "step": 47310 }, { "epoch": 8.44, "learning_rate": 4.57828994293866e-05, "loss": 0.0063, "step": 47320 }, { "epoch": 8.44, "learning_rate": 4.578200784593438e-05, "loss": 0.0068, "step": 47330 }, { "epoch": 8.44, "learning_rate": 4.578111626248217e-05, "loss": 0.0055, "step": 47340 }, { "epoch": 8.44, "learning_rate": 4.578022467902996e-05, "loss": 0.0071, "step": 47350 }, { "epoch": 8.45, "learning_rate": 4.577933309557775e-05, "loss": 0.0054, "step": 47360 }, { "epoch": 8.45, "learning_rate": 4.577844151212553e-05, "loss": 0.0045, "step": 47370 }, { "epoch": 8.45, "learning_rate": 4.5777549928673324e-05, "loss": 0.0068, "step": 47380 }, { "epoch": 8.45, "learning_rate": 4.5776658345221115e-05, "loss": 0.005, "step": 47390 }, { "epoch": 8.45, "learning_rate": 4.57757667617689e-05, "loss": 0.0031, "step": 47400 }, { "epoch": 8.45, "learning_rate": 4.57748751783167e-05, "loss": 0.007, "step": 47410 }, { "epoch": 8.46, "learning_rate": 4.577398359486448e-05, "loss": 0.0035, "step": 47420 }, { "epoch": 8.46, "learning_rate": 4.5773092011412273e-05, "loss": 0.0068, "step": 47430 }, { "epoch": 8.46, "learning_rate": 4.577220042796006e-05, "loss": 0.004, "step": 47440 }, { "epoch": 8.46, "learning_rate": 4.577130884450785e-05, "loss": 0.0043, "step": 47450 }, { "epoch": 8.46, "learning_rate": 4.577041726105564e-05, "loss": 0.0069, "step": 47460 }, { "epoch": 8.46, "learning_rate": 4.5769525677603425e-05, "loss": 0.0018, "step": 47470 }, { "epoch": 8.47, "learning_rate": 4.5768634094151216e-05, "loss": 0.0043, "step": 47480 }, { "epoch": 8.47, "learning_rate": 4.5767742510699e-05, "loss": 0.0068, "step": 47490 }, { "epoch": 8.47, "learning_rate": 4.576685092724679e-05, "loss": 0.0046, "step": 47500 }, { "epoch": 8.47, "learning_rate": 4.5765959343794576e-05, "loss": 0.0043, "step": 47510 }, { "epoch": 8.47, "learning_rate": 4.5765067760342374e-05, "loss": 0.0045, "step": 47520 }, { "epoch": 8.48, "learning_rate": 4.576417617689016e-05, "loss": 0.0033, "step": 47530 }, { "epoch": 8.48, "learning_rate": 4.576328459343795e-05, "loss": 0.0043, "step": 47540 }, { "epoch": 8.48, "learning_rate": 4.576239300998574e-05, "loss": 0.006, "step": 47550 }, { "epoch": 8.48, "learning_rate": 4.5761501426533525e-05, "loss": 0.0044, "step": 47560 }, { "epoch": 8.48, "learning_rate": 4.5760609843081317e-05, "loss": 0.0063, "step": 47570 }, { "epoch": 8.48, "learning_rate": 4.57597182596291e-05, "loss": 0.0072, "step": 47580 }, { "epoch": 8.49, "learning_rate": 4.575882667617689e-05, "loss": 0.0058, "step": 47590 }, { "epoch": 8.49, "learning_rate": 4.5757935092724677e-05, "loss": 0.0028, "step": 47600 }, { "epoch": 8.49, "learning_rate": 4.575704350927247e-05, "loss": 0.0047, "step": 47610 }, { "epoch": 8.49, "learning_rate": 4.575615192582026e-05, "loss": 0.0056, "step": 47620 }, { "epoch": 8.49, "learning_rate": 4.575526034236805e-05, "loss": 0.0042, "step": 47630 }, { "epoch": 8.5, "learning_rate": 4.575436875891584e-05, "loss": 0.0043, "step": 47640 }, { "epoch": 8.5, "learning_rate": 4.5753477175463626e-05, "loss": 0.0046, "step": 47650 }, { "epoch": 8.5, "learning_rate": 4.575258559201142e-05, "loss": 0.0037, "step": 47660 }, { "epoch": 8.5, "learning_rate": 4.57516940085592e-05, "loss": 0.0027, "step": 47670 }, { "epoch": 8.5, "learning_rate": 4.575080242510699e-05, "loss": 0.003, "step": 47680 }, { "epoch": 8.5, "learning_rate": 4.5749910841654784e-05, "loss": 0.006, "step": 47690 }, { "epoch": 8.51, "learning_rate": 4.574901925820257e-05, "loss": 0.0036, "step": 47700 }, { "epoch": 8.51, "learning_rate": 4.574812767475036e-05, "loss": 0.0023, "step": 47710 }, { "epoch": 8.51, "learning_rate": 4.5747236091298144e-05, "loss": 0.0066, "step": 47720 }, { "epoch": 8.51, "learning_rate": 4.5746344507845935e-05, "loss": 0.0054, "step": 47730 }, { "epoch": 8.51, "learning_rate": 4.5745452924393726e-05, "loss": 0.0066, "step": 47740 }, { "epoch": 8.51, "learning_rate": 4.574456134094152e-05, "loss": 0.0057, "step": 47750 }, { "epoch": 8.52, "learning_rate": 4.57436697574893e-05, "loss": 0.0028, "step": 47760 }, { "epoch": 8.52, "learning_rate": 4.574277817403709e-05, "loss": 0.0067, "step": 47770 }, { "epoch": 8.52, "learning_rate": 4.5741886590584884e-05, "loss": 0.0055, "step": 47780 }, { "epoch": 8.52, "learning_rate": 4.574099500713267e-05, "loss": 0.0035, "step": 47790 }, { "epoch": 8.52, "learning_rate": 4.574010342368046e-05, "loss": 0.0032, "step": 47800 }, { "epoch": 8.53, "learning_rate": 4.5739211840228245e-05, "loss": 0.0045, "step": 47810 }, { "epoch": 8.53, "learning_rate": 4.5738320256776036e-05, "loss": 0.0026, "step": 47820 }, { "epoch": 8.53, "learning_rate": 4.573742867332382e-05, "loss": 0.0037, "step": 47830 }, { "epoch": 8.53, "learning_rate": 4.573653708987161e-05, "loss": 0.0041, "step": 47840 }, { "epoch": 8.53, "learning_rate": 4.57356455064194e-05, "loss": 0.0053, "step": 47850 }, { "epoch": 8.53, "learning_rate": 4.5734753922967194e-05, "loss": 0.0047, "step": 47860 }, { "epoch": 8.54, "learning_rate": 4.5733862339514985e-05, "loss": 0.0029, "step": 47870 }, { "epoch": 8.54, "learning_rate": 4.573297075606277e-05, "loss": 0.0045, "step": 47880 }, { "epoch": 8.54, "learning_rate": 4.573207917261056e-05, "loss": 0.0026, "step": 47890 }, { "epoch": 8.54, "learning_rate": 4.5731187589158345e-05, "loss": 0.0062, "step": 47900 }, { "epoch": 8.54, "learning_rate": 4.5730296005706136e-05, "loss": 0.006, "step": 47910 }, { "epoch": 8.54, "learning_rate": 4.572940442225393e-05, "loss": 0.0077, "step": 47920 }, { "epoch": 8.55, "learning_rate": 4.572851283880171e-05, "loss": 0.0059, "step": 47930 }, { "epoch": 8.55, "learning_rate": 4.57276212553495e-05, "loss": 0.0058, "step": 47940 }, { "epoch": 8.55, "learning_rate": 4.572672967189729e-05, "loss": 0.0072, "step": 47950 }, { "epoch": 8.55, "learning_rate": 4.5725838088445086e-05, "loss": 0.0028, "step": 47960 }, { "epoch": 8.55, "learning_rate": 4.572494650499287e-05, "loss": 0.004, "step": 47970 }, { "epoch": 8.56, "learning_rate": 4.572405492154066e-05, "loss": 0.0041, "step": 47980 }, { "epoch": 8.56, "learning_rate": 4.5723163338088446e-05, "loss": 0.0044, "step": 47990 }, { "epoch": 8.56, "learning_rate": 4.572227175463624e-05, "loss": 0.0053, "step": 48000 }, { "epoch": 8.56, "learning_rate": 4.572138017118403e-05, "loss": 0.0025, "step": 48010 }, { "epoch": 8.56, "learning_rate": 4.572048858773181e-05, "loss": 0.0088, "step": 48020 }, { "epoch": 8.56, "learning_rate": 4.5719597004279604e-05, "loss": 0.0034, "step": 48030 }, { "epoch": 8.57, "learning_rate": 4.571870542082739e-05, "loss": 0.0048, "step": 48040 }, { "epoch": 8.57, "learning_rate": 4.571781383737518e-05, "loss": 0.0035, "step": 48050 }, { "epoch": 8.57, "learning_rate": 4.5716922253922964e-05, "loss": 0.0039, "step": 48060 }, { "epoch": 8.57, "learning_rate": 4.571603067047076e-05, "loss": 0.0034, "step": 48070 }, { "epoch": 8.57, "learning_rate": 4.5715139087018546e-05, "loss": 0.0049, "step": 48080 }, { "epoch": 8.58, "learning_rate": 4.571424750356634e-05, "loss": 0.0025, "step": 48090 }, { "epoch": 8.58, "learning_rate": 4.571335592011413e-05, "loss": 0.0053, "step": 48100 }, { "epoch": 8.58, "learning_rate": 4.571246433666191e-05, "loss": 0.0042, "step": 48110 }, { "epoch": 8.58, "learning_rate": 4.5711572753209704e-05, "loss": 0.0033, "step": 48120 }, { "epoch": 8.58, "learning_rate": 4.571068116975749e-05, "loss": 0.0043, "step": 48130 }, { "epoch": 8.58, "learning_rate": 4.570978958630528e-05, "loss": 0.0052, "step": 48140 }, { "epoch": 8.59, "learning_rate": 4.570889800285307e-05, "loss": 0.0045, "step": 48150 }, { "epoch": 8.59, "learning_rate": 4.5708006419400856e-05, "loss": 0.0057, "step": 48160 }, { "epoch": 8.59, "learning_rate": 4.570720399429387e-05, "loss": 0.0064, "step": 48170 }, { "epoch": 8.59, "learning_rate": 4.5706312410841655e-05, "loss": 0.0041, "step": 48180 }, { "epoch": 8.59, "learning_rate": 4.5705420827389446e-05, "loss": 0.0027, "step": 48190 }, { "epoch": 8.59, "learning_rate": 4.570452924393723e-05, "loss": 0.0042, "step": 48200 }, { "epoch": 8.6, "learning_rate": 4.570363766048502e-05, "loss": 0.0032, "step": 48210 }, { "epoch": 8.6, "learning_rate": 4.570274607703281e-05, "loss": 0.0046, "step": 48220 }, { "epoch": 8.6, "learning_rate": 4.5701854493580604e-05, "loss": 0.0039, "step": 48230 }, { "epoch": 8.6, "learning_rate": 4.5700962910128395e-05, "loss": 0.0038, "step": 48240 }, { "epoch": 8.6, "learning_rate": 4.570007132667618e-05, "loss": 0.0046, "step": 48250 }, { "epoch": 8.61, "learning_rate": 4.569917974322397e-05, "loss": 0.0066, "step": 48260 }, { "epoch": 8.61, "learning_rate": 4.5698288159771755e-05, "loss": 0.0037, "step": 48270 }, { "epoch": 8.61, "learning_rate": 4.5697396576319547e-05, "loss": 0.004, "step": 48280 }, { "epoch": 8.61, "learning_rate": 4.569650499286733e-05, "loss": 0.0059, "step": 48290 }, { "epoch": 8.61, "learning_rate": 4.569561340941512e-05, "loss": 0.0035, "step": 48300 }, { "epoch": 8.61, "learning_rate": 4.5694721825962913e-05, "loss": 0.0066, "step": 48310 }, { "epoch": 8.62, "learning_rate": 4.56938302425107e-05, "loss": 0.0049, "step": 48320 }, { "epoch": 8.62, "learning_rate": 4.569293865905849e-05, "loss": 0.0045, "step": 48330 }, { "epoch": 8.62, "learning_rate": 4.569204707560628e-05, "loss": 0.0037, "step": 48340 }, { "epoch": 8.62, "learning_rate": 4.569115549215407e-05, "loss": 0.0059, "step": 48350 }, { "epoch": 8.62, "learning_rate": 4.5690263908701856e-05, "loss": 0.0072, "step": 48360 }, { "epoch": 8.63, "learning_rate": 4.568937232524965e-05, "loss": 0.0047, "step": 48370 }, { "epoch": 8.63, "learning_rate": 4.568848074179743e-05, "loss": 0.006, "step": 48380 }, { "epoch": 8.63, "learning_rate": 4.568758915834522e-05, "loss": 0.0056, "step": 48390 }, { "epoch": 8.63, "learning_rate": 4.5686697574893014e-05, "loss": 0.0043, "step": 48400 }, { "epoch": 8.63, "learning_rate": 4.56858059914408e-05, "loss": 0.0059, "step": 48410 }, { "epoch": 8.63, "learning_rate": 4.568491440798859e-05, "loss": 0.0037, "step": 48420 }, { "epoch": 8.64, "learning_rate": 4.5684022824536374e-05, "loss": 0.0037, "step": 48430 }, { "epoch": 8.64, "learning_rate": 4.5683131241084165e-05, "loss": 0.0047, "step": 48440 }, { "epoch": 8.64, "learning_rate": 4.5682239657631957e-05, "loss": 0.0048, "step": 48450 }, { "epoch": 8.64, "learning_rate": 4.568134807417975e-05, "loss": 0.0042, "step": 48460 }, { "epoch": 8.64, "learning_rate": 4.568045649072754e-05, "loss": 0.0045, "step": 48470 }, { "epoch": 8.64, "learning_rate": 4.567956490727532e-05, "loss": 0.0032, "step": 48480 }, { "epoch": 8.65, "learning_rate": 4.5678673323823115e-05, "loss": 0.0031, "step": 48490 }, { "epoch": 8.65, "learning_rate": 4.56777817403709e-05, "loss": 0.0043, "step": 48500 }, { "epoch": 8.65, "learning_rate": 4.567689015691869e-05, "loss": 0.0068, "step": 48510 }, { "epoch": 8.65, "learning_rate": 4.5675998573466475e-05, "loss": 0.0037, "step": 48520 }, { "epoch": 8.65, "learning_rate": 4.5675106990014266e-05, "loss": 0.0035, "step": 48530 }, { "epoch": 8.66, "learning_rate": 4.567421540656206e-05, "loss": 0.0052, "step": 48540 }, { "epoch": 8.66, "learning_rate": 4.567332382310984e-05, "loss": 0.0045, "step": 48550 }, { "epoch": 8.66, "learning_rate": 4.567243223965764e-05, "loss": 0.0062, "step": 48560 }, { "epoch": 8.66, "learning_rate": 4.5671540656205424e-05, "loss": 0.0068, "step": 48570 }, { "epoch": 8.66, "learning_rate": 4.5670649072753215e-05, "loss": 0.0049, "step": 48580 }, { "epoch": 8.66, "learning_rate": 4.5669757489301e-05, "loss": 0.0046, "step": 48590 }, { "epoch": 8.67, "learning_rate": 4.566886590584879e-05, "loss": 0.003, "step": 48600 }, { "epoch": 8.67, "learning_rate": 4.5667974322396575e-05, "loss": 0.0048, "step": 48610 }, { "epoch": 8.67, "learning_rate": 4.5667082738944366e-05, "loss": 0.0042, "step": 48620 }, { "epoch": 8.67, "learning_rate": 4.566619115549216e-05, "loss": 0.0058, "step": 48630 }, { "epoch": 8.67, "learning_rate": 4.566529957203994e-05, "loss": 0.0037, "step": 48640 }, { "epoch": 8.68, "learning_rate": 4.566440798858773e-05, "loss": 0.0046, "step": 48650 }, { "epoch": 8.68, "learning_rate": 4.566351640513552e-05, "loss": 0.003, "step": 48660 }, { "epoch": 8.68, "learning_rate": 4.5662624821683316e-05, "loss": 0.0032, "step": 48670 }, { "epoch": 8.68, "learning_rate": 4.56617332382311e-05, "loss": 0.004, "step": 48680 }, { "epoch": 8.68, "learning_rate": 4.566084165477889e-05, "loss": 0.0042, "step": 48690 }, { "epoch": 8.68, "learning_rate": 4.565995007132668e-05, "loss": 0.0033, "step": 48700 }, { "epoch": 8.69, "learning_rate": 4.565905848787447e-05, "loss": 0.0057, "step": 48710 }, { "epoch": 8.69, "learning_rate": 4.565816690442226e-05, "loss": 0.0076, "step": 48720 }, { "epoch": 8.69, "learning_rate": 4.565727532097004e-05, "loss": 0.0044, "step": 48730 }, { "epoch": 8.69, "learning_rate": 4.5656383737517834e-05, "loss": 0.0093, "step": 48740 }, { "epoch": 8.69, "learning_rate": 4.565549215406562e-05, "loss": 0.0079, "step": 48750 }, { "epoch": 8.69, "learning_rate": 4.565460057061341e-05, "loss": 0.0063, "step": 48760 }, { "epoch": 8.7, "learning_rate": 4.56537089871612e-05, "loss": 0.0048, "step": 48770 }, { "epoch": 8.7, "learning_rate": 4.565281740370899e-05, "loss": 0.0045, "step": 48780 }, { "epoch": 8.7, "learning_rate": 4.565192582025678e-05, "loss": 0.0049, "step": 48790 }, { "epoch": 8.7, "learning_rate": 4.565103423680457e-05, "loss": 0.0044, "step": 48800 }, { "epoch": 8.7, "learning_rate": 4.565014265335236e-05, "loss": 0.0044, "step": 48810 }, { "epoch": 8.71, "learning_rate": 4.564925106990014e-05, "loss": 0.0064, "step": 48820 }, { "epoch": 8.71, "learning_rate": 4.5648359486447934e-05, "loss": 0.0042, "step": 48830 }, { "epoch": 8.71, "learning_rate": 4.564746790299572e-05, "loss": 0.0037, "step": 48840 }, { "epoch": 8.71, "learning_rate": 4.564657631954351e-05, "loss": 0.0046, "step": 48850 }, { "epoch": 8.71, "learning_rate": 4.56456847360913e-05, "loss": 0.004, "step": 48860 }, { "epoch": 8.71, "learning_rate": 4.5644793152639086e-05, "loss": 0.0053, "step": 48870 }, { "epoch": 8.72, "learning_rate": 4.564390156918688e-05, "loss": 0.004, "step": 48880 }, { "epoch": 8.72, "learning_rate": 4.564300998573467e-05, "loss": 0.0035, "step": 48890 }, { "epoch": 8.72, "learning_rate": 4.564211840228246e-05, "loss": 0.0053, "step": 48900 }, { "epoch": 8.72, "learning_rate": 4.5641226818830244e-05, "loss": 0.0071, "step": 48910 }, { "epoch": 8.72, "learning_rate": 4.5640335235378035e-05, "loss": 0.0029, "step": 48920 }, { "epoch": 8.73, "learning_rate": 4.5639443651925826e-05, "loss": 0.0028, "step": 48930 }, { "epoch": 8.73, "learning_rate": 4.563855206847361e-05, "loss": 0.0088, "step": 48940 }, { "epoch": 8.73, "learning_rate": 4.56376604850214e-05, "loss": 0.0058, "step": 48950 }, { "epoch": 8.73, "learning_rate": 4.5636768901569186e-05, "loss": 0.0026, "step": 48960 }, { "epoch": 8.73, "learning_rate": 4.563587731811698e-05, "loss": 0.0061, "step": 48970 }, { "epoch": 8.73, "learning_rate": 4.563498573466476e-05, "loss": 0.0038, "step": 48980 }, { "epoch": 8.74, "learning_rate": 4.563409415121255e-05, "loss": 0.0106, "step": 48990 }, { "epoch": 8.74, "learning_rate": 4.5633202567760344e-05, "loss": 0.004, "step": 49000 }, { "epoch": 8.74, "learning_rate": 4.5632310984308136e-05, "loss": 0.0064, "step": 49010 }, { "epoch": 8.74, "learning_rate": 4.563141940085593e-05, "loss": 0.0061, "step": 49020 }, { "epoch": 8.74, "learning_rate": 4.563052781740371e-05, "loss": 0.0069, "step": 49030 }, { "epoch": 8.74, "learning_rate": 4.56296362339515e-05, "loss": 0.0046, "step": 49040 }, { "epoch": 8.75, "learning_rate": 4.562874465049929e-05, "loss": 0.0069, "step": 49050 }, { "epoch": 8.75, "learning_rate": 4.562785306704708e-05, "loss": 0.0032, "step": 49060 }, { "epoch": 8.75, "learning_rate": 4.562696148359486e-05, "loss": 0.005, "step": 49070 }, { "epoch": 8.75, "learning_rate": 4.5626069900142654e-05, "loss": 0.0062, "step": 49080 }, { "epoch": 8.75, "learning_rate": 4.5625178316690445e-05, "loss": 0.0069, "step": 49090 }, { "epoch": 8.76, "learning_rate": 4.562428673323823e-05, "loss": 0.0028, "step": 49100 }, { "epoch": 8.76, "learning_rate": 4.562339514978603e-05, "loss": 0.0041, "step": 49110 }, { "epoch": 8.76, "learning_rate": 4.562250356633381e-05, "loss": 0.0051, "step": 49120 }, { "epoch": 8.76, "learning_rate": 4.56216119828816e-05, "loss": 0.006, "step": 49130 }, { "epoch": 8.76, "learning_rate": 4.562072039942939e-05, "loss": 0.0026, "step": 49140 }, { "epoch": 8.76, "learning_rate": 4.561982881597718e-05, "loss": 0.0045, "step": 49150 }, { "epoch": 8.77, "learning_rate": 4.561893723252497e-05, "loss": 0.0049, "step": 49160 }, { "epoch": 8.77, "learning_rate": 4.5618045649072754e-05, "loss": 0.0033, "step": 49170 }, { "epoch": 8.77, "learning_rate": 4.5617154065620545e-05, "loss": 0.0034, "step": 49180 }, { "epoch": 8.77, "learning_rate": 4.561626248216833e-05, "loss": 0.0045, "step": 49190 }, { "epoch": 8.77, "learning_rate": 4.561537089871612e-05, "loss": 0.0055, "step": 49200 }, { "epoch": 8.77, "learning_rate": 4.5614479315263906e-05, "loss": 0.0039, "step": 49210 }, { "epoch": 8.78, "learning_rate": 4.5613587731811704e-05, "loss": 0.006, "step": 49220 }, { "epoch": 8.78, "learning_rate": 4.561269614835949e-05, "loss": 0.0044, "step": 49230 }, { "epoch": 8.78, "learning_rate": 4.561180456490728e-05, "loss": 0.0027, "step": 49240 }, { "epoch": 8.78, "learning_rate": 4.561091298145507e-05, "loss": 0.0028, "step": 49250 }, { "epoch": 8.78, "learning_rate": 4.5610021398002855e-05, "loss": 0.006, "step": 49260 }, { "epoch": 8.79, "learning_rate": 4.5609129814550646e-05, "loss": 0.0027, "step": 49270 }, { "epoch": 8.79, "learning_rate": 4.560823823109843e-05, "loss": 0.0026, "step": 49280 }, { "epoch": 8.79, "learning_rate": 4.560734664764622e-05, "loss": 0.0036, "step": 49290 }, { "epoch": 8.79, "learning_rate": 4.5606455064194006e-05, "loss": 0.0055, "step": 49300 }, { "epoch": 8.79, "learning_rate": 4.56055634807418e-05, "loss": 0.0023, "step": 49310 }, { "epoch": 8.79, "learning_rate": 4.560467189728959e-05, "loss": 0.0038, "step": 49320 }, { "epoch": 8.8, "learning_rate": 4.560378031383738e-05, "loss": 0.0031, "step": 49330 }, { "epoch": 8.8, "learning_rate": 4.560288873038517e-05, "loss": 0.0047, "step": 49340 }, { "epoch": 8.8, "learning_rate": 4.5601997146932955e-05, "loss": 0.004, "step": 49350 }, { "epoch": 8.8, "learning_rate": 4.5601105563480747e-05, "loss": 0.0034, "step": 49360 }, { "epoch": 8.8, "learning_rate": 4.560021398002853e-05, "loss": 0.0047, "step": 49370 }, { "epoch": 8.81, "learning_rate": 4.559932239657632e-05, "loss": 0.0038, "step": 49380 }, { "epoch": 8.81, "learning_rate": 4.5598430813124113e-05, "loss": 0.0035, "step": 49390 }, { "epoch": 8.81, "learning_rate": 4.55975392296719e-05, "loss": 0.0064, "step": 49400 }, { "epoch": 8.81, "learning_rate": 4.559664764621969e-05, "loss": 0.004, "step": 49410 }, { "epoch": 8.81, "learning_rate": 4.5595756062767473e-05, "loss": 0.0069, "step": 49420 }, { "epoch": 8.81, "learning_rate": 4.5594864479315265e-05, "loss": 0.0059, "step": 49430 }, { "epoch": 8.82, "learning_rate": 4.5593972895863056e-05, "loss": 0.0048, "step": 49440 }, { "epoch": 8.82, "learning_rate": 4.559308131241085e-05, "loss": 0.0031, "step": 49450 }, { "epoch": 8.82, "learning_rate": 4.559218972895863e-05, "loss": 0.0024, "step": 49460 }, { "epoch": 8.82, "learning_rate": 4.559129814550642e-05, "loss": 0.0036, "step": 49470 }, { "epoch": 8.82, "learning_rate": 4.5590406562054214e-05, "loss": 0.0049, "step": 49480 }, { "epoch": 8.82, "learning_rate": 4.5589514978602e-05, "loss": 0.0046, "step": 49490 }, { "epoch": 8.83, "learning_rate": 4.558862339514979e-05, "loss": 0.0076, "step": 49500 }, { "epoch": 8.83, "learning_rate": 4.5587731811697574e-05, "loss": 0.0046, "step": 49510 }, { "epoch": 8.83, "learning_rate": 4.5586840228245365e-05, "loss": 0.0046, "step": 49520 }, { "epoch": 8.83, "learning_rate": 4.558594864479315e-05, "loss": 0.0062, "step": 49530 }, { "epoch": 8.83, "learning_rate": 4.558505706134094e-05, "loss": 0.0048, "step": 49540 }, { "epoch": 8.84, "learning_rate": 4.558416547788873e-05, "loss": 0.0051, "step": 49550 }, { "epoch": 8.84, "learning_rate": 4.558327389443652e-05, "loss": 0.0053, "step": 49560 }, { "epoch": 8.84, "learning_rate": 4.5582382310984315e-05, "loss": 0.0043, "step": 49570 }, { "epoch": 8.84, "learning_rate": 4.55814907275321e-05, "loss": 0.0048, "step": 49580 }, { "epoch": 8.84, "learning_rate": 4.558059914407989e-05, "loss": 0.0036, "step": 49590 }, { "epoch": 8.84, "learning_rate": 4.5579707560627675e-05, "loss": 0.0061, "step": 49600 }, { "epoch": 8.85, "learning_rate": 4.5578815977175466e-05, "loss": 0.007, "step": 49610 }, { "epoch": 8.85, "learning_rate": 4.557792439372325e-05, "loss": 0.0042, "step": 49620 }, { "epoch": 8.85, "learning_rate": 4.557703281027104e-05, "loss": 0.0036, "step": 49630 }, { "epoch": 8.85, "learning_rate": 4.557614122681883e-05, "loss": 0.0032, "step": 49640 }, { "epoch": 8.85, "learning_rate": 4.557524964336662e-05, "loss": 0.0062, "step": 49650 }, { "epoch": 8.86, "learning_rate": 4.5574358059914415e-05, "loss": 0.0058, "step": 49660 }, { "epoch": 8.86, "learning_rate": 4.55734664764622e-05, "loss": 0.0067, "step": 49670 }, { "epoch": 8.86, "learning_rate": 4.557257489300999e-05, "loss": 0.0056, "step": 49680 }, { "epoch": 8.86, "learning_rate": 4.5571683309557775e-05, "loss": 0.0055, "step": 49690 }, { "epoch": 8.86, "learning_rate": 4.5570791726105566e-05, "loss": 0.0035, "step": 49700 }, { "epoch": 8.86, "learning_rate": 4.556990014265336e-05, "loss": 0.0069, "step": 49710 }, { "epoch": 8.87, "learning_rate": 4.556900855920114e-05, "loss": 0.0055, "step": 49720 }, { "epoch": 8.87, "learning_rate": 4.556811697574893e-05, "loss": 0.0049, "step": 49730 }, { "epoch": 8.87, "learning_rate": 4.556722539229672e-05, "loss": 0.0064, "step": 49740 }, { "epoch": 8.87, "learning_rate": 4.556633380884451e-05, "loss": 0.0062, "step": 49750 }, { "epoch": 8.87, "learning_rate": 4.556544222539229e-05, "loss": 0.0052, "step": 49760 }, { "epoch": 8.87, "learning_rate": 4.556455064194009e-05, "loss": 0.0071, "step": 49770 }, { "epoch": 8.88, "learning_rate": 4.5563659058487876e-05, "loss": 0.0061, "step": 49780 }, { "epoch": 8.88, "learning_rate": 4.556276747503567e-05, "loss": 0.0062, "step": 49790 }, { "epoch": 8.88, "learning_rate": 4.556187589158346e-05, "loss": 0.0052, "step": 49800 }, { "epoch": 8.88, "learning_rate": 4.556098430813124e-05, "loss": 0.0045, "step": 49810 }, { "epoch": 8.88, "learning_rate": 4.5560092724679034e-05, "loss": 0.0059, "step": 49820 }, { "epoch": 8.89, "learning_rate": 4.555920114122682e-05, "loss": 0.003, "step": 49830 }, { "epoch": 8.89, "learning_rate": 4.555830955777461e-05, "loss": 0.0071, "step": 49840 }, { "epoch": 8.89, "learning_rate": 4.5557417974322394e-05, "loss": 0.0067, "step": 49850 }, { "epoch": 8.89, "learning_rate": 4.5556526390870185e-05, "loss": 0.0045, "step": 49860 }, { "epoch": 8.89, "learning_rate": 4.5555634807417976e-05, "loss": 0.0037, "step": 49870 }, { "epoch": 8.89, "learning_rate": 4.555474322396577e-05, "loss": 0.0034, "step": 49880 }, { "epoch": 8.9, "learning_rate": 4.555385164051356e-05, "loss": 0.007, "step": 49890 }, { "epoch": 8.9, "learning_rate": 4.555296005706134e-05, "loss": 0.0056, "step": 49900 }, { "epoch": 8.9, "learning_rate": 4.5552068473609134e-05, "loss": 0.0041, "step": 49910 }, { "epoch": 8.9, "learning_rate": 4.555117689015692e-05, "loss": 0.0074, "step": 49920 }, { "epoch": 8.9, "learning_rate": 4.555028530670471e-05, "loss": 0.0051, "step": 49930 }, { "epoch": 8.91, "learning_rate": 4.55493937232525e-05, "loss": 0.0056, "step": 49940 }, { "epoch": 8.91, "learning_rate": 4.5548502139800286e-05, "loss": 0.0071, "step": 49950 }, { "epoch": 8.91, "learning_rate": 4.554761055634808e-05, "loss": 0.004, "step": 49960 }, { "epoch": 8.91, "learning_rate": 4.554671897289586e-05, "loss": 0.0057, "step": 49970 }, { "epoch": 8.91, "learning_rate": 4.554582738944365e-05, "loss": 0.0046, "step": 49980 }, { "epoch": 8.91, "learning_rate": 4.5544935805991444e-05, "loss": 0.0039, "step": 49990 }, { "epoch": 8.92, "learning_rate": 4.5544044222539235e-05, "loss": 0.0051, "step": 50000 }, { "epoch": 8.92, "learning_rate": 4.554315263908702e-05, "loss": 0.0056, "step": 50010 }, { "epoch": 8.92, "learning_rate": 4.554226105563481e-05, "loss": 0.0035, "step": 50020 }, { "epoch": 8.92, "learning_rate": 4.55413694721826e-05, "loss": 0.003, "step": 50030 }, { "epoch": 8.92, "learning_rate": 4.5540477888730386e-05, "loss": 0.0051, "step": 50040 }, { "epoch": 8.92, "learning_rate": 4.553958630527818e-05, "loss": 0.0062, "step": 50050 }, { "epoch": 8.93, "learning_rate": 4.553869472182596e-05, "loss": 0.0045, "step": 50060 }, { "epoch": 8.93, "learning_rate": 4.553780313837375e-05, "loss": 0.0059, "step": 50070 }, { "epoch": 8.93, "learning_rate": 4.553691155492154e-05, "loss": 0.0055, "step": 50080 }, { "epoch": 8.93, "learning_rate": 4.553601997146933e-05, "loss": 0.0069, "step": 50090 }, { "epoch": 8.93, "learning_rate": 4.553512838801712e-05, "loss": 0.0055, "step": 50100 }, { "epoch": 8.94, "learning_rate": 4.553423680456491e-05, "loss": 0.0036, "step": 50110 }, { "epoch": 8.94, "learning_rate": 4.55333452211127e-05, "loss": 0.0063, "step": 50120 }, { "epoch": 8.94, "learning_rate": 4.553245363766049e-05, "loss": 0.0049, "step": 50130 }, { "epoch": 8.94, "learning_rate": 4.553156205420828e-05, "loss": 0.0037, "step": 50140 }, { "epoch": 8.94, "learning_rate": 4.553067047075606e-05, "loss": 0.0041, "step": 50150 }, { "epoch": 8.94, "learning_rate": 4.5529778887303854e-05, "loss": 0.0064, "step": 50160 }, { "epoch": 8.95, "learning_rate": 4.5528887303851645e-05, "loss": 0.0051, "step": 50170 }, { "epoch": 8.95, "learning_rate": 4.552799572039943e-05, "loss": 0.006, "step": 50180 }, { "epoch": 8.95, "learning_rate": 4.552710413694722e-05, "loss": 0.0044, "step": 50190 }, { "epoch": 8.95, "learning_rate": 4.5526212553495005e-05, "loss": 0.0044, "step": 50200 }, { "epoch": 8.95, "learning_rate": 4.55253209700428e-05, "loss": 0.0053, "step": 50210 }, { "epoch": 8.96, "learning_rate": 4.552442938659059e-05, "loss": 0.0055, "step": 50220 }, { "epoch": 8.96, "learning_rate": 4.552353780313838e-05, "loss": 0.0039, "step": 50230 }, { "epoch": 8.96, "learning_rate": 4.552264621968616e-05, "loss": 0.0049, "step": 50240 }, { "epoch": 8.96, "learning_rate": 4.5521754636233954e-05, "loss": 0.0063, "step": 50250 }, { "epoch": 8.96, "learning_rate": 4.5520863052781745e-05, "loss": 0.0035, "step": 50260 }, { "epoch": 8.96, "learning_rate": 4.551997146932953e-05, "loss": 0.0052, "step": 50270 }, { "epoch": 8.97, "learning_rate": 4.551907988587732e-05, "loss": 0.0054, "step": 50280 }, { "epoch": 8.97, "learning_rate": 4.5518188302425105e-05, "loss": 0.0051, "step": 50290 }, { "epoch": 8.97, "learning_rate": 4.55172967189729e-05, "loss": 0.0038, "step": 50300 }, { "epoch": 8.97, "learning_rate": 4.551640513552068e-05, "loss": 0.0041, "step": 50310 }, { "epoch": 8.97, "learning_rate": 4.551551355206848e-05, "loss": 0.0101, "step": 50320 }, { "epoch": 8.97, "learning_rate": 4.5514621968616264e-05, "loss": 0.0062, "step": 50330 }, { "epoch": 8.98, "learning_rate": 4.5513730385164055e-05, "loss": 0.0048, "step": 50340 }, { "epoch": 8.98, "learning_rate": 4.5512838801711846e-05, "loss": 0.0074, "step": 50350 }, { "epoch": 8.98, "learning_rate": 4.551194721825963e-05, "loss": 0.0094, "step": 50360 }, { "epoch": 8.98, "learning_rate": 4.551105563480742e-05, "loss": 0.0047, "step": 50370 }, { "epoch": 8.98, "learning_rate": 4.5510164051355206e-05, "loss": 0.0054, "step": 50380 }, { "epoch": 8.99, "learning_rate": 4.5509272467903e-05, "loss": 0.0047, "step": 50390 }, { "epoch": 8.99, "learning_rate": 4.550838088445079e-05, "loss": 0.0063, "step": 50400 }, { "epoch": 8.99, "learning_rate": 4.550748930099857e-05, "loss": 0.005, "step": 50410 }, { "epoch": 8.99, "learning_rate": 4.5506597717546364e-05, "loss": 0.0045, "step": 50420 }, { "epoch": 8.99, "learning_rate": 4.5505706134094155e-05, "loss": 0.0057, "step": 50430 }, { "epoch": 8.99, "learning_rate": 4.5504814550641947e-05, "loss": 0.0037, "step": 50440 }, { "epoch": 9.0, "learning_rate": 4.550392296718973e-05, "loss": 0.0081, "step": 50450 }, { "epoch": 9.0, "learning_rate": 4.550303138373752e-05, "loss": 0.004, "step": 50460 }, { "epoch": 9.0, "learning_rate": 4.5502139800285307e-05, "loss": 0.0036, "step": 50470 }, { "epoch": 9.0, "eval_loss": 0.014716032892465591, "eval_runtime": 197.987, "eval_samples_per_second": 23.431, "eval_steps_per_second": 2.929, "step": 50472 }, { "epoch": 9.0, "learning_rate": 4.55012482168331e-05, "loss": 0.0047, "step": 50480 }, { "epoch": 9.0, "learning_rate": 4.550035663338089e-05, "loss": 0.0052, "step": 50490 }, { "epoch": 9.0, "learning_rate": 4.5499465049928673e-05, "loss": 0.0039, "step": 50500 }, { "epoch": 9.01, "learning_rate": 4.5498573466476465e-05, "loss": 0.0028, "step": 50510 }, { "epoch": 9.01, "learning_rate": 4.549768188302425e-05, "loss": 0.0035, "step": 50520 }, { "epoch": 9.01, "learning_rate": 4.549679029957204e-05, "loss": 0.0074, "step": 50530 }, { "epoch": 9.01, "learning_rate": 4.549589871611983e-05, "loss": 0.0032, "step": 50540 }, { "epoch": 9.01, "learning_rate": 4.549500713266762e-05, "loss": 0.0076, "step": 50550 }, { "epoch": 9.02, "learning_rate": 4.549411554921541e-05, "loss": 0.0052, "step": 50560 }, { "epoch": 9.02, "learning_rate": 4.54932239657632e-05, "loss": 0.0069, "step": 50570 }, { "epoch": 9.02, "learning_rate": 4.549233238231099e-05, "loss": 0.0041, "step": 50580 }, { "epoch": 9.02, "learning_rate": 4.5491440798858774e-05, "loss": 0.003, "step": 50590 }, { "epoch": 9.02, "learning_rate": 4.5490549215406565e-05, "loss": 0.0047, "step": 50600 }, { "epoch": 9.02, "learning_rate": 4.548965763195435e-05, "loss": 0.0049, "step": 50610 }, { "epoch": 9.03, "learning_rate": 4.548876604850214e-05, "loss": 0.0032, "step": 50620 }, { "epoch": 9.03, "learning_rate": 4.548787446504993e-05, "loss": 0.0084, "step": 50630 }, { "epoch": 9.03, "learning_rate": 4.5486982881597717e-05, "loss": 0.004, "step": 50640 }, { "epoch": 9.03, "learning_rate": 4.5486091298145514e-05, "loss": 0.0034, "step": 50650 }, { "epoch": 9.03, "learning_rate": 4.54851997146933e-05, "loss": 0.0042, "step": 50660 }, { "epoch": 9.04, "learning_rate": 4.548430813124109e-05, "loss": 0.0036, "step": 50670 }, { "epoch": 9.04, "learning_rate": 4.5483416547788875e-05, "loss": 0.0035, "step": 50680 }, { "epoch": 9.04, "learning_rate": 4.5482524964336666e-05, "loss": 0.0019, "step": 50690 }, { "epoch": 9.04, "learning_rate": 4.548163338088445e-05, "loss": 0.0046, "step": 50700 }, { "epoch": 9.04, "learning_rate": 4.548074179743224e-05, "loss": 0.0061, "step": 50710 }, { "epoch": 9.04, "learning_rate": 4.547985021398003e-05, "loss": 0.0033, "step": 50720 }, { "epoch": 9.05, "learning_rate": 4.547895863052782e-05, "loss": 0.0033, "step": 50730 }, { "epoch": 9.05, "learning_rate": 4.547806704707561e-05, "loss": 0.003, "step": 50740 }, { "epoch": 9.05, "learning_rate": 4.547717546362339e-05, "loss": 0.0027, "step": 50750 }, { "epoch": 9.05, "learning_rate": 4.547628388017119e-05, "loss": 0.0025, "step": 50760 }, { "epoch": 9.05, "learning_rate": 4.5475392296718975e-05, "loss": 0.0032, "step": 50770 }, { "epoch": 9.05, "learning_rate": 4.5474500713266766e-05, "loss": 0.0063, "step": 50780 }, { "epoch": 9.06, "learning_rate": 4.547360912981455e-05, "loss": 0.0054, "step": 50790 }, { "epoch": 9.06, "learning_rate": 4.547271754636234e-05, "loss": 0.0045, "step": 50800 }, { "epoch": 9.06, "learning_rate": 4.547182596291013e-05, "loss": 0.0041, "step": 50810 }, { "epoch": 9.06, "learning_rate": 4.547093437945792e-05, "loss": 0.0029, "step": 50820 }, { "epoch": 9.06, "learning_rate": 4.547004279600571e-05, "loss": 0.0049, "step": 50830 }, { "epoch": 9.07, "learning_rate": 4.546915121255349e-05, "loss": 0.0037, "step": 50840 }, { "epoch": 9.07, "learning_rate": 4.5468259629101284e-05, "loss": 0.0042, "step": 50850 }, { "epoch": 9.07, "learning_rate": 4.5467368045649076e-05, "loss": 0.0058, "step": 50860 }, { "epoch": 9.07, "learning_rate": 4.546647646219687e-05, "loss": 0.0041, "step": 50870 }, { "epoch": 9.07, "learning_rate": 4.546558487874466e-05, "loss": 0.0033, "step": 50880 }, { "epoch": 9.07, "learning_rate": 4.546469329529244e-05, "loss": 0.0024, "step": 50890 }, { "epoch": 9.08, "learning_rate": 4.5463801711840234e-05, "loss": 0.003, "step": 50900 }, { "epoch": 9.08, "learning_rate": 4.546291012838802e-05, "loss": 0.0058, "step": 50910 }, { "epoch": 9.08, "learning_rate": 4.546201854493581e-05, "loss": 0.0049, "step": 50920 }, { "epoch": 9.08, "learning_rate": 4.5461126961483594e-05, "loss": 0.0027, "step": 50930 }, { "epoch": 9.08, "learning_rate": 4.5460235378031385e-05, "loss": 0.0053, "step": 50940 }, { "epoch": 9.09, "learning_rate": 4.5459343794579176e-05, "loss": 0.0031, "step": 50950 }, { "epoch": 9.09, "learning_rate": 4.545845221112696e-05, "loss": 0.0054, "step": 50960 }, { "epoch": 9.09, "learning_rate": 4.545756062767475e-05, "loss": 0.0025, "step": 50970 }, { "epoch": 9.09, "learning_rate": 4.545666904422254e-05, "loss": 0.005, "step": 50980 }, { "epoch": 9.09, "learning_rate": 4.5455777460770334e-05, "loss": 0.0048, "step": 50990 }, { "epoch": 9.09, "learning_rate": 4.545488587731812e-05, "loss": 0.0045, "step": 51000 }, { "epoch": 9.1, "learning_rate": 4.545399429386591e-05, "loss": 0.0049, "step": 51010 }, { "epoch": 9.1, "learning_rate": 4.5453102710413694e-05, "loss": 0.0047, "step": 51020 }, { "epoch": 9.1, "learning_rate": 4.5452211126961486e-05, "loss": 0.0038, "step": 51030 }, { "epoch": 9.1, "learning_rate": 4.545131954350928e-05, "loss": 0.0053, "step": 51040 }, { "epoch": 9.1, "learning_rate": 4.545042796005706e-05, "loss": 0.0062, "step": 51050 }, { "epoch": 9.1, "learning_rate": 4.544953637660485e-05, "loss": 0.0053, "step": 51060 }, { "epoch": 9.11, "learning_rate": 4.544864479315264e-05, "loss": 0.0062, "step": 51070 }, { "epoch": 9.11, "learning_rate": 4.544775320970043e-05, "loss": 0.0061, "step": 51080 }, { "epoch": 9.11, "learning_rate": 4.544686162624822e-05, "loss": 0.005, "step": 51090 }, { "epoch": 9.11, "learning_rate": 4.544597004279601e-05, "loss": 0.0039, "step": 51100 }, { "epoch": 9.11, "learning_rate": 4.54450784593438e-05, "loss": 0.0034, "step": 51110 }, { "epoch": 9.12, "learning_rate": 4.5444186875891586e-05, "loss": 0.0058, "step": 51120 }, { "epoch": 9.12, "learning_rate": 4.544329529243938e-05, "loss": 0.0041, "step": 51130 }, { "epoch": 9.12, "learning_rate": 4.544240370898716e-05, "loss": 0.0038, "step": 51140 }, { "epoch": 9.12, "learning_rate": 4.544151212553495e-05, "loss": 0.0043, "step": 51150 }, { "epoch": 9.12, "learning_rate": 4.544062054208274e-05, "loss": 0.0027, "step": 51160 }, { "epoch": 9.12, "learning_rate": 4.543972895863053e-05, "loss": 0.0029, "step": 51170 }, { "epoch": 9.13, "learning_rate": 4.543883737517832e-05, "loss": 0.0048, "step": 51180 }, { "epoch": 9.13, "learning_rate": 4.5437945791726104e-05, "loss": 0.0034, "step": 51190 }, { "epoch": 9.13, "learning_rate": 4.54370542082739e-05, "loss": 0.0036, "step": 51200 }, { "epoch": 9.13, "learning_rate": 4.543616262482169e-05, "loss": 0.0064, "step": 51210 }, { "epoch": 9.13, "learning_rate": 4.543527104136948e-05, "loss": 0.007, "step": 51220 }, { "epoch": 9.14, "learning_rate": 4.543437945791726e-05, "loss": 0.0049, "step": 51230 }, { "epoch": 9.14, "learning_rate": 4.5433487874465054e-05, "loss": 0.0026, "step": 51240 }, { "epoch": 9.14, "learning_rate": 4.543259629101284e-05, "loss": 0.0045, "step": 51250 }, { "epoch": 9.14, "learning_rate": 4.543170470756063e-05, "loss": 0.0033, "step": 51260 }, { "epoch": 9.14, "learning_rate": 4.543081312410842e-05, "loss": 0.0043, "step": 51270 }, { "epoch": 9.14, "learning_rate": 4.5429921540656205e-05, "loss": 0.0039, "step": 51280 }, { "epoch": 9.15, "learning_rate": 4.5429029957203996e-05, "loss": 0.0072, "step": 51290 }, { "epoch": 9.15, "learning_rate": 4.542813837375178e-05, "loss": 0.0033, "step": 51300 }, { "epoch": 9.15, "learning_rate": 4.542724679029958e-05, "loss": 0.0038, "step": 51310 }, { "epoch": 9.15, "learning_rate": 4.542635520684736e-05, "loss": 0.0045, "step": 51320 }, { "epoch": 9.15, "learning_rate": 4.5425463623395154e-05, "loss": 0.0031, "step": 51330 }, { "epoch": 9.15, "learning_rate": 4.5424572039942945e-05, "loss": 0.004, "step": 51340 }, { "epoch": 9.16, "learning_rate": 4.542368045649073e-05, "loss": 0.005, "step": 51350 }, { "epoch": 9.16, "learning_rate": 4.542278887303852e-05, "loss": 0.0046, "step": 51360 }, { "epoch": 9.16, "learning_rate": 4.5421897289586305e-05, "loss": 0.0032, "step": 51370 }, { "epoch": 9.16, "learning_rate": 4.54210057061341e-05, "loss": 0.0094, "step": 51380 }, { "epoch": 9.16, "learning_rate": 4.542011412268188e-05, "loss": 0.0046, "step": 51390 }, { "epoch": 9.17, "learning_rate": 4.541922253922967e-05, "loss": 0.003, "step": 51400 }, { "epoch": 9.17, "learning_rate": 4.5418330955777463e-05, "loss": 0.0069, "step": 51410 }, { "epoch": 9.17, "learning_rate": 4.5417439372325255e-05, "loss": 0.003, "step": 51420 }, { "epoch": 9.17, "learning_rate": 4.5416547788873046e-05, "loss": 0.004, "step": 51430 }, { "epoch": 9.17, "learning_rate": 4.541574536376605e-05, "loss": 0.0075, "step": 51440 }, { "epoch": 9.17, "learning_rate": 4.541485378031384e-05, "loss": 0.0027, "step": 51450 }, { "epoch": 9.18, "learning_rate": 4.541396219686162e-05, "loss": 0.0025, "step": 51460 }, { "epoch": 9.18, "learning_rate": 4.541307061340942e-05, "loss": 0.0056, "step": 51470 }, { "epoch": 9.18, "learning_rate": 4.5412179029957205e-05, "loss": 0.004, "step": 51480 }, { "epoch": 9.18, "learning_rate": 4.5411287446504996e-05, "loss": 0.0043, "step": 51490 }, { "epoch": 9.18, "learning_rate": 4.541039586305279e-05, "loss": 0.0059, "step": 51500 }, { "epoch": 9.19, "learning_rate": 4.540950427960057e-05, "loss": 0.0046, "step": 51510 }, { "epoch": 9.19, "learning_rate": 4.540861269614836e-05, "loss": 0.0042, "step": 51520 }, { "epoch": 9.19, "learning_rate": 4.540772111269615e-05, "loss": 0.0045, "step": 51530 }, { "epoch": 9.19, "learning_rate": 4.540682952924394e-05, "loss": 0.0033, "step": 51540 }, { "epoch": 9.19, "learning_rate": 4.540593794579172e-05, "loss": 0.0068, "step": 51550 }, { "epoch": 9.19, "learning_rate": 4.5405046362339515e-05, "loss": 0.0049, "step": 51560 }, { "epoch": 9.2, "learning_rate": 4.5404154778887306e-05, "loss": 0.0069, "step": 51570 }, { "epoch": 9.2, "learning_rate": 4.54032631954351e-05, "loss": 0.005, "step": 51580 }, { "epoch": 9.2, "learning_rate": 4.540237161198289e-05, "loss": 0.0038, "step": 51590 }, { "epoch": 9.2, "learning_rate": 4.540148002853067e-05, "loss": 0.0031, "step": 51600 }, { "epoch": 9.2, "learning_rate": 4.5400588445078464e-05, "loss": 0.0038, "step": 51610 }, { "epoch": 9.2, "learning_rate": 4.539969686162625e-05, "loss": 0.0037, "step": 51620 }, { "epoch": 9.21, "learning_rate": 4.539880527817404e-05, "loss": 0.0066, "step": 51630 }, { "epoch": 9.21, "learning_rate": 4.539791369472183e-05, "loss": 0.0042, "step": 51640 }, { "epoch": 9.21, "learning_rate": 4.5397022111269615e-05, "loss": 0.0035, "step": 51650 }, { "epoch": 9.21, "learning_rate": 4.5396130527817406e-05, "loss": 0.0027, "step": 51660 }, { "epoch": 9.21, "learning_rate": 4.539523894436519e-05, "loss": 0.0042, "step": 51670 }, { "epoch": 9.22, "learning_rate": 4.539434736091298e-05, "loss": 0.0045, "step": 51680 }, { "epoch": 9.22, "learning_rate": 4.539345577746077e-05, "loss": 0.0041, "step": 51690 }, { "epoch": 9.22, "learning_rate": 4.5392564194008564e-05, "loss": 0.0041, "step": 51700 }, { "epoch": 9.22, "learning_rate": 4.539167261055635e-05, "loss": 0.0038, "step": 51710 }, { "epoch": 9.22, "learning_rate": 4.539078102710414e-05, "loss": 0.003, "step": 51720 }, { "epoch": 9.22, "learning_rate": 4.538988944365193e-05, "loss": 0.0037, "step": 51730 }, { "epoch": 9.23, "learning_rate": 4.5388997860199716e-05, "loss": 0.0023, "step": 51740 }, { "epoch": 9.23, "learning_rate": 4.538810627674751e-05, "loss": 0.0026, "step": 51750 }, { "epoch": 9.23, "learning_rate": 4.538721469329529e-05, "loss": 0.0027, "step": 51760 }, { "epoch": 9.23, "learning_rate": 4.538632310984308e-05, "loss": 0.0024, "step": 51770 }, { "epoch": 9.23, "learning_rate": 4.538543152639087e-05, "loss": 0.005, "step": 51780 }, { "epoch": 9.24, "learning_rate": 4.538453994293866e-05, "loss": 0.0074, "step": 51790 }, { "epoch": 9.24, "learning_rate": 4.538364835948645e-05, "loss": 0.0054, "step": 51800 }, { "epoch": 9.24, "learning_rate": 4.538275677603424e-05, "loss": 0.0064, "step": 51810 }, { "epoch": 9.24, "learning_rate": 4.538186519258203e-05, "loss": 0.0031, "step": 51820 }, { "epoch": 9.24, "learning_rate": 4.5380973609129816e-05, "loss": 0.0038, "step": 51830 }, { "epoch": 9.24, "learning_rate": 4.538008202567761e-05, "loss": 0.0047, "step": 51840 }, { "epoch": 9.25, "learning_rate": 4.537919044222539e-05, "loss": 0.0061, "step": 51850 }, { "epoch": 9.25, "learning_rate": 4.537829885877318e-05, "loss": 0.0038, "step": 51860 }, { "epoch": 9.25, "learning_rate": 4.5377407275320974e-05, "loss": 0.0039, "step": 51870 }, { "epoch": 9.25, "learning_rate": 4.537651569186876e-05, "loss": 0.0041, "step": 51880 }, { "epoch": 9.25, "learning_rate": 4.537562410841655e-05, "loss": 0.0048, "step": 51890 }, { "epoch": 9.25, "learning_rate": 4.5374732524964334e-05, "loss": 0.0042, "step": 51900 }, { "epoch": 9.26, "learning_rate": 4.537384094151213e-05, "loss": 0.0055, "step": 51910 }, { "epoch": 9.26, "learning_rate": 4.537294935805992e-05, "loss": 0.0048, "step": 51920 }, { "epoch": 9.26, "learning_rate": 4.537205777460771e-05, "loss": 0.0035, "step": 51930 }, { "epoch": 9.26, "learning_rate": 4.537116619115549e-05, "loss": 0.0018, "step": 51940 }, { "epoch": 9.26, "learning_rate": 4.5370274607703284e-05, "loss": 0.004, "step": 51950 }, { "epoch": 9.27, "learning_rate": 4.5369383024251075e-05, "loss": 0.0071, "step": 51960 }, { "epoch": 9.27, "learning_rate": 4.536849144079886e-05, "loss": 0.0055, "step": 51970 }, { "epoch": 9.27, "learning_rate": 4.536759985734665e-05, "loss": 0.0048, "step": 51980 }, { "epoch": 9.27, "learning_rate": 4.5366708273894435e-05, "loss": 0.0028, "step": 51990 }, { "epoch": 9.27, "learning_rate": 4.5365816690442226e-05, "loss": 0.0056, "step": 52000 }, { "epoch": 9.27, "learning_rate": 4.536492510699001e-05, "loss": 0.0063, "step": 52010 }, { "epoch": 9.28, "learning_rate": 4.536403352353781e-05, "loss": 0.0038, "step": 52020 }, { "epoch": 9.28, "learning_rate": 4.536314194008559e-05, "loss": 0.006, "step": 52030 }, { "epoch": 9.28, "learning_rate": 4.5362250356633384e-05, "loss": 0.0029, "step": 52040 }, { "epoch": 9.28, "learning_rate": 4.5361358773181175e-05, "loss": 0.0042, "step": 52050 }, { "epoch": 9.28, "learning_rate": 4.536046718972896e-05, "loss": 0.0039, "step": 52060 }, { "epoch": 9.28, "learning_rate": 4.535957560627675e-05, "loss": 0.0049, "step": 52070 }, { "epoch": 9.29, "learning_rate": 4.5358684022824536e-05, "loss": 0.0037, "step": 52080 }, { "epoch": 9.29, "learning_rate": 4.535779243937233e-05, "loss": 0.0026, "step": 52090 }, { "epoch": 9.29, "learning_rate": 4.535690085592012e-05, "loss": 0.003, "step": 52100 }, { "epoch": 9.29, "learning_rate": 4.53560092724679e-05, "loss": 0.002, "step": 52110 }, { "epoch": 9.29, "learning_rate": 4.5355117689015694e-05, "loss": 0.0014, "step": 52120 }, { "epoch": 9.3, "learning_rate": 4.5354226105563485e-05, "loss": 0.003, "step": 52130 }, { "epoch": 9.3, "learning_rate": 4.5353334522111276e-05, "loss": 0.0054, "step": 52140 }, { "epoch": 9.3, "learning_rate": 4.535244293865906e-05, "loss": 0.0074, "step": 52150 }, { "epoch": 9.3, "learning_rate": 4.535155135520685e-05, "loss": 0.0054, "step": 52160 }, { "epoch": 9.3, "learning_rate": 4.5350659771754636e-05, "loss": 0.0119, "step": 52170 }, { "epoch": 9.3, "learning_rate": 4.534976818830243e-05, "loss": 0.0043, "step": 52180 }, { "epoch": 9.31, "learning_rate": 4.534887660485022e-05, "loss": 0.0057, "step": 52190 }, { "epoch": 9.31, "learning_rate": 4.5347985021398e-05, "loss": 0.0059, "step": 52200 }, { "epoch": 9.31, "learning_rate": 4.5347093437945794e-05, "loss": 0.0052, "step": 52210 }, { "epoch": 9.31, "learning_rate": 4.534620185449358e-05, "loss": 0.0028, "step": 52220 }, { "epoch": 9.31, "learning_rate": 4.534531027104137e-05, "loss": 0.0049, "step": 52230 }, { "epoch": 9.32, "learning_rate": 4.534441868758916e-05, "loss": 0.008, "step": 52240 }, { "epoch": 9.32, "learning_rate": 4.534352710413695e-05, "loss": 0.0047, "step": 52250 }, { "epoch": 9.32, "learning_rate": 4.534263552068474e-05, "loss": 0.0026, "step": 52260 }, { "epoch": 9.32, "learning_rate": 4.534174393723253e-05, "loss": 0.0059, "step": 52270 }, { "epoch": 9.32, "learning_rate": 4.534085235378032e-05, "loss": 0.0054, "step": 52280 }, { "epoch": 9.32, "learning_rate": 4.5339960770328104e-05, "loss": 0.0038, "step": 52290 }, { "epoch": 9.33, "learning_rate": 4.5339069186875895e-05, "loss": 0.0036, "step": 52300 }, { "epoch": 9.33, "learning_rate": 4.533817760342368e-05, "loss": 0.0027, "step": 52310 }, { "epoch": 9.33, "learning_rate": 4.533728601997147e-05, "loss": 0.0078, "step": 52320 }, { "epoch": 9.33, "learning_rate": 4.533639443651926e-05, "loss": 0.0018, "step": 52330 }, { "epoch": 9.33, "learning_rate": 4.5335502853067046e-05, "loss": 0.0033, "step": 52340 }, { "epoch": 9.33, "learning_rate": 4.5334611269614844e-05, "loss": 0.0049, "step": 52350 }, { "epoch": 9.34, "learning_rate": 4.533371968616263e-05, "loss": 0.0044, "step": 52360 }, { "epoch": 9.34, "learning_rate": 4.533282810271042e-05, "loss": 0.0064, "step": 52370 }, { "epoch": 9.34, "learning_rate": 4.5331936519258204e-05, "loss": 0.0094, "step": 52380 }, { "epoch": 9.34, "learning_rate": 4.5331044935805995e-05, "loss": 0.0044, "step": 52390 }, { "epoch": 9.34, "learning_rate": 4.533015335235378e-05, "loss": 0.0051, "step": 52400 }, { "epoch": 9.35, "learning_rate": 4.532926176890157e-05, "loss": 0.0051, "step": 52410 }, { "epoch": 9.35, "learning_rate": 4.532837018544936e-05, "loss": 0.0034, "step": 52420 }, { "epoch": 9.35, "learning_rate": 4.5327478601997147e-05, "loss": 0.0042, "step": 52430 }, { "epoch": 9.35, "learning_rate": 4.532658701854494e-05, "loss": 0.0054, "step": 52440 }, { "epoch": 9.35, "learning_rate": 4.532569543509272e-05, "loss": 0.0032, "step": 52450 }, { "epoch": 9.35, "learning_rate": 4.532480385164052e-05, "loss": 0.0045, "step": 52460 }, { "epoch": 9.36, "learning_rate": 4.5323912268188305e-05, "loss": 0.0056, "step": 52470 }, { "epoch": 9.36, "learning_rate": 4.5323020684736096e-05, "loss": 0.0049, "step": 52480 }, { "epoch": 9.36, "learning_rate": 4.532212910128388e-05, "loss": 0.005, "step": 52490 }, { "epoch": 9.36, "learning_rate": 4.532123751783167e-05, "loss": 0.0053, "step": 52500 }, { "epoch": 9.36, "learning_rate": 4.532034593437946e-05, "loss": 0.002, "step": 52510 }, { "epoch": 9.37, "learning_rate": 4.531945435092725e-05, "loss": 0.0028, "step": 52520 }, { "epoch": 9.37, "learning_rate": 4.531856276747504e-05, "loss": 0.0038, "step": 52530 }, { "epoch": 9.37, "learning_rate": 4.531767118402282e-05, "loss": 0.0045, "step": 52540 }, { "epoch": 9.37, "learning_rate": 4.5316779600570614e-05, "loss": 0.0054, "step": 52550 }, { "epoch": 9.37, "learning_rate": 4.5315888017118405e-05, "loss": 0.0034, "step": 52560 }, { "epoch": 9.37, "learning_rate": 4.5314996433666196e-05, "loss": 0.0039, "step": 52570 }, { "epoch": 9.38, "learning_rate": 4.531410485021398e-05, "loss": 0.0047, "step": 52580 }, { "epoch": 9.38, "learning_rate": 4.531321326676177e-05, "loss": 0.0099, "step": 52590 }, { "epoch": 9.38, "learning_rate": 4.531232168330956e-05, "loss": 0.0029, "step": 52600 }, { "epoch": 9.38, "learning_rate": 4.531143009985735e-05, "loss": 0.0044, "step": 52610 }, { "epoch": 9.38, "learning_rate": 4.531053851640514e-05, "loss": 0.0031, "step": 52620 }, { "epoch": 9.38, "learning_rate": 4.530964693295292e-05, "loss": 0.0075, "step": 52630 }, { "epoch": 9.39, "learning_rate": 4.5308755349500715e-05, "loss": 0.0036, "step": 52640 }, { "epoch": 9.39, "learning_rate": 4.5307863766048506e-05, "loss": 0.0061, "step": 52650 }, { "epoch": 9.39, "learning_rate": 4.530697218259629e-05, "loss": 0.0043, "step": 52660 }, { "epoch": 9.39, "learning_rate": 4.530608059914408e-05, "loss": 0.0049, "step": 52670 }, { "epoch": 9.39, "learning_rate": 4.530518901569187e-05, "loss": 0.0032, "step": 52680 }, { "epoch": 9.4, "learning_rate": 4.5304297432239664e-05, "loss": 0.0045, "step": 52690 }, { "epoch": 9.4, "learning_rate": 4.530340584878745e-05, "loss": 0.0084, "step": 52700 }, { "epoch": 9.4, "learning_rate": 4.530251426533524e-05, "loss": 0.0046, "step": 52710 }, { "epoch": 9.4, "learning_rate": 4.5301622681883024e-05, "loss": 0.0031, "step": 52720 }, { "epoch": 9.4, "learning_rate": 4.5300731098430815e-05, "loss": 0.0032, "step": 52730 }, { "epoch": 9.4, "learning_rate": 4.5299839514978606e-05, "loss": 0.0059, "step": 52740 }, { "epoch": 9.41, "learning_rate": 4.529894793152639e-05, "loss": 0.0067, "step": 52750 }, { "epoch": 9.41, "learning_rate": 4.529805634807418e-05, "loss": 0.0036, "step": 52760 }, { "epoch": 9.41, "learning_rate": 4.5297164764621966e-05, "loss": 0.0035, "step": 52770 }, { "epoch": 9.41, "learning_rate": 4.529627318116976e-05, "loss": 0.0054, "step": 52780 }, { "epoch": 9.41, "learning_rate": 4.529538159771755e-05, "loss": 0.006, "step": 52790 }, { "epoch": 9.42, "learning_rate": 4.529449001426534e-05, "loss": 0.003, "step": 52800 }, { "epoch": 9.42, "learning_rate": 4.5293598430813124e-05, "loss": 0.0038, "step": 52810 }, { "epoch": 9.42, "learning_rate": 4.5292706847360916e-05, "loss": 0.0043, "step": 52820 }, { "epoch": 9.42, "learning_rate": 4.529181526390871e-05, "loss": 0.0046, "step": 52830 }, { "epoch": 9.42, "learning_rate": 4.529092368045649e-05, "loss": 0.0029, "step": 52840 }, { "epoch": 9.42, "learning_rate": 4.529003209700428e-05, "loss": 0.0049, "step": 52850 }, { "epoch": 9.43, "learning_rate": 4.528914051355207e-05, "loss": 0.0053, "step": 52860 }, { "epoch": 9.43, "learning_rate": 4.528824893009986e-05, "loss": 0.0083, "step": 52870 }, { "epoch": 9.43, "learning_rate": 4.528735734664765e-05, "loss": 0.0049, "step": 52880 }, { "epoch": 9.43, "learning_rate": 4.5286465763195434e-05, "loss": 0.0039, "step": 52890 }, { "epoch": 9.43, "learning_rate": 4.528557417974323e-05, "loss": 0.0032, "step": 52900 }, { "epoch": 9.43, "learning_rate": 4.5284682596291016e-05, "loss": 0.0052, "step": 52910 }, { "epoch": 9.44, "learning_rate": 4.528379101283881e-05, "loss": 0.0034, "step": 52920 }, { "epoch": 9.44, "learning_rate": 4.528289942938659e-05, "loss": 0.0071, "step": 52930 }, { "epoch": 9.44, "learning_rate": 4.528200784593438e-05, "loss": 0.0041, "step": 52940 }, { "epoch": 9.44, "learning_rate": 4.528111626248217e-05, "loss": 0.006, "step": 52950 }, { "epoch": 9.44, "learning_rate": 4.528022467902996e-05, "loss": 0.0037, "step": 52960 }, { "epoch": 9.45, "learning_rate": 4.527933309557775e-05, "loss": 0.0036, "step": 52970 }, { "epoch": 9.45, "learning_rate": 4.5278441512125534e-05, "loss": 0.0036, "step": 52980 }, { "epoch": 9.45, "learning_rate": 4.5277549928673326e-05, "loss": 0.0054, "step": 52990 }, { "epoch": 9.45, "learning_rate": 4.527665834522111e-05, "loss": 0.0033, "step": 53000 }, { "epoch": 9.45, "learning_rate": 4.527576676176891e-05, "loss": 0.0028, "step": 53010 }, { "epoch": 9.45, "learning_rate": 4.527487517831669e-05, "loss": 0.0044, "step": 53020 }, { "epoch": 9.46, "learning_rate": 4.5273983594864484e-05, "loss": 0.0059, "step": 53030 }, { "epoch": 9.46, "learning_rate": 4.527309201141227e-05, "loss": 0.0062, "step": 53040 }, { "epoch": 9.46, "learning_rate": 4.527220042796006e-05, "loss": 0.0047, "step": 53050 }, { "epoch": 9.46, "learning_rate": 4.527130884450785e-05, "loss": 0.0062, "step": 53060 }, { "epoch": 9.46, "learning_rate": 4.5270417261055635e-05, "loss": 0.0036, "step": 53070 }, { "epoch": 9.47, "learning_rate": 4.5269525677603426e-05, "loss": 0.0038, "step": 53080 }, { "epoch": 9.47, "learning_rate": 4.526863409415121e-05, "loss": 0.0074, "step": 53090 }, { "epoch": 9.47, "learning_rate": 4.5267742510699e-05, "loss": 0.0039, "step": 53100 }, { "epoch": 9.47, "learning_rate": 4.526685092724679e-05, "loss": 0.0029, "step": 53110 }, { "epoch": 9.47, "learning_rate": 4.5265959343794584e-05, "loss": 0.004, "step": 53120 }, { "epoch": 9.47, "learning_rate": 4.5265067760342375e-05, "loss": 0.003, "step": 53130 }, { "epoch": 9.48, "learning_rate": 4.526417617689016e-05, "loss": 0.0033, "step": 53140 }, { "epoch": 9.48, "learning_rate": 4.526328459343795e-05, "loss": 0.0055, "step": 53150 }, { "epoch": 9.48, "learning_rate": 4.5262393009985735e-05, "loss": 0.0037, "step": 53160 }, { "epoch": 9.48, "learning_rate": 4.526150142653353e-05, "loss": 0.0042, "step": 53170 }, { "epoch": 9.48, "learning_rate": 4.526060984308131e-05, "loss": 0.0032, "step": 53180 }, { "epoch": 9.48, "learning_rate": 4.52597182596291e-05, "loss": 0.0041, "step": 53190 }, { "epoch": 9.49, "learning_rate": 4.5258826676176894e-05, "loss": 0.0036, "step": 53200 }, { "epoch": 9.49, "learning_rate": 4.525793509272468e-05, "loss": 0.0052, "step": 53210 }, { "epoch": 9.49, "learning_rate": 4.525704350927247e-05, "loss": 0.0074, "step": 53220 }, { "epoch": 9.49, "learning_rate": 4.525615192582026e-05, "loss": 0.0046, "step": 53230 }, { "epoch": 9.49, "learning_rate": 4.525526034236805e-05, "loss": 0.0036, "step": 53240 }, { "epoch": 9.5, "learning_rate": 4.5254368758915836e-05, "loss": 0.002, "step": 53250 }, { "epoch": 9.5, "learning_rate": 4.525347717546363e-05, "loss": 0.0047, "step": 53260 }, { "epoch": 9.5, "learning_rate": 4.525258559201141e-05, "loss": 0.004, "step": 53270 }, { "epoch": 9.5, "learning_rate": 4.52516940085592e-05, "loss": 0.0055, "step": 53280 }, { "epoch": 9.5, "learning_rate": 4.5250802425106994e-05, "loss": 0.0047, "step": 53290 }, { "epoch": 9.5, "learning_rate": 4.524991084165478e-05, "loss": 0.0041, "step": 53300 }, { "epoch": 9.51, "learning_rate": 4.524901925820257e-05, "loss": 0.0055, "step": 53310 }, { "epoch": 9.51, "learning_rate": 4.5248127674750354e-05, "loss": 0.0041, "step": 53320 }, { "epoch": 9.51, "learning_rate": 4.5247236091298145e-05, "loss": 0.0032, "step": 53330 }, { "epoch": 9.51, "learning_rate": 4.5246344507845937e-05, "loss": 0.0028, "step": 53340 }, { "epoch": 9.51, "learning_rate": 4.524545292439373e-05, "loss": 0.0048, "step": 53350 }, { "epoch": 9.51, "learning_rate": 4.524456134094152e-05, "loss": 0.0066, "step": 53360 }, { "epoch": 9.52, "learning_rate": 4.5243669757489303e-05, "loss": 0.0031, "step": 53370 }, { "epoch": 9.52, "learning_rate": 4.5242778174037095e-05, "loss": 0.0039, "step": 53380 }, { "epoch": 9.52, "learning_rate": 4.524188659058488e-05, "loss": 0.0041, "step": 53390 }, { "epoch": 9.52, "learning_rate": 4.524099500713267e-05, "loss": 0.004, "step": 53400 }, { "epoch": 9.52, "learning_rate": 4.5240103423680455e-05, "loss": 0.0065, "step": 53410 }, { "epoch": 9.53, "learning_rate": 4.5239211840228246e-05, "loss": 0.0064, "step": 53420 }, { "epoch": 9.53, "learning_rate": 4.523832025677604e-05, "loss": 0.0035, "step": 53430 }, { "epoch": 9.53, "learning_rate": 4.523742867332382e-05, "loss": 0.0031, "step": 53440 }, { "epoch": 9.53, "learning_rate": 4.523653708987162e-05, "loss": 0.0032, "step": 53450 }, { "epoch": 9.53, "learning_rate": 4.5235645506419404e-05, "loss": 0.006, "step": 53460 }, { "epoch": 9.53, "learning_rate": 4.5234753922967195e-05, "loss": 0.0044, "step": 53470 }, { "epoch": 9.54, "learning_rate": 4.523386233951498e-05, "loss": 0.005, "step": 53480 }, { "epoch": 9.54, "learning_rate": 4.523297075606277e-05, "loss": 0.0042, "step": 53490 }, { "epoch": 9.54, "learning_rate": 4.5232079172610555e-05, "loss": 0.0051, "step": 53500 }, { "epoch": 9.54, "learning_rate": 4.5231187589158347e-05, "loss": 0.0033, "step": 53510 }, { "epoch": 9.54, "learning_rate": 4.523029600570614e-05, "loss": 0.004, "step": 53520 }, { "epoch": 9.55, "learning_rate": 4.522940442225392e-05, "loss": 0.0028, "step": 53530 }, { "epoch": 9.55, "learning_rate": 4.522851283880171e-05, "loss": 0.0027, "step": 53540 }, { "epoch": 9.55, "learning_rate": 4.52276212553495e-05, "loss": 0.0045, "step": 53550 }, { "epoch": 9.55, "learning_rate": 4.5226729671897296e-05, "loss": 0.0034, "step": 53560 }, { "epoch": 9.55, "learning_rate": 4.522583808844508e-05, "loss": 0.004, "step": 53570 }, { "epoch": 9.55, "learning_rate": 4.522494650499287e-05, "loss": 0.003, "step": 53580 }, { "epoch": 9.56, "learning_rate": 4.522405492154066e-05, "loss": 0.0045, "step": 53590 }, { "epoch": 9.56, "learning_rate": 4.522316333808845e-05, "loss": 0.004, "step": 53600 }, { "epoch": 9.56, "learning_rate": 4.522227175463624e-05, "loss": 0.0036, "step": 53610 }, { "epoch": 9.56, "learning_rate": 4.522138017118402e-05, "loss": 0.0031, "step": 53620 }, { "epoch": 9.56, "learning_rate": 4.5220488587731814e-05, "loss": 0.0044, "step": 53630 }, { "epoch": 9.56, "learning_rate": 4.52195970042796e-05, "loss": 0.004, "step": 53640 }, { "epoch": 9.57, "learning_rate": 4.521870542082739e-05, "loss": 0.0045, "step": 53650 }, { "epoch": 9.57, "learning_rate": 4.521781383737518e-05, "loss": 0.0047, "step": 53660 }, { "epoch": 9.57, "learning_rate": 4.521692225392297e-05, "loss": 0.0045, "step": 53670 }, { "epoch": 9.57, "learning_rate": 4.521603067047076e-05, "loss": 0.0043, "step": 53680 }, { "epoch": 9.57, "learning_rate": 4.521513908701855e-05, "loss": 0.0043, "step": 53690 }, { "epoch": 9.58, "learning_rate": 4.521424750356634e-05, "loss": 0.0042, "step": 53700 }, { "epoch": 9.58, "learning_rate": 4.521335592011412e-05, "loss": 0.0037, "step": 53710 }, { "epoch": 9.58, "learning_rate": 4.5212464336661914e-05, "loss": 0.0036, "step": 53720 }, { "epoch": 9.58, "learning_rate": 4.52115727532097e-05, "loss": 0.002, "step": 53730 }, { "epoch": 9.58, "learning_rate": 4.521068116975749e-05, "loss": 0.0047, "step": 53740 }, { "epoch": 9.58, "learning_rate": 4.520978958630528e-05, "loss": 0.0063, "step": 53750 }, { "epoch": 9.59, "learning_rate": 4.5208898002853066e-05, "loss": 0.0046, "step": 53760 }, { "epoch": 9.59, "learning_rate": 4.520800641940086e-05, "loss": 0.0052, "step": 53770 }, { "epoch": 9.59, "learning_rate": 4.520711483594865e-05, "loss": 0.0037, "step": 53780 }, { "epoch": 9.59, "learning_rate": 4.520622325249644e-05, "loss": 0.0032, "step": 53790 }, { "epoch": 9.59, "learning_rate": 4.5205331669044224e-05, "loss": 0.0056, "step": 53800 }, { "epoch": 9.6, "learning_rate": 4.5204440085592015e-05, "loss": 0.0031, "step": 53810 }, { "epoch": 9.6, "learning_rate": 4.5203548502139806e-05, "loss": 0.0036, "step": 53820 }, { "epoch": 9.6, "learning_rate": 4.520265691868759e-05, "loss": 0.0046, "step": 53830 }, { "epoch": 9.6, "learning_rate": 4.520176533523538e-05, "loss": 0.0062, "step": 53840 }, { "epoch": 9.6, "learning_rate": 4.5200873751783166e-05, "loss": 0.0041, "step": 53850 }, { "epoch": 9.6, "learning_rate": 4.519998216833096e-05, "loss": 0.0046, "step": 53860 }, { "epoch": 9.61, "learning_rate": 4.519909058487874e-05, "loss": 0.0056, "step": 53870 }, { "epoch": 9.61, "learning_rate": 4.519819900142653e-05, "loss": 0.0044, "step": 53880 }, { "epoch": 9.61, "learning_rate": 4.5197307417974324e-05, "loss": 0.0053, "step": 53890 }, { "epoch": 9.61, "learning_rate": 4.5196415834522116e-05, "loss": 0.0039, "step": 53900 }, { "epoch": 9.61, "learning_rate": 4.519552425106991e-05, "loss": 0.0027, "step": 53910 }, { "epoch": 9.61, "learning_rate": 4.519463266761769e-05, "loss": 0.0069, "step": 53920 }, { "epoch": 9.62, "learning_rate": 4.519374108416548e-05, "loss": 0.006, "step": 53930 }, { "epoch": 9.62, "learning_rate": 4.519284950071327e-05, "loss": 0.0058, "step": 53940 }, { "epoch": 9.62, "learning_rate": 4.519195791726106e-05, "loss": 0.0033, "step": 53950 }, { "epoch": 9.62, "learning_rate": 4.519106633380884e-05, "loss": 0.0029, "step": 53960 }, { "epoch": 9.62, "learning_rate": 4.5190174750356634e-05, "loss": 0.0041, "step": 53970 }, { "epoch": 9.63, "learning_rate": 4.5189283166904425e-05, "loss": 0.0038, "step": 53980 }, { "epoch": 9.63, "learning_rate": 4.518839158345221e-05, "loss": 0.0029, "step": 53990 }, { "epoch": 9.63, "learning_rate": 4.518750000000001e-05, "loss": 0.0042, "step": 54000 }, { "epoch": 9.63, "learning_rate": 4.518660841654779e-05, "loss": 0.0057, "step": 54010 }, { "epoch": 9.63, "learning_rate": 4.518571683309558e-05, "loss": 0.0033, "step": 54020 }, { "epoch": 9.63, "learning_rate": 4.518482524964337e-05, "loss": 0.004, "step": 54030 }, { "epoch": 9.64, "learning_rate": 4.518393366619116e-05, "loss": 0.0075, "step": 54040 }, { "epoch": 9.64, "learning_rate": 4.518304208273895e-05, "loss": 0.0038, "step": 54050 }, { "epoch": 9.64, "learning_rate": 4.5182150499286734e-05, "loss": 0.0035, "step": 54060 }, { "epoch": 9.64, "learning_rate": 4.5181258915834526e-05, "loss": 0.0048, "step": 54070 }, { "epoch": 9.64, "learning_rate": 4.518036733238231e-05, "loss": 0.005, "step": 54080 }, { "epoch": 9.65, "learning_rate": 4.51794757489301e-05, "loss": 0.0048, "step": 54090 }, { "epoch": 9.65, "learning_rate": 4.5178584165477886e-05, "loss": 0.0061, "step": 54100 }, { "epoch": 9.65, "learning_rate": 4.5177692582025684e-05, "loss": 0.0068, "step": 54110 }, { "epoch": 9.65, "learning_rate": 4.517680099857347e-05, "loss": 0.004, "step": 54120 }, { "epoch": 9.65, "learning_rate": 4.517590941512126e-05, "loss": 0.0051, "step": 54130 }, { "epoch": 9.65, "learning_rate": 4.517501783166905e-05, "loss": 0.0042, "step": 54140 }, { "epoch": 9.66, "learning_rate": 4.5174126248216835e-05, "loss": 0.0041, "step": 54150 }, { "epoch": 9.66, "learning_rate": 4.5173234664764626e-05, "loss": 0.0056, "step": 54160 }, { "epoch": 9.66, "learning_rate": 4.517234308131241e-05, "loss": 0.0045, "step": 54170 }, { "epoch": 9.66, "learning_rate": 4.51714514978602e-05, "loss": 0.003, "step": 54180 }, { "epoch": 9.66, "learning_rate": 4.5170559914407986e-05, "loss": 0.0021, "step": 54190 }, { "epoch": 9.66, "learning_rate": 4.516966833095578e-05, "loss": 0.0045, "step": 54200 }, { "epoch": 9.67, "learning_rate": 4.516877674750357e-05, "loss": 0.0041, "step": 54210 }, { "epoch": 9.67, "learning_rate": 4.516788516405136e-05, "loss": 0.0047, "step": 54220 }, { "epoch": 9.67, "learning_rate": 4.516699358059915e-05, "loss": 0.0042, "step": 54230 }, { "epoch": 9.67, "learning_rate": 4.5166101997146935e-05, "loss": 0.0026, "step": 54240 }, { "epoch": 9.67, "learning_rate": 4.516521041369473e-05, "loss": 0.0032, "step": 54250 }, { "epoch": 9.68, "learning_rate": 4.516431883024251e-05, "loss": 0.0044, "step": 54260 }, { "epoch": 9.68, "learning_rate": 4.51634272467903e-05, "loss": 0.0063, "step": 54270 }, { "epoch": 9.68, "learning_rate": 4.5162535663338094e-05, "loss": 0.0048, "step": 54280 }, { "epoch": 9.68, "learning_rate": 4.516164407988588e-05, "loss": 0.0044, "step": 54290 }, { "epoch": 9.68, "learning_rate": 4.516075249643367e-05, "loss": 0.0055, "step": 54300 }, { "epoch": 9.68, "learning_rate": 4.5159860912981454e-05, "loss": 0.0082, "step": 54310 }, { "epoch": 9.69, "learning_rate": 4.5158969329529245e-05, "loss": 0.0057, "step": 54320 }, { "epoch": 9.69, "learning_rate": 4.5158077746077036e-05, "loss": 0.0052, "step": 54330 }, { "epoch": 9.69, "learning_rate": 4.515718616262483e-05, "loss": 0.0049, "step": 54340 }, { "epoch": 9.69, "learning_rate": 4.515629457917261e-05, "loss": 0.0075, "step": 54350 }, { "epoch": 9.69, "learning_rate": 4.51554029957204e-05, "loss": 0.0045, "step": 54360 }, { "epoch": 9.7, "learning_rate": 4.5154511412268194e-05, "loss": 0.0056, "step": 54370 }, { "epoch": 9.7, "learning_rate": 4.515361982881598e-05, "loss": 0.0037, "step": 54380 }, { "epoch": 9.7, "learning_rate": 4.515272824536377e-05, "loss": 0.0069, "step": 54390 }, { "epoch": 9.7, "learning_rate": 4.5151836661911554e-05, "loss": 0.0054, "step": 54400 }, { "epoch": 9.7, "learning_rate": 4.5150945078459345e-05, "loss": 0.0054, "step": 54410 }, { "epoch": 9.7, "learning_rate": 4.515005349500713e-05, "loss": 0.0055, "step": 54420 }, { "epoch": 9.71, "learning_rate": 4.514916191155492e-05, "loss": 0.0046, "step": 54430 }, { "epoch": 9.71, "learning_rate": 4.514827032810271e-05, "loss": 0.0067, "step": 54440 }, { "epoch": 9.71, "learning_rate": 4.5147378744650503e-05, "loss": 0.0065, "step": 54450 }, { "epoch": 9.71, "learning_rate": 4.5146487161198295e-05, "loss": 0.0038, "step": 54460 }, { "epoch": 9.71, "learning_rate": 4.514559557774608e-05, "loss": 0.0032, "step": 54470 }, { "epoch": 9.71, "learning_rate": 4.514470399429387e-05, "loss": 0.0025, "step": 54480 }, { "epoch": 9.72, "learning_rate": 4.5143812410841655e-05, "loss": 0.0049, "step": 54490 }, { "epoch": 9.72, "learning_rate": 4.5142920827389446e-05, "loss": 0.0046, "step": 54500 }, { "epoch": 9.72, "learning_rate": 4.514202924393724e-05, "loss": 0.006, "step": 54510 }, { "epoch": 9.72, "learning_rate": 4.514113766048502e-05, "loss": 0.0039, "step": 54520 }, { "epoch": 9.72, "learning_rate": 4.514024607703281e-05, "loss": 0.0024, "step": 54530 }, { "epoch": 9.73, "learning_rate": 4.51393544935806e-05, "loss": 0.0029, "step": 54540 }, { "epoch": 9.73, "learning_rate": 4.5138462910128395e-05, "loss": 0.0031, "step": 54550 }, { "epoch": 9.73, "learning_rate": 4.513757132667618e-05, "loss": 0.0035, "step": 54560 }, { "epoch": 9.73, "learning_rate": 4.513667974322397e-05, "loss": 0.0032, "step": 54570 }, { "epoch": 9.73, "learning_rate": 4.5135788159771755e-05, "loss": 0.0032, "step": 54580 }, { "epoch": 9.73, "learning_rate": 4.5134896576319546e-05, "loss": 0.0054, "step": 54590 }, { "epoch": 9.74, "learning_rate": 4.513400499286734e-05, "loss": 0.0066, "step": 54600 }, { "epoch": 9.74, "learning_rate": 4.513311340941512e-05, "loss": 0.0037, "step": 54610 }, { "epoch": 9.74, "learning_rate": 4.513222182596291e-05, "loss": 0.0048, "step": 54620 }, { "epoch": 9.74, "learning_rate": 4.51313302425107e-05, "loss": 0.0059, "step": 54630 }, { "epoch": 9.74, "learning_rate": 4.513043865905849e-05, "loss": 0.0059, "step": 54640 }, { "epoch": 9.75, "learning_rate": 4.5129547075606273e-05, "loss": 0.0079, "step": 54650 }, { "epoch": 9.75, "learning_rate": 4.512865549215407e-05, "loss": 0.0038, "step": 54660 }, { "epoch": 9.75, "learning_rate": 4.5127763908701856e-05, "loss": 0.0048, "step": 54670 }, { "epoch": 9.75, "learning_rate": 4.512687232524965e-05, "loss": 0.0029, "step": 54680 }, { "epoch": 9.75, "learning_rate": 4.512598074179744e-05, "loss": 0.0053, "step": 54690 }, { "epoch": 9.75, "learning_rate": 4.512508915834522e-05, "loss": 0.0049, "step": 54700 }, { "epoch": 9.76, "learning_rate": 4.5124197574893014e-05, "loss": 0.004, "step": 54710 }, { "epoch": 9.76, "learning_rate": 4.51233059914408e-05, "loss": 0.0028, "step": 54720 }, { "epoch": 9.76, "learning_rate": 4.512241440798859e-05, "loss": 0.005, "step": 54730 }, { "epoch": 9.76, "learning_rate": 4.512152282453638e-05, "loss": 0.0074, "step": 54740 }, { "epoch": 9.76, "learning_rate": 4.5120631241084165e-05, "loss": 0.004, "step": 54750 }, { "epoch": 9.76, "learning_rate": 4.5119739657631956e-05, "loss": 0.0047, "step": 54760 }, { "epoch": 9.77, "learning_rate": 4.511884807417975e-05, "loss": 0.0069, "step": 54770 }, { "epoch": 9.77, "learning_rate": 4.511795649072754e-05, "loss": 0.0049, "step": 54780 }, { "epoch": 9.77, "learning_rate": 4.511706490727532e-05, "loss": 0.0049, "step": 54790 }, { "epoch": 9.77, "learning_rate": 4.5116173323823114e-05, "loss": 0.0028, "step": 54800 }, { "epoch": 9.77, "learning_rate": 4.51152817403709e-05, "loss": 0.0047, "step": 54810 }, { "epoch": 9.78, "learning_rate": 4.511439015691869e-05, "loss": 0.0041, "step": 54820 }, { "epoch": 9.78, "learning_rate": 4.511349857346648e-05, "loss": 0.0051, "step": 54830 }, { "epoch": 9.78, "learning_rate": 4.5112606990014266e-05, "loss": 0.0034, "step": 54840 }, { "epoch": 9.78, "learning_rate": 4.511171540656206e-05, "loss": 0.0037, "step": 54850 }, { "epoch": 9.78, "learning_rate": 4.511082382310984e-05, "loss": 0.0036, "step": 54860 }, { "epoch": 9.78, "learning_rate": 4.510993223965763e-05, "loss": 0.0031, "step": 54870 }, { "epoch": 9.79, "learning_rate": 4.5109040656205424e-05, "loss": 0.0042, "step": 54880 }, { "epoch": 9.79, "learning_rate": 4.5108149072753215e-05, "loss": 0.0039, "step": 54890 }, { "epoch": 9.79, "learning_rate": 4.5107257489301e-05, "loss": 0.0033, "step": 54900 }, { "epoch": 9.79, "learning_rate": 4.510636590584879e-05, "loss": 0.0047, "step": 54910 }, { "epoch": 9.79, "learning_rate": 4.510547432239658e-05, "loss": 0.0026, "step": 54920 }, { "epoch": 9.79, "learning_rate": 4.5104582738944366e-05, "loss": 0.0043, "step": 54930 }, { "epoch": 9.8, "learning_rate": 4.510369115549216e-05, "loss": 0.0052, "step": 54940 }, { "epoch": 9.8, "learning_rate": 4.510279957203994e-05, "loss": 0.0067, "step": 54950 }, { "epoch": 9.8, "learning_rate": 4.510190798858773e-05, "loss": 0.0027, "step": 54960 }, { "epoch": 9.8, "learning_rate": 4.5101016405135524e-05, "loss": 0.0042, "step": 54970 }, { "epoch": 9.8, "learning_rate": 4.510012482168331e-05, "loss": 0.0048, "step": 54980 }, { "epoch": 9.81, "learning_rate": 4.50992332382311e-05, "loss": 0.0078, "step": 54990 }, { "epoch": 9.81, "learning_rate": 4.509834165477889e-05, "loss": 0.0058, "step": 55000 }, { "epoch": 9.81, "learning_rate": 4.509745007132668e-05, "loss": 0.0031, "step": 55010 }, { "epoch": 9.81, "learning_rate": 4.509655848787447e-05, "loss": 0.003, "step": 55020 }, { "epoch": 9.81, "learning_rate": 4.509566690442226e-05, "loss": 0.0028, "step": 55030 }, { "epoch": 9.81, "learning_rate": 4.509477532097004e-05, "loss": 0.006, "step": 55040 }, { "epoch": 9.82, "learning_rate": 4.5093883737517834e-05, "loss": 0.0039, "step": 55050 }, { "epoch": 9.82, "learning_rate": 4.5092992154065625e-05, "loss": 0.0052, "step": 55060 }, { "epoch": 9.82, "learning_rate": 4.509210057061341e-05, "loss": 0.0044, "step": 55070 }, { "epoch": 9.82, "learning_rate": 4.50912089871612e-05, "loss": 0.0046, "step": 55080 }, { "epoch": 9.82, "learning_rate": 4.5090317403708985e-05, "loss": 0.0065, "step": 55090 }, { "epoch": 9.83, "learning_rate": 4.508942582025678e-05, "loss": 0.0053, "step": 55100 }, { "epoch": 9.83, "learning_rate": 4.508853423680457e-05, "loss": 0.0058, "step": 55110 }, { "epoch": 9.83, "learning_rate": 4.508764265335236e-05, "loss": 0.003, "step": 55120 }, { "epoch": 9.83, "learning_rate": 4.508675106990014e-05, "loss": 0.0046, "step": 55130 }, { "epoch": 9.83, "learning_rate": 4.5085859486447934e-05, "loss": 0.0089, "step": 55140 }, { "epoch": 9.83, "learning_rate": 4.5084967902995725e-05, "loss": 0.0048, "step": 55150 }, { "epoch": 9.84, "learning_rate": 4.508407631954351e-05, "loss": 0.0051, "step": 55160 }, { "epoch": 9.84, "learning_rate": 4.50831847360913e-05, "loss": 0.0052, "step": 55170 }, { "epoch": 9.84, "learning_rate": 4.5082293152639086e-05, "loss": 0.0048, "step": 55180 }, { "epoch": 9.84, "learning_rate": 4.508140156918688e-05, "loss": 0.002, "step": 55190 }, { "epoch": 9.84, "learning_rate": 4.508050998573467e-05, "loss": 0.0037, "step": 55200 }, { "epoch": 9.84, "learning_rate": 4.507961840228245e-05, "loss": 0.0077, "step": 55210 }, { "epoch": 9.85, "learning_rate": 4.5078726818830244e-05, "loss": 0.0042, "step": 55220 }, { "epoch": 9.85, "learning_rate": 4.5077835235378035e-05, "loss": 0.0046, "step": 55230 }, { "epoch": 9.85, "learning_rate": 4.5076943651925826e-05, "loss": 0.0033, "step": 55240 }, { "epoch": 9.85, "learning_rate": 4.507605206847361e-05, "loss": 0.0058, "step": 55250 }, { "epoch": 9.85, "learning_rate": 4.50751604850214e-05, "loss": 0.0032, "step": 55260 }, { "epoch": 9.86, "learning_rate": 4.5074268901569186e-05, "loss": 0.0028, "step": 55270 }, { "epoch": 9.86, "learning_rate": 4.507337731811698e-05, "loss": 0.0037, "step": 55280 }, { "epoch": 9.86, "learning_rate": 4.507248573466477e-05, "loss": 0.002, "step": 55290 }, { "epoch": 9.86, "learning_rate": 4.507159415121255e-05, "loss": 0.0055, "step": 55300 }, { "epoch": 9.86, "learning_rate": 4.5070702567760344e-05, "loss": 0.0042, "step": 55310 }, { "epoch": 9.86, "learning_rate": 4.506981098430813e-05, "loss": 0.0042, "step": 55320 }, { "epoch": 9.87, "learning_rate": 4.506891940085593e-05, "loss": 0.0079, "step": 55330 }, { "epoch": 9.87, "learning_rate": 4.506802781740371e-05, "loss": 0.0037, "step": 55340 }, { "epoch": 9.87, "learning_rate": 4.50671362339515e-05, "loss": 0.0027, "step": 55350 }, { "epoch": 9.87, "learning_rate": 4.506624465049929e-05, "loss": 0.0033, "step": 55360 }, { "epoch": 9.87, "learning_rate": 4.506535306704708e-05, "loss": 0.0057, "step": 55370 }, { "epoch": 9.88, "learning_rate": 4.506446148359487e-05, "loss": 0.0068, "step": 55380 }, { "epoch": 9.88, "learning_rate": 4.5063569900142654e-05, "loss": 0.0036, "step": 55390 }, { "epoch": 9.88, "learning_rate": 4.5062678316690445e-05, "loss": 0.0056, "step": 55400 }, { "epoch": 9.88, "learning_rate": 4.506178673323823e-05, "loss": 0.0056, "step": 55410 }, { "epoch": 9.88, "learning_rate": 4.506089514978602e-05, "loss": 0.0044, "step": 55420 }, { "epoch": 9.88, "learning_rate": 4.506000356633381e-05, "loss": 0.0062, "step": 55430 }, { "epoch": 9.89, "learning_rate": 4.50591119828816e-05, "loss": 0.0046, "step": 55440 }, { "epoch": 9.89, "learning_rate": 4.505822039942939e-05, "loss": 0.006, "step": 55450 }, { "epoch": 9.89, "learning_rate": 4.505732881597718e-05, "loss": 0.0039, "step": 55460 }, { "epoch": 9.89, "learning_rate": 4.505643723252497e-05, "loss": 0.0029, "step": 55470 }, { "epoch": 9.89, "learning_rate": 4.5055545649072754e-05, "loss": 0.0063, "step": 55480 }, { "epoch": 9.89, "learning_rate": 4.5054654065620545e-05, "loss": 0.0041, "step": 55490 }, { "epoch": 9.9, "learning_rate": 4.505376248216833e-05, "loss": 0.0081, "step": 55500 }, { "epoch": 9.9, "learning_rate": 4.505287089871612e-05, "loss": 0.003, "step": 55510 }, { "epoch": 9.9, "learning_rate": 4.505197931526391e-05, "loss": 0.0058, "step": 55520 }, { "epoch": 9.9, "learning_rate": 4.5051087731811697e-05, "loss": 0.0062, "step": 55530 }, { "epoch": 9.9, "learning_rate": 4.505019614835949e-05, "loss": 0.0071, "step": 55540 }, { "epoch": 9.91, "learning_rate": 4.504930456490728e-05, "loss": 0.0057, "step": 55550 }, { "epoch": 9.91, "learning_rate": 4.504841298145507e-05, "loss": 0.003, "step": 55560 }, { "epoch": 9.91, "learning_rate": 4.504761055634807e-05, "loss": 0.0054, "step": 55570 }, { "epoch": 9.91, "learning_rate": 4.504671897289586e-05, "loss": 0.005, "step": 55580 }, { "epoch": 9.91, "learning_rate": 4.5045827389443654e-05, "loss": 0.0052, "step": 55590 }, { "epoch": 9.91, "learning_rate": 4.5044935805991445e-05, "loss": 0.0048, "step": 55600 }, { "epoch": 9.92, "learning_rate": 4.5044044222539236e-05, "loss": 0.0048, "step": 55610 }, { "epoch": 9.92, "learning_rate": 4.504315263908702e-05, "loss": 0.0033, "step": 55620 }, { "epoch": 9.92, "learning_rate": 4.504226105563481e-05, "loss": 0.0067, "step": 55630 }, { "epoch": 9.92, "learning_rate": 4.5041369472182596e-05, "loss": 0.004, "step": 55640 }, { "epoch": 9.92, "learning_rate": 4.504047788873039e-05, "loss": 0.0034, "step": 55650 }, { "epoch": 9.93, "learning_rate": 4.503958630527817e-05, "loss": 0.0052, "step": 55660 }, { "epoch": 9.93, "learning_rate": 4.503869472182596e-05, "loss": 0.0027, "step": 55670 }, { "epoch": 9.93, "learning_rate": 4.5037803138373754e-05, "loss": 0.0044, "step": 55680 }, { "epoch": 9.93, "learning_rate": 4.503691155492154e-05, "loss": 0.0056, "step": 55690 }, { "epoch": 9.93, "learning_rate": 4.503601997146934e-05, "loss": 0.0057, "step": 55700 }, { "epoch": 9.93, "learning_rate": 4.503512838801712e-05, "loss": 0.0043, "step": 55710 }, { "epoch": 9.94, "learning_rate": 4.503423680456491e-05, "loss": 0.0027, "step": 55720 }, { "epoch": 9.94, "learning_rate": 4.50333452211127e-05, "loss": 0.004, "step": 55730 }, { "epoch": 9.94, "learning_rate": 4.503245363766049e-05, "loss": 0.0042, "step": 55740 }, { "epoch": 9.94, "learning_rate": 4.503156205420828e-05, "loss": 0.0037, "step": 55750 }, { "epoch": 9.94, "learning_rate": 4.5030670470756064e-05, "loss": 0.0028, "step": 55760 }, { "epoch": 9.94, "learning_rate": 4.5029778887303855e-05, "loss": 0.0042, "step": 55770 }, { "epoch": 9.95, "learning_rate": 4.502888730385164e-05, "loss": 0.0044, "step": 55780 }, { "epoch": 9.95, "learning_rate": 4.502799572039943e-05, "loss": 0.006, "step": 55790 }, { "epoch": 9.95, "learning_rate": 4.5027104136947215e-05, "loss": 0.0025, "step": 55800 }, { "epoch": 9.95, "learning_rate": 4.502621255349501e-05, "loss": 0.0041, "step": 55810 }, { "epoch": 9.95, "learning_rate": 4.50253209700428e-05, "loss": 0.0031, "step": 55820 }, { "epoch": 9.96, "learning_rate": 4.502442938659059e-05, "loss": 0.0029, "step": 55830 }, { "epoch": 9.96, "learning_rate": 4.502353780313838e-05, "loss": 0.0056, "step": 55840 }, { "epoch": 9.96, "learning_rate": 4.5022646219686164e-05, "loss": 0.0039, "step": 55850 }, { "epoch": 9.96, "learning_rate": 4.5021754636233956e-05, "loss": 0.0032, "step": 55860 }, { "epoch": 9.96, "learning_rate": 4.502086305278174e-05, "loss": 0.0027, "step": 55870 }, { "epoch": 9.96, "learning_rate": 4.501997146932953e-05, "loss": 0.0041, "step": 55880 }, { "epoch": 9.97, "learning_rate": 4.5019079885877316e-05, "loss": 0.005, "step": 55890 }, { "epoch": 9.97, "learning_rate": 4.501818830242511e-05, "loss": 0.0034, "step": 55900 }, { "epoch": 9.97, "learning_rate": 4.50172967189729e-05, "loss": 0.0032, "step": 55910 }, { "epoch": 9.97, "learning_rate": 4.501640513552069e-05, "loss": 0.0042, "step": 55920 }, { "epoch": 9.97, "learning_rate": 4.501551355206848e-05, "loss": 0.0026, "step": 55930 }, { "epoch": 9.98, "learning_rate": 4.5014621968616265e-05, "loss": 0.004, "step": 55940 }, { "epoch": 9.98, "learning_rate": 4.5013730385164056e-05, "loss": 0.0049, "step": 55950 }, { "epoch": 9.98, "learning_rate": 4.501283880171184e-05, "loss": 0.0028, "step": 55960 }, { "epoch": 9.98, "learning_rate": 4.501194721825963e-05, "loss": 0.0081, "step": 55970 }, { "epoch": 9.98, "learning_rate": 4.501105563480742e-05, "loss": 0.0037, "step": 55980 }, { "epoch": 9.98, "learning_rate": 4.501016405135521e-05, "loss": 0.0028, "step": 55990 }, { "epoch": 9.99, "learning_rate": 4.5009272467903e-05, "loss": 0.0047, "step": 56000 }, { "epoch": 9.99, "learning_rate": 4.500838088445078e-05, "loss": 0.0058, "step": 56010 }, { "epoch": 9.99, "learning_rate": 4.5007489300998574e-05, "loss": 0.003, "step": 56020 }, { "epoch": 9.99, "learning_rate": 4.5006597717546365e-05, "loss": 0.0042, "step": 56030 }, { "epoch": 9.99, "learning_rate": 4.500570613409416e-05, "loss": 0.0032, "step": 56040 }, { "epoch": 9.99, "learning_rate": 4.500481455064194e-05, "loss": 0.0035, "step": 56050 }, { "epoch": 10.0, "learning_rate": 4.500392296718973e-05, "loss": 0.0053, "step": 56060 }, { "epoch": 10.0, "learning_rate": 4.5003031383737524e-05, "loss": 0.0055, "step": 56070 }, { "epoch": 10.0, "learning_rate": 4.500213980028531e-05, "loss": 0.0052, "step": 56080 }, { "epoch": 10.0, "eval_loss": 0.018937913700938225, "eval_runtime": 195.8855, "eval_samples_per_second": 23.682, "eval_steps_per_second": 2.961, "step": 56080 }, { "epoch": 10.0, "learning_rate": 4.50012482168331e-05, "loss": 0.0038, "step": 56090 }, { "epoch": 10.0, "learning_rate": 4.5000356633380884e-05, "loss": 0.0034, "step": 56100 }, { "epoch": 10.01, "learning_rate": 4.4999465049928675e-05, "loss": 0.003, "step": 56110 }, { "epoch": 10.01, "learning_rate": 4.499866262482168e-05, "loss": 0.005, "step": 56120 }, { "epoch": 10.01, "learning_rate": 4.4997771041369474e-05, "loss": 0.0026, "step": 56130 }, { "epoch": 10.01, "learning_rate": 4.4996879457917265e-05, "loss": 0.0019, "step": 56140 }, { "epoch": 10.01, "learning_rate": 4.499598787446505e-05, "loss": 0.0045, "step": 56150 }, { "epoch": 10.01, "learning_rate": 4.499509629101284e-05, "loss": 0.0028, "step": 56160 }, { "epoch": 10.02, "learning_rate": 4.4994204707560625e-05, "loss": 0.0072, "step": 56170 }, { "epoch": 10.02, "learning_rate": 4.4993313124108417e-05, "loss": 0.0046, "step": 56180 }, { "epoch": 10.02, "learning_rate": 4.499242154065621e-05, "loss": 0.0042, "step": 56190 }, { "epoch": 10.02, "learning_rate": 4.4991529957204e-05, "loss": 0.0057, "step": 56200 }, { "epoch": 10.02, "learning_rate": 4.4990638373751783e-05, "loss": 0.003, "step": 56210 }, { "epoch": 10.02, "learning_rate": 4.4989746790299575e-05, "loss": 0.0032, "step": 56220 }, { "epoch": 10.03, "learning_rate": 4.4988855206847366e-05, "loss": 0.0033, "step": 56230 }, { "epoch": 10.03, "learning_rate": 4.498796362339515e-05, "loss": 0.0024, "step": 56240 }, { "epoch": 10.03, "learning_rate": 4.498707203994294e-05, "loss": 0.0021, "step": 56250 }, { "epoch": 10.03, "learning_rate": 4.4986180456490726e-05, "loss": 0.004, "step": 56260 }, { "epoch": 10.03, "learning_rate": 4.498528887303852e-05, "loss": 0.006, "step": 56270 }, { "epoch": 10.04, "learning_rate": 4.498439728958631e-05, "loss": 0.0047, "step": 56280 }, { "epoch": 10.04, "learning_rate": 4.498350570613409e-05, "loss": 0.0031, "step": 56290 }, { "epoch": 10.04, "learning_rate": 4.498261412268189e-05, "loss": 0.0043, "step": 56300 }, { "epoch": 10.04, "learning_rate": 4.4981722539229675e-05, "loss": 0.0048, "step": 56310 }, { "epoch": 10.04, "learning_rate": 4.4980830955777466e-05, "loss": 0.0029, "step": 56320 }, { "epoch": 10.04, "learning_rate": 4.497993937232525e-05, "loss": 0.0054, "step": 56330 }, { "epoch": 10.05, "learning_rate": 4.497904778887304e-05, "loss": 0.0033, "step": 56340 }, { "epoch": 10.05, "learning_rate": 4.4978156205420826e-05, "loss": 0.0035, "step": 56350 }, { "epoch": 10.05, "learning_rate": 4.497726462196862e-05, "loss": 0.0035, "step": 56360 }, { "epoch": 10.05, "learning_rate": 4.497637303851641e-05, "loss": 0.004, "step": 56370 }, { "epoch": 10.05, "learning_rate": 4.497548145506419e-05, "loss": 0.0023, "step": 56380 }, { "epoch": 10.06, "learning_rate": 4.4974589871611985e-05, "loss": 0.0033, "step": 56390 }, { "epoch": 10.06, "learning_rate": 4.497369828815977e-05, "loss": 0.004, "step": 56400 }, { "epoch": 10.06, "learning_rate": 4.497280670470757e-05, "loss": 0.0034, "step": 56410 }, { "epoch": 10.06, "learning_rate": 4.497191512125535e-05, "loss": 0.0061, "step": 56420 }, { "epoch": 10.06, "learning_rate": 4.497102353780314e-05, "loss": 0.0042, "step": 56430 }, { "epoch": 10.06, "learning_rate": 4.497013195435093e-05, "loss": 0.004, "step": 56440 }, { "epoch": 10.07, "learning_rate": 4.496924037089872e-05, "loss": 0.0059, "step": 56450 }, { "epoch": 10.07, "learning_rate": 4.496834878744651e-05, "loss": 0.004, "step": 56460 }, { "epoch": 10.07, "learning_rate": 4.4967457203994294e-05, "loss": 0.0035, "step": 56470 }, { "epoch": 10.07, "learning_rate": 4.4966565620542085e-05, "loss": 0.0039, "step": 56480 }, { "epoch": 10.07, "learning_rate": 4.496567403708987e-05, "loss": 0.0036, "step": 56490 }, { "epoch": 10.07, "learning_rate": 4.496478245363766e-05, "loss": 0.0032, "step": 56500 }, { "epoch": 10.08, "learning_rate": 4.496389087018545e-05, "loss": 0.0028, "step": 56510 }, { "epoch": 10.08, "learning_rate": 4.496299928673324e-05, "loss": 0.0027, "step": 56520 }, { "epoch": 10.08, "learning_rate": 4.4962107703281034e-05, "loss": 0.0063, "step": 56530 }, { "epoch": 10.08, "learning_rate": 4.496121611982882e-05, "loss": 0.0041, "step": 56540 }, { "epoch": 10.08, "learning_rate": 4.496032453637661e-05, "loss": 0.0028, "step": 56550 }, { "epoch": 10.09, "learning_rate": 4.4959432952924394e-05, "loss": 0.0073, "step": 56560 }, { "epoch": 10.09, "learning_rate": 4.4958541369472186e-05, "loss": 0.0032, "step": 56570 }, { "epoch": 10.09, "learning_rate": 4.495764978601997e-05, "loss": 0.0038, "step": 56580 }, { "epoch": 10.09, "learning_rate": 4.495675820256776e-05, "loss": 0.0057, "step": 56590 }, { "epoch": 10.09, "learning_rate": 4.495586661911555e-05, "loss": 0.0043, "step": 56600 }, { "epoch": 10.09, "learning_rate": 4.495497503566334e-05, "loss": 0.002, "step": 56610 }, { "epoch": 10.1, "learning_rate": 4.495408345221113e-05, "loss": 0.0041, "step": 56620 }, { "epoch": 10.1, "learning_rate": 4.495319186875892e-05, "loss": 0.002, "step": 56630 }, { "epoch": 10.1, "learning_rate": 4.495230028530671e-05, "loss": 0.0038, "step": 56640 }, { "epoch": 10.1, "learning_rate": 4.4951408701854495e-05, "loss": 0.0053, "step": 56650 }, { "epoch": 10.1, "learning_rate": 4.4950517118402286e-05, "loss": 0.0037, "step": 56660 }, { "epoch": 10.11, "learning_rate": 4.494962553495007e-05, "loss": 0.008, "step": 56670 }, { "epoch": 10.11, "learning_rate": 4.494873395149786e-05, "loss": 0.0044, "step": 56680 }, { "epoch": 10.11, "learning_rate": 4.494784236804565e-05, "loss": 0.0044, "step": 56690 }, { "epoch": 10.11, "learning_rate": 4.494695078459344e-05, "loss": 0.0053, "step": 56700 }, { "epoch": 10.11, "learning_rate": 4.494605920114123e-05, "loss": 0.0031, "step": 56710 }, { "epoch": 10.11, "learning_rate": 4.494516761768901e-05, "loss": 0.0057, "step": 56720 }, { "epoch": 10.12, "learning_rate": 4.4944276034236804e-05, "loss": 0.0033, "step": 56730 }, { "epoch": 10.12, "learning_rate": 4.4943384450784596e-05, "loss": 0.0068, "step": 56740 }, { "epoch": 10.12, "learning_rate": 4.494249286733239e-05, "loss": 0.0046, "step": 56750 }, { "epoch": 10.12, "learning_rate": 4.4941690442225395e-05, "loss": 0.0043, "step": 56760 }, { "epoch": 10.12, "learning_rate": 4.494079885877318e-05, "loss": 0.0029, "step": 56770 }, { "epoch": 10.12, "learning_rate": 4.493990727532098e-05, "loss": 0.004, "step": 56780 }, { "epoch": 10.13, "learning_rate": 4.493901569186876e-05, "loss": 0.0026, "step": 56790 }, { "epoch": 10.13, "learning_rate": 4.493812410841655e-05, "loss": 0.0032, "step": 56800 }, { "epoch": 10.13, "learning_rate": 4.493723252496434e-05, "loss": 0.0041, "step": 56810 }, { "epoch": 10.13, "learning_rate": 4.493634094151213e-05, "loss": 0.0031, "step": 56820 }, { "epoch": 10.13, "learning_rate": 4.493544935805992e-05, "loss": 0.0028, "step": 56830 }, { "epoch": 10.14, "learning_rate": 4.4934557774607704e-05, "loss": 0.0041, "step": 56840 }, { "epoch": 10.14, "learning_rate": 4.4933666191155495e-05, "loss": 0.0045, "step": 56850 }, { "epoch": 10.14, "learning_rate": 4.493277460770328e-05, "loss": 0.0041, "step": 56860 }, { "epoch": 10.14, "learning_rate": 4.493188302425107e-05, "loss": 0.0053, "step": 56870 }, { "epoch": 10.14, "learning_rate": 4.4930991440798855e-05, "loss": 0.0018, "step": 56880 }, { "epoch": 10.14, "learning_rate": 4.4930099857346653e-05, "loss": 0.0042, "step": 56890 }, { "epoch": 10.15, "learning_rate": 4.492920827389444e-05, "loss": 0.0025, "step": 56900 }, { "epoch": 10.15, "learning_rate": 4.492831669044223e-05, "loss": 0.0047, "step": 56910 }, { "epoch": 10.15, "learning_rate": 4.492742510699002e-05, "loss": 0.0053, "step": 56920 }, { "epoch": 10.15, "learning_rate": 4.4926533523537805e-05, "loss": 0.0027, "step": 56930 }, { "epoch": 10.15, "learning_rate": 4.4925641940085596e-05, "loss": 0.0039, "step": 56940 }, { "epoch": 10.16, "learning_rate": 4.492475035663338e-05, "loss": 0.004, "step": 56950 }, { "epoch": 10.16, "learning_rate": 4.492385877318117e-05, "loss": 0.0054, "step": 56960 }, { "epoch": 10.16, "learning_rate": 4.492296718972896e-05, "loss": 0.0053, "step": 56970 }, { "epoch": 10.16, "learning_rate": 4.492207560627675e-05, "loss": 0.0046, "step": 56980 }, { "epoch": 10.16, "learning_rate": 4.492118402282454e-05, "loss": 0.0049, "step": 56990 }, { "epoch": 10.16, "learning_rate": 4.492029243937233e-05, "loss": 0.0055, "step": 57000 }, { "epoch": 10.17, "learning_rate": 4.491940085592012e-05, "loss": 0.0021, "step": 57010 }, { "epoch": 10.17, "learning_rate": 4.4918509272467905e-05, "loss": 0.0063, "step": 57020 }, { "epoch": 10.17, "learning_rate": 4.4917617689015697e-05, "loss": 0.0085, "step": 57030 }, { "epoch": 10.17, "learning_rate": 4.491672610556348e-05, "loss": 0.0034, "step": 57040 }, { "epoch": 10.17, "learning_rate": 4.491583452211127e-05, "loss": 0.0013, "step": 57050 }, { "epoch": 10.17, "learning_rate": 4.491494293865906e-05, "loss": 0.0045, "step": 57060 }, { "epoch": 10.18, "learning_rate": 4.491405135520685e-05, "loss": 0.0054, "step": 57070 }, { "epoch": 10.18, "learning_rate": 4.491315977175464e-05, "loss": 0.0047, "step": 57080 }, { "epoch": 10.18, "learning_rate": 4.4912268188302423e-05, "loss": 0.0023, "step": 57090 }, { "epoch": 10.18, "learning_rate": 4.4911376604850215e-05, "loss": 0.0049, "step": 57100 }, { "epoch": 10.18, "learning_rate": 4.4910485021398006e-05, "loss": 0.0064, "step": 57110 }, { "epoch": 10.19, "learning_rate": 4.49095934379458e-05, "loss": 0.0057, "step": 57120 }, { "epoch": 10.19, "learning_rate": 4.490870185449358e-05, "loss": 0.0035, "step": 57130 }, { "epoch": 10.19, "learning_rate": 4.490781027104137e-05, "loss": 0.0043, "step": 57140 }, { "epoch": 10.19, "learning_rate": 4.4906918687589164e-05, "loss": 0.0031, "step": 57150 }, { "epoch": 10.19, "learning_rate": 4.490602710413695e-05, "loss": 0.0055, "step": 57160 }, { "epoch": 10.19, "learning_rate": 4.490513552068474e-05, "loss": 0.0026, "step": 57170 }, { "epoch": 10.2, "learning_rate": 4.4904243937232524e-05, "loss": 0.0027, "step": 57180 }, { "epoch": 10.2, "learning_rate": 4.4903352353780315e-05, "loss": 0.0024, "step": 57190 }, { "epoch": 10.2, "learning_rate": 4.4902460770328106e-05, "loss": 0.0049, "step": 57200 }, { "epoch": 10.2, "learning_rate": 4.490156918687589e-05, "loss": 0.0039, "step": 57210 }, { "epoch": 10.2, "learning_rate": 4.490067760342368e-05, "loss": 0.0043, "step": 57220 }, { "epoch": 10.21, "learning_rate": 4.489978601997147e-05, "loss": 0.0035, "step": 57230 }, { "epoch": 10.21, "learning_rate": 4.4898894436519264e-05, "loss": 0.0028, "step": 57240 }, { "epoch": 10.21, "learning_rate": 4.489800285306705e-05, "loss": 0.0026, "step": 57250 }, { "epoch": 10.21, "learning_rate": 4.489711126961484e-05, "loss": 0.0036, "step": 57260 }, { "epoch": 10.21, "learning_rate": 4.4896219686162625e-05, "loss": 0.0039, "step": 57270 }, { "epoch": 10.21, "learning_rate": 4.4895328102710416e-05, "loss": 0.0027, "step": 57280 }, { "epoch": 10.22, "learning_rate": 4.489443651925821e-05, "loss": 0.0056, "step": 57290 }, { "epoch": 10.22, "learning_rate": 4.489354493580599e-05, "loss": 0.0025, "step": 57300 }, { "epoch": 10.22, "learning_rate": 4.489265335235378e-05, "loss": 0.0051, "step": 57310 }, { "epoch": 10.22, "learning_rate": 4.489176176890157e-05, "loss": 0.0034, "step": 57320 }, { "epoch": 10.22, "learning_rate": 4.4890870185449365e-05, "loss": 0.0063, "step": 57330 }, { "epoch": 10.22, "learning_rate": 4.488997860199715e-05, "loss": 0.006, "step": 57340 }, { "epoch": 10.23, "learning_rate": 4.488908701854494e-05, "loss": 0.0046, "step": 57350 }, { "epoch": 10.23, "learning_rate": 4.4888195435092725e-05, "loss": 0.0033, "step": 57360 }, { "epoch": 10.23, "learning_rate": 4.4887303851640516e-05, "loss": 0.0051, "step": 57370 }, { "epoch": 10.23, "learning_rate": 4.488641226818831e-05, "loss": 0.0043, "step": 57380 }, { "epoch": 10.23, "learning_rate": 4.488552068473609e-05, "loss": 0.0057, "step": 57390 }, { "epoch": 10.24, "learning_rate": 4.488462910128388e-05, "loss": 0.0036, "step": 57400 }, { "epoch": 10.24, "learning_rate": 4.488373751783167e-05, "loss": 0.0035, "step": 57410 }, { "epoch": 10.24, "learning_rate": 4.488284593437946e-05, "loss": 0.0064, "step": 57420 }, { "epoch": 10.24, "learning_rate": 4.488195435092724e-05, "loss": 0.005, "step": 57430 }, { "epoch": 10.24, "learning_rate": 4.488106276747504e-05, "loss": 0.0038, "step": 57440 }, { "epoch": 10.24, "learning_rate": 4.4880171184022826e-05, "loss": 0.0053, "step": 57450 }, { "epoch": 10.25, "learning_rate": 4.487927960057062e-05, "loss": 0.0076, "step": 57460 }, { "epoch": 10.25, "learning_rate": 4.487838801711841e-05, "loss": 0.0048, "step": 57470 }, { "epoch": 10.25, "learning_rate": 4.487749643366619e-05, "loss": 0.0051, "step": 57480 }, { "epoch": 10.25, "learning_rate": 4.4876604850213984e-05, "loss": 0.0063, "step": 57490 }, { "epoch": 10.25, "learning_rate": 4.487571326676177e-05, "loss": 0.0046, "step": 57500 }, { "epoch": 10.25, "learning_rate": 4.487482168330956e-05, "loss": 0.0031, "step": 57510 }, { "epoch": 10.26, "learning_rate": 4.487393009985735e-05, "loss": 0.0059, "step": 57520 }, { "epoch": 10.26, "learning_rate": 4.4873038516405135e-05, "loss": 0.0029, "step": 57530 }, { "epoch": 10.26, "learning_rate": 4.4872146932952926e-05, "loss": 0.0073, "step": 57540 }, { "epoch": 10.26, "learning_rate": 4.487125534950072e-05, "loss": 0.0037, "step": 57550 }, { "epoch": 10.26, "learning_rate": 4.487036376604851e-05, "loss": 0.0039, "step": 57560 }, { "epoch": 10.27, "learning_rate": 4.486947218259629e-05, "loss": 0.0049, "step": 57570 }, { "epoch": 10.27, "learning_rate": 4.4868580599144084e-05, "loss": 0.0049, "step": 57580 }, { "epoch": 10.27, "learning_rate": 4.486768901569187e-05, "loss": 0.0042, "step": 57590 }, { "epoch": 10.27, "learning_rate": 4.486679743223966e-05, "loss": 0.0063, "step": 57600 }, { "epoch": 10.27, "learning_rate": 4.486590584878745e-05, "loss": 0.006, "step": 57610 }, { "epoch": 10.27, "learning_rate": 4.4865014265335236e-05, "loss": 0.0048, "step": 57620 }, { "epoch": 10.28, "learning_rate": 4.486412268188303e-05, "loss": 0.0035, "step": 57630 }, { "epoch": 10.28, "learning_rate": 4.486323109843081e-05, "loss": 0.0038, "step": 57640 }, { "epoch": 10.28, "learning_rate": 4.48623395149786e-05, "loss": 0.0048, "step": 57650 }, { "epoch": 10.28, "learning_rate": 4.486144793152639e-05, "loss": 0.0045, "step": 57660 }, { "epoch": 10.28, "learning_rate": 4.4860556348074185e-05, "loss": 0.0028, "step": 57670 }, { "epoch": 10.29, "learning_rate": 4.485966476462197e-05, "loss": 0.003, "step": 57680 }, { "epoch": 10.29, "learning_rate": 4.485877318116976e-05, "loss": 0.0032, "step": 57690 }, { "epoch": 10.29, "learning_rate": 4.485788159771755e-05, "loss": 0.0038, "step": 57700 }, { "epoch": 10.29, "learning_rate": 4.4856990014265336e-05, "loss": 0.0076, "step": 57710 }, { "epoch": 10.29, "learning_rate": 4.485609843081313e-05, "loss": 0.0042, "step": 57720 }, { "epoch": 10.29, "learning_rate": 4.485520684736091e-05, "loss": 0.0041, "step": 57730 }, { "epoch": 10.3, "learning_rate": 4.48543152639087e-05, "loss": 0.0051, "step": 57740 }, { "epoch": 10.3, "learning_rate": 4.4853423680456494e-05, "loss": 0.0045, "step": 57750 }, { "epoch": 10.3, "learning_rate": 4.485253209700428e-05, "loss": 0.0028, "step": 57760 }, { "epoch": 10.3, "learning_rate": 4.485164051355207e-05, "loss": 0.0045, "step": 57770 }, { "epoch": 10.3, "learning_rate": 4.485074893009986e-05, "loss": 0.0045, "step": 57780 }, { "epoch": 10.3, "learning_rate": 4.484985734664765e-05, "loss": 0.0022, "step": 57790 }, { "epoch": 10.31, "learning_rate": 4.484896576319544e-05, "loss": 0.0038, "step": 57800 }, { "epoch": 10.31, "learning_rate": 4.484807417974323e-05, "loss": 0.0045, "step": 57810 }, { "epoch": 10.31, "learning_rate": 4.484718259629101e-05, "loss": 0.0044, "step": 57820 }, { "epoch": 10.31, "learning_rate": 4.4846291012838804e-05, "loss": 0.0067, "step": 57830 }, { "epoch": 10.31, "learning_rate": 4.4845399429386595e-05, "loss": 0.0049, "step": 57840 }, { "epoch": 10.32, "learning_rate": 4.484450784593438e-05, "loss": 0.0045, "step": 57850 }, { "epoch": 10.32, "learning_rate": 4.484361626248217e-05, "loss": 0.0064, "step": 57860 }, { "epoch": 10.32, "learning_rate": 4.4842724679029955e-05, "loss": 0.002, "step": 57870 }, { "epoch": 10.32, "learning_rate": 4.4841833095577746e-05, "loss": 0.0039, "step": 57880 }, { "epoch": 10.32, "learning_rate": 4.484094151212554e-05, "loss": 0.0036, "step": 57890 }, { "epoch": 10.32, "learning_rate": 4.484004992867333e-05, "loss": 0.0037, "step": 57900 }, { "epoch": 10.33, "learning_rate": 4.483915834522111e-05, "loss": 0.0036, "step": 57910 }, { "epoch": 10.33, "learning_rate": 4.4838266761768904e-05, "loss": 0.0045, "step": 57920 }, { "epoch": 10.33, "learning_rate": 4.4837375178316695e-05, "loss": 0.0058, "step": 57930 }, { "epoch": 10.33, "learning_rate": 4.483648359486448e-05, "loss": 0.0049, "step": 57940 }, { "epoch": 10.33, "learning_rate": 4.483559201141227e-05, "loss": 0.0024, "step": 57950 }, { "epoch": 10.34, "learning_rate": 4.4834700427960055e-05, "loss": 0.0032, "step": 57960 }, { "epoch": 10.34, "learning_rate": 4.483380884450785e-05, "loss": 0.0026, "step": 57970 }, { "epoch": 10.34, "learning_rate": 4.483291726105564e-05, "loss": 0.0042, "step": 57980 }, { "epoch": 10.34, "learning_rate": 4.483202567760342e-05, "loss": 0.0038, "step": 57990 }, { "epoch": 10.34, "learning_rate": 4.483113409415122e-05, "loss": 0.0019, "step": 58000 }, { "epoch": 10.34, "learning_rate": 4.4830242510699005e-05, "loss": 0.0063, "step": 58010 }, { "epoch": 10.35, "learning_rate": 4.4829350927246796e-05, "loss": 0.0038, "step": 58020 }, { "epoch": 10.35, "learning_rate": 4.482845934379458e-05, "loss": 0.0029, "step": 58030 }, { "epoch": 10.35, "learning_rate": 4.482756776034237e-05, "loss": 0.0021, "step": 58040 }, { "epoch": 10.35, "learning_rate": 4.4826676176890156e-05, "loss": 0.0038, "step": 58050 }, { "epoch": 10.35, "learning_rate": 4.482578459343795e-05, "loss": 0.003, "step": 58060 }, { "epoch": 10.35, "learning_rate": 4.482489300998574e-05, "loss": 0.0051, "step": 58070 }, { "epoch": 10.36, "learning_rate": 4.482400142653352e-05, "loss": 0.0035, "step": 58080 }, { "epoch": 10.36, "learning_rate": 4.4823109843081314e-05, "loss": 0.0045, "step": 58090 }, { "epoch": 10.36, "learning_rate": 4.48222182596291e-05, "loss": 0.0062, "step": 58100 }, { "epoch": 10.36, "learning_rate": 4.4821326676176896e-05, "loss": 0.005, "step": 58110 }, { "epoch": 10.36, "learning_rate": 4.482043509272468e-05, "loss": 0.0046, "step": 58120 }, { "epoch": 10.37, "learning_rate": 4.481954350927247e-05, "loss": 0.0046, "step": 58130 }, { "epoch": 10.37, "learning_rate": 4.4818651925820257e-05, "loss": 0.0046, "step": 58140 }, { "epoch": 10.37, "learning_rate": 4.481776034236805e-05, "loss": 0.0066, "step": 58150 }, { "epoch": 10.37, "learning_rate": 4.481686875891584e-05, "loss": 0.0049, "step": 58160 }, { "epoch": 10.37, "learning_rate": 4.4815977175463623e-05, "loss": 0.0068, "step": 58170 }, { "epoch": 10.37, "learning_rate": 4.4815085592011415e-05, "loss": 0.0031, "step": 58180 }, { "epoch": 10.38, "learning_rate": 4.48141940085592e-05, "loss": 0.0055, "step": 58190 }, { "epoch": 10.38, "learning_rate": 4.481330242510699e-05, "loss": 0.0046, "step": 58200 }, { "epoch": 10.38, "learning_rate": 4.481241084165478e-05, "loss": 0.0034, "step": 58210 }, { "epoch": 10.38, "learning_rate": 4.481151925820257e-05, "loss": 0.0048, "step": 58220 }, { "epoch": 10.38, "learning_rate": 4.4810627674750364e-05, "loss": 0.0064, "step": 58230 }, { "epoch": 10.39, "learning_rate": 4.480973609129815e-05, "loss": 0.0055, "step": 58240 }, { "epoch": 10.39, "learning_rate": 4.480884450784594e-05, "loss": 0.0065, "step": 58250 }, { "epoch": 10.39, "learning_rate": 4.4807952924393724e-05, "loss": 0.002, "step": 58260 }, { "epoch": 10.39, "learning_rate": 4.4807061340941515e-05, "loss": 0.0041, "step": 58270 }, { "epoch": 10.39, "learning_rate": 4.48061697574893e-05, "loss": 0.0055, "step": 58280 }, { "epoch": 10.39, "learning_rate": 4.480527817403709e-05, "loss": 0.0059, "step": 58290 }, { "epoch": 10.4, "learning_rate": 4.480438659058488e-05, "loss": 0.0024, "step": 58300 }, { "epoch": 10.4, "learning_rate": 4.4803495007132666e-05, "loss": 0.0051, "step": 58310 }, { "epoch": 10.4, "learning_rate": 4.480260342368046e-05, "loss": 0.0036, "step": 58320 }, { "epoch": 10.4, "learning_rate": 4.480171184022825e-05, "loss": 0.0033, "step": 58330 }, { "epoch": 10.4, "learning_rate": 4.480082025677604e-05, "loss": 0.0051, "step": 58340 }, { "epoch": 10.4, "learning_rate": 4.4799928673323825e-05, "loss": 0.0053, "step": 58350 }, { "epoch": 10.41, "learning_rate": 4.4799037089871616e-05, "loss": 0.0088, "step": 58360 }, { "epoch": 10.41, "learning_rate": 4.47981455064194e-05, "loss": 0.0046, "step": 58370 }, { "epoch": 10.41, "learning_rate": 4.479725392296719e-05, "loss": 0.0097, "step": 58380 }, { "epoch": 10.41, "learning_rate": 4.479636233951498e-05, "loss": 0.0037, "step": 58390 }, { "epoch": 10.41, "learning_rate": 4.479547075606277e-05, "loss": 0.0034, "step": 58400 }, { "epoch": 10.42, "learning_rate": 4.479457917261056e-05, "loss": 0.0029, "step": 58410 }, { "epoch": 10.42, "learning_rate": 4.479368758915834e-05, "loss": 0.0036, "step": 58420 }, { "epoch": 10.42, "learning_rate": 4.4792796005706134e-05, "loss": 0.0053, "step": 58430 }, { "epoch": 10.42, "learning_rate": 4.4791904422253925e-05, "loss": 0.0041, "step": 58440 }, { "epoch": 10.42, "learning_rate": 4.4791012838801716e-05, "loss": 0.0056, "step": 58450 }, { "epoch": 10.42, "learning_rate": 4.479012125534951e-05, "loss": 0.005, "step": 58460 }, { "epoch": 10.43, "learning_rate": 4.478922967189729e-05, "loss": 0.005, "step": 58470 }, { "epoch": 10.43, "learning_rate": 4.478833808844508e-05, "loss": 0.0043, "step": 58480 }, { "epoch": 10.43, "learning_rate": 4.478744650499287e-05, "loss": 0.0038, "step": 58490 }, { "epoch": 10.43, "learning_rate": 4.478655492154066e-05, "loss": 0.0043, "step": 58500 }, { "epoch": 10.43, "learning_rate": 4.478566333808844e-05, "loss": 0.0037, "step": 58510 }, { "epoch": 10.44, "learning_rate": 4.4784771754636234e-05, "loss": 0.0041, "step": 58520 }, { "epoch": 10.44, "learning_rate": 4.4783880171184026e-05, "loss": 0.0031, "step": 58530 }, { "epoch": 10.44, "learning_rate": 4.478298858773181e-05, "loss": 0.0052, "step": 58540 }, { "epoch": 10.44, "learning_rate": 4.478209700427961e-05, "loss": 0.005, "step": 58550 }, { "epoch": 10.44, "learning_rate": 4.478120542082739e-05, "loss": 0.0032, "step": 58560 }, { "epoch": 10.44, "learning_rate": 4.4780313837375184e-05, "loss": 0.0036, "step": 58570 }, { "epoch": 10.45, "learning_rate": 4.477942225392297e-05, "loss": 0.0045, "step": 58580 }, { "epoch": 10.45, "learning_rate": 4.477853067047076e-05, "loss": 0.006, "step": 58590 }, { "epoch": 10.45, "learning_rate": 4.4777639087018544e-05, "loss": 0.0039, "step": 58600 }, { "epoch": 10.45, "learning_rate": 4.4776747503566335e-05, "loss": 0.0029, "step": 58610 }, { "epoch": 10.45, "learning_rate": 4.4775855920114126e-05, "loss": 0.0044, "step": 58620 }, { "epoch": 10.45, "learning_rate": 4.477496433666191e-05, "loss": 0.0058, "step": 58630 }, { "epoch": 10.46, "learning_rate": 4.47740727532097e-05, "loss": 0.002, "step": 58640 }, { "epoch": 10.46, "learning_rate": 4.4773181169757486e-05, "loss": 0.0036, "step": 58650 }, { "epoch": 10.46, "learning_rate": 4.4772289586305284e-05, "loss": 0.0044, "step": 58660 }, { "epoch": 10.46, "learning_rate": 4.477139800285307e-05, "loss": 0.0045, "step": 58670 }, { "epoch": 10.46, "learning_rate": 4.477050641940086e-05, "loss": 0.0066, "step": 58680 }, { "epoch": 10.47, "learning_rate": 4.476961483594865e-05, "loss": 0.0031, "step": 58690 }, { "epoch": 10.47, "learning_rate": 4.4768723252496436e-05, "loss": 0.0051, "step": 58700 }, { "epoch": 10.47, "learning_rate": 4.476783166904423e-05, "loss": 0.0049, "step": 58710 }, { "epoch": 10.47, "learning_rate": 4.476694008559201e-05, "loss": 0.0031, "step": 58720 }, { "epoch": 10.47, "learning_rate": 4.47660485021398e-05, "loss": 0.0036, "step": 58730 }, { "epoch": 10.47, "learning_rate": 4.476515691868759e-05, "loss": 0.004, "step": 58740 }, { "epoch": 10.48, "learning_rate": 4.476426533523538e-05, "loss": 0.0031, "step": 58750 }, { "epoch": 10.48, "learning_rate": 4.476337375178317e-05, "loss": 0.0037, "step": 58760 }, { "epoch": 10.48, "learning_rate": 4.476248216833096e-05, "loss": 0.0043, "step": 58770 }, { "epoch": 10.48, "learning_rate": 4.476159058487875e-05, "loss": 0.0046, "step": 58780 }, { "epoch": 10.48, "learning_rate": 4.4760699001426536e-05, "loss": 0.0039, "step": 58790 }, { "epoch": 10.49, "learning_rate": 4.475980741797433e-05, "loss": 0.0043, "step": 58800 }, { "epoch": 10.49, "learning_rate": 4.475891583452211e-05, "loss": 0.0029, "step": 58810 }, { "epoch": 10.49, "learning_rate": 4.47580242510699e-05, "loss": 0.0057, "step": 58820 }, { "epoch": 10.49, "learning_rate": 4.475713266761769e-05, "loss": 0.0041, "step": 58830 }, { "epoch": 10.49, "learning_rate": 4.475624108416548e-05, "loss": 0.0043, "step": 58840 }, { "epoch": 10.49, "learning_rate": 4.475534950071327e-05, "loss": 0.0076, "step": 58850 }, { "epoch": 10.5, "learning_rate": 4.4754457917261054e-05, "loss": 0.0037, "step": 58860 }, { "epoch": 10.5, "learning_rate": 4.4753566333808845e-05, "loss": 0.0031, "step": 58870 }, { "epoch": 10.5, "learning_rate": 4.475267475035664e-05, "loss": 0.0042, "step": 58880 }, { "epoch": 10.5, "learning_rate": 4.475178316690443e-05, "loss": 0.0045, "step": 58890 }, { "epoch": 10.5, "learning_rate": 4.475089158345221e-05, "loss": 0.005, "step": 58900 }, { "epoch": 10.5, "learning_rate": 4.4750000000000004e-05, "loss": 0.0034, "step": 58910 }, { "epoch": 10.51, "learning_rate": 4.4749108416547795e-05, "loss": 0.0049, "step": 58920 }, { "epoch": 10.51, "learning_rate": 4.474821683309558e-05, "loss": 0.005, "step": 58930 }, { "epoch": 10.51, "learning_rate": 4.474732524964337e-05, "loss": 0.0048, "step": 58940 }, { "epoch": 10.51, "learning_rate": 4.4746433666191155e-05, "loss": 0.0059, "step": 58950 }, { "epoch": 10.51, "learning_rate": 4.4745542082738946e-05, "loss": 0.0044, "step": 58960 }, { "epoch": 10.52, "learning_rate": 4.474465049928673e-05, "loss": 0.0023, "step": 58970 }, { "epoch": 10.52, "learning_rate": 4.474375891583452e-05, "loss": 0.004, "step": 58980 }, { "epoch": 10.52, "learning_rate": 4.474286733238231e-05, "loss": 0.0037, "step": 58990 }, { "epoch": 10.52, "learning_rate": 4.4741975748930104e-05, "loss": 0.0048, "step": 59000 }, { "epoch": 10.52, "learning_rate": 4.4741084165477895e-05, "loss": 0.0041, "step": 59010 }, { "epoch": 10.52, "learning_rate": 4.474019258202568e-05, "loss": 0.0059, "step": 59020 }, { "epoch": 10.53, "learning_rate": 4.473930099857347e-05, "loss": 0.0038, "step": 59030 }, { "epoch": 10.53, "learning_rate": 4.4738409415121255e-05, "loss": 0.0043, "step": 59040 }, { "epoch": 10.53, "learning_rate": 4.4737517831669047e-05, "loss": 0.0025, "step": 59050 }, { "epoch": 10.53, "learning_rate": 4.473662624821683e-05, "loss": 0.0065, "step": 59060 }, { "epoch": 10.53, "learning_rate": 4.473573466476462e-05, "loss": 0.0047, "step": 59070 }, { "epoch": 10.53, "learning_rate": 4.4734843081312413e-05, "loss": 0.0026, "step": 59080 }, { "epoch": 10.54, "learning_rate": 4.47339514978602e-05, "loss": 0.0038, "step": 59090 }, { "epoch": 10.54, "learning_rate": 4.4733059914407996e-05, "loss": 0.0054, "step": 59100 }, { "epoch": 10.54, "learning_rate": 4.473216833095578e-05, "loss": 0.0043, "step": 59110 }, { "epoch": 10.54, "learning_rate": 4.473127674750357e-05, "loss": 0.0041, "step": 59120 }, { "epoch": 10.54, "learning_rate": 4.4730385164051356e-05, "loss": 0.0072, "step": 59130 }, { "epoch": 10.55, "learning_rate": 4.472949358059915e-05, "loss": 0.0043, "step": 59140 }, { "epoch": 10.55, "learning_rate": 4.472860199714694e-05, "loss": 0.0053, "step": 59150 }, { "epoch": 10.55, "learning_rate": 4.472771041369472e-05, "loss": 0.0027, "step": 59160 }, { "epoch": 10.55, "learning_rate": 4.4726818830242514e-05, "loss": 0.0053, "step": 59170 }, { "epoch": 10.55, "learning_rate": 4.47259272467903e-05, "loss": 0.0032, "step": 59180 }, { "epoch": 10.55, "learning_rate": 4.472503566333809e-05, "loss": 0.0052, "step": 59190 }, { "epoch": 10.56, "learning_rate": 4.4724144079885874e-05, "loss": 0.003, "step": 59200 }, { "epoch": 10.56, "learning_rate": 4.472325249643367e-05, "loss": 0.0049, "step": 59210 }, { "epoch": 10.56, "learning_rate": 4.4722360912981457e-05, "loss": 0.0061, "step": 59220 }, { "epoch": 10.56, "learning_rate": 4.472146932952925e-05, "loss": 0.0045, "step": 59230 }, { "epoch": 10.56, "learning_rate": 4.472057774607704e-05, "loss": 0.0045, "step": 59240 }, { "epoch": 10.57, "learning_rate": 4.471968616262482e-05, "loss": 0.0051, "step": 59250 }, { "epoch": 10.57, "learning_rate": 4.4718794579172615e-05, "loss": 0.0034, "step": 59260 }, { "epoch": 10.57, "learning_rate": 4.47179029957204e-05, "loss": 0.0043, "step": 59270 }, { "epoch": 10.57, "learning_rate": 4.471701141226819e-05, "loss": 0.0049, "step": 59280 }, { "epoch": 10.57, "learning_rate": 4.4716119828815975e-05, "loss": 0.0044, "step": 59290 }, { "epoch": 10.57, "learning_rate": 4.4715228245363766e-05, "loss": 0.0037, "step": 59300 }, { "epoch": 10.58, "learning_rate": 4.471433666191156e-05, "loss": 0.0023, "step": 59310 }, { "epoch": 10.58, "learning_rate": 4.471344507845935e-05, "loss": 0.0045, "step": 59320 }, { "epoch": 10.58, "learning_rate": 4.471255349500714e-05, "loss": 0.0045, "step": 59330 }, { "epoch": 10.58, "learning_rate": 4.4711661911554924e-05, "loss": 0.003, "step": 59340 }, { "epoch": 10.58, "learning_rate": 4.4710770328102715e-05, "loss": 0.0046, "step": 59350 }, { "epoch": 10.58, "learning_rate": 4.47098787446505e-05, "loss": 0.0035, "step": 59360 }, { "epoch": 10.59, "learning_rate": 4.470898716119829e-05, "loss": 0.0032, "step": 59370 }, { "epoch": 10.59, "learning_rate": 4.470809557774608e-05, "loss": 0.0033, "step": 59380 }, { "epoch": 10.59, "learning_rate": 4.4707203994293866e-05, "loss": 0.0046, "step": 59390 }, { "epoch": 10.59, "learning_rate": 4.470631241084166e-05, "loss": 0.0041, "step": 59400 }, { "epoch": 10.59, "learning_rate": 4.470542082738944e-05, "loss": 0.0062, "step": 59410 }, { "epoch": 10.6, "learning_rate": 4.470452924393723e-05, "loss": 0.0044, "step": 59420 }, { "epoch": 10.6, "learning_rate": 4.4703637660485024e-05, "loss": 0.0022, "step": 59430 }, { "epoch": 10.6, "learning_rate": 4.4702746077032816e-05, "loss": 0.0058, "step": 59440 }, { "epoch": 10.6, "learning_rate": 4.47018544935806e-05, "loss": 0.004, "step": 59450 }, { "epoch": 10.6, "learning_rate": 4.470096291012839e-05, "loss": 0.0029, "step": 59460 }, { "epoch": 10.6, "learning_rate": 4.470007132667618e-05, "loss": 0.0047, "step": 59470 }, { "epoch": 10.61, "learning_rate": 4.469917974322397e-05, "loss": 0.0031, "step": 59480 }, { "epoch": 10.61, "learning_rate": 4.469828815977176e-05, "loss": 0.0024, "step": 59490 }, { "epoch": 10.61, "learning_rate": 4.469739657631954e-05, "loss": 0.0024, "step": 59500 }, { "epoch": 10.61, "learning_rate": 4.4696504992867334e-05, "loss": 0.0039, "step": 59510 }, { "epoch": 10.61, "learning_rate": 4.469561340941512e-05, "loss": 0.0037, "step": 59520 }, { "epoch": 10.62, "learning_rate": 4.469472182596291e-05, "loss": 0.0039, "step": 59530 }, { "epoch": 10.62, "learning_rate": 4.46938302425107e-05, "loss": 0.0029, "step": 59540 }, { "epoch": 10.62, "learning_rate": 4.469293865905849e-05, "loss": 0.0052, "step": 59550 }, { "epoch": 10.62, "learning_rate": 4.469204707560628e-05, "loss": 0.0044, "step": 59560 }, { "epoch": 10.62, "learning_rate": 4.469115549215407e-05, "loss": 0.0038, "step": 59570 }, { "epoch": 10.62, "learning_rate": 4.469026390870186e-05, "loss": 0.0041, "step": 59580 }, { "epoch": 10.63, "learning_rate": 4.468937232524964e-05, "loss": 0.0042, "step": 59590 }, { "epoch": 10.63, "learning_rate": 4.4688480741797434e-05, "loss": 0.0037, "step": 59600 }, { "epoch": 10.63, "learning_rate": 4.468758915834522e-05, "loss": 0.0039, "step": 59610 }, { "epoch": 10.63, "learning_rate": 4.468669757489301e-05, "loss": 0.0057, "step": 59620 }, { "epoch": 10.63, "learning_rate": 4.46858059914408e-05, "loss": 0.0054, "step": 59630 }, { "epoch": 10.63, "learning_rate": 4.4684914407988586e-05, "loss": 0.0031, "step": 59640 }, { "epoch": 10.64, "learning_rate": 4.4684022824536384e-05, "loss": 0.0051, "step": 59650 }, { "epoch": 10.64, "learning_rate": 4.468313124108417e-05, "loss": 0.0048, "step": 59660 }, { "epoch": 10.64, "learning_rate": 4.468223965763196e-05, "loss": 0.006, "step": 59670 }, { "epoch": 10.64, "learning_rate": 4.4681348074179744e-05, "loss": 0.0037, "step": 59680 }, { "epoch": 10.64, "learning_rate": 4.4680456490727535e-05, "loss": 0.0031, "step": 59690 }, { "epoch": 10.65, "learning_rate": 4.4679564907275326e-05, "loss": 0.0037, "step": 59700 }, { "epoch": 10.65, "learning_rate": 4.467867332382311e-05, "loss": 0.0038, "step": 59710 }, { "epoch": 10.65, "learning_rate": 4.46777817403709e-05, "loss": 0.0052, "step": 59720 }, { "epoch": 10.65, "learning_rate": 4.4676890156918686e-05, "loss": 0.0055, "step": 59730 }, { "epoch": 10.65, "learning_rate": 4.467599857346648e-05, "loss": 0.0086, "step": 59740 }, { "epoch": 10.65, "learning_rate": 4.467510699001426e-05, "loss": 0.004, "step": 59750 }, { "epoch": 10.66, "learning_rate": 4.467421540656206e-05, "loss": 0.0059, "step": 59760 }, { "epoch": 10.66, "learning_rate": 4.4673323823109844e-05, "loss": 0.0034, "step": 59770 }, { "epoch": 10.66, "learning_rate": 4.4672432239657636e-05, "loss": 0.004, "step": 59780 }, { "epoch": 10.66, "learning_rate": 4.467154065620543e-05, "loss": 0.0036, "step": 59790 }, { "epoch": 10.66, "learning_rate": 4.467064907275321e-05, "loss": 0.0029, "step": 59800 }, { "epoch": 10.67, "learning_rate": 4.4669757489301e-05, "loss": 0.0025, "step": 59810 }, { "epoch": 10.67, "learning_rate": 4.466886590584879e-05, "loss": 0.0061, "step": 59820 }, { "epoch": 10.67, "learning_rate": 4.466797432239658e-05, "loss": 0.0026, "step": 59830 }, { "epoch": 10.67, "learning_rate": 4.466708273894436e-05, "loss": 0.0025, "step": 59840 }, { "epoch": 10.67, "learning_rate": 4.4666191155492154e-05, "loss": 0.0018, "step": 59850 }, { "epoch": 10.67, "learning_rate": 4.4665299572039945e-05, "loss": 0.0034, "step": 59860 }, { "epoch": 10.68, "learning_rate": 4.4664407988587736e-05, "loss": 0.0029, "step": 59870 }, { "epoch": 10.68, "learning_rate": 4.466351640513553e-05, "loss": 0.0034, "step": 59880 }, { "epoch": 10.68, "learning_rate": 4.466262482168331e-05, "loss": 0.0019, "step": 59890 }, { "epoch": 10.68, "learning_rate": 4.46617332382311e-05, "loss": 0.0029, "step": 59900 }, { "epoch": 10.68, "learning_rate": 4.466084165477889e-05, "loss": 0.0047, "step": 59910 }, { "epoch": 10.68, "learning_rate": 4.465995007132668e-05, "loss": 0.0045, "step": 59920 }, { "epoch": 10.69, "learning_rate": 4.465905848787447e-05, "loss": 0.0051, "step": 59930 }, { "epoch": 10.69, "learning_rate": 4.4658166904422254e-05, "loss": 0.0059, "step": 59940 }, { "epoch": 10.69, "learning_rate": 4.4657275320970045e-05, "loss": 0.0021, "step": 59950 }, { "epoch": 10.69, "learning_rate": 4.465638373751783e-05, "loss": 0.0047, "step": 59960 }, { "epoch": 10.69, "learning_rate": 4.465549215406562e-05, "loss": 0.0039, "step": 59970 }, { "epoch": 10.7, "learning_rate": 4.465460057061341e-05, "loss": 0.0031, "step": 59980 }, { "epoch": 10.7, "learning_rate": 4.4653708987161203e-05, "loss": 0.0065, "step": 59990 }, { "epoch": 10.7, "learning_rate": 4.465281740370899e-05, "loss": 0.0026, "step": 60000 }, { "epoch": 10.7, "learning_rate": 4.465192582025678e-05, "loss": 0.0034, "step": 60010 }, { "epoch": 10.7, "learning_rate": 4.465103423680457e-05, "loss": 0.0021, "step": 60020 }, { "epoch": 10.7, "learning_rate": 4.4650142653352355e-05, "loss": 0.0047, "step": 60030 }, { "epoch": 10.71, "learning_rate": 4.4649251069900146e-05, "loss": 0.004, "step": 60040 }, { "epoch": 10.71, "learning_rate": 4.464835948644793e-05, "loss": 0.0034, "step": 60050 }, { "epoch": 10.71, "learning_rate": 4.464746790299572e-05, "loss": 0.003, "step": 60060 }, { "epoch": 10.71, "learning_rate": 4.4646576319543506e-05, "loss": 0.0034, "step": 60070 }, { "epoch": 10.71, "learning_rate": 4.46456847360913e-05, "loss": 0.0025, "step": 60080 }, { "epoch": 10.72, "learning_rate": 4.464479315263909e-05, "loss": 0.0048, "step": 60090 }, { "epoch": 10.72, "learning_rate": 4.464390156918688e-05, "loss": 0.0051, "step": 60100 }, { "epoch": 10.72, "learning_rate": 4.464300998573467e-05, "loss": 0.0058, "step": 60110 }, { "epoch": 10.72, "learning_rate": 4.4642118402282455e-05, "loss": 0.0031, "step": 60120 }, { "epoch": 10.72, "learning_rate": 4.4641226818830247e-05, "loss": 0.0058, "step": 60130 }, { "epoch": 10.72, "learning_rate": 4.464033523537803e-05, "loss": 0.0058, "step": 60140 }, { "epoch": 10.73, "learning_rate": 4.463944365192582e-05, "loss": 0.0029, "step": 60150 }, { "epoch": 10.73, "learning_rate": 4.4638552068473613e-05, "loss": 0.0029, "step": 60160 }, { "epoch": 10.73, "learning_rate": 4.46376604850214e-05, "loss": 0.0069, "step": 60170 }, { "epoch": 10.73, "learning_rate": 4.463676890156919e-05, "loss": 0.0035, "step": 60180 }, { "epoch": 10.73, "learning_rate": 4.4635877318116973e-05, "loss": 0.0025, "step": 60190 }, { "epoch": 10.73, "learning_rate": 4.463498573466477e-05, "loss": 0.0077, "step": 60200 }, { "epoch": 10.74, "learning_rate": 4.4634094151212556e-05, "loss": 0.0041, "step": 60210 }, { "epoch": 10.74, "learning_rate": 4.463320256776035e-05, "loss": 0.0057, "step": 60220 }, { "epoch": 10.74, "learning_rate": 4.463231098430813e-05, "loss": 0.0046, "step": 60230 }, { "epoch": 10.74, "learning_rate": 4.463141940085592e-05, "loss": 0.0052, "step": 60240 }, { "epoch": 10.74, "learning_rate": 4.4630527817403714e-05, "loss": 0.0038, "step": 60250 }, { "epoch": 10.75, "learning_rate": 4.46296362339515e-05, "loss": 0.002, "step": 60260 }, { "epoch": 10.75, "learning_rate": 4.462874465049929e-05, "loss": 0.0027, "step": 60270 }, { "epoch": 10.75, "learning_rate": 4.4627853067047074e-05, "loss": 0.0033, "step": 60280 }, { "epoch": 10.75, "learning_rate": 4.4626961483594865e-05, "loss": 0.0034, "step": 60290 }, { "epoch": 10.75, "learning_rate": 4.462606990014265e-05, "loss": 0.0025, "step": 60300 }, { "epoch": 10.75, "learning_rate": 4.462517831669045e-05, "loss": 0.0035, "step": 60310 }, { "epoch": 10.76, "learning_rate": 4.462428673323823e-05, "loss": 0.0038, "step": 60320 }, { "epoch": 10.76, "learning_rate": 4.462339514978602e-05, "loss": 0.008, "step": 60330 }, { "epoch": 10.76, "learning_rate": 4.4622503566333815e-05, "loss": 0.0036, "step": 60340 }, { "epoch": 10.76, "learning_rate": 4.46216119828816e-05, "loss": 0.0015, "step": 60350 }, { "epoch": 10.76, "learning_rate": 4.462072039942939e-05, "loss": 0.0032, "step": 60360 }, { "epoch": 10.76, "learning_rate": 4.4619828815977175e-05, "loss": 0.0034, "step": 60370 }, { "epoch": 10.77, "learning_rate": 4.4618937232524966e-05, "loss": 0.0055, "step": 60380 }, { "epoch": 10.77, "learning_rate": 4.461804564907276e-05, "loss": 0.0024, "step": 60390 }, { "epoch": 10.77, "learning_rate": 4.461715406562054e-05, "loss": 0.0032, "step": 60400 }, { "epoch": 10.77, "learning_rate": 4.461626248216833e-05, "loss": 0.0046, "step": 60410 }, { "epoch": 10.77, "learning_rate": 4.4615370898716124e-05, "loss": 0.0044, "step": 60420 }, { "epoch": 10.78, "learning_rate": 4.4614479315263915e-05, "loss": 0.0081, "step": 60430 }, { "epoch": 10.78, "learning_rate": 4.46135877318117e-05, "loss": 0.006, "step": 60440 }, { "epoch": 10.78, "learning_rate": 4.461269614835949e-05, "loss": 0.0058, "step": 60450 }, { "epoch": 10.78, "learning_rate": 4.4611804564907275e-05, "loss": 0.0075, "step": 60460 }, { "epoch": 10.78, "learning_rate": 4.4610912981455066e-05, "loss": 0.0045, "step": 60470 }, { "epoch": 10.78, "learning_rate": 4.461002139800286e-05, "loss": 0.0054, "step": 60480 }, { "epoch": 10.79, "learning_rate": 4.460912981455064e-05, "loss": 0.004, "step": 60490 }, { "epoch": 10.79, "learning_rate": 4.460823823109843e-05, "loss": 0.0044, "step": 60500 }, { "epoch": 10.79, "learning_rate": 4.460734664764622e-05, "loss": 0.0026, "step": 60510 }, { "epoch": 10.79, "learning_rate": 4.460645506419401e-05, "loss": 0.0026, "step": 60520 }, { "epoch": 10.79, "learning_rate": 4.46055634807418e-05, "loss": 0.0045, "step": 60530 }, { "epoch": 10.8, "learning_rate": 4.460467189728959e-05, "loss": 0.0021, "step": 60540 }, { "epoch": 10.8, "learning_rate": 4.4603780313837376e-05, "loss": 0.0049, "step": 60550 }, { "epoch": 10.8, "learning_rate": 4.460288873038517e-05, "loss": 0.0025, "step": 60560 }, { "epoch": 10.8, "learning_rate": 4.460199714693296e-05, "loss": 0.0025, "step": 60570 }, { "epoch": 10.8, "learning_rate": 4.460110556348074e-05, "loss": 0.0032, "step": 60580 }, { "epoch": 10.8, "learning_rate": 4.4600213980028534e-05, "loss": 0.0038, "step": 60590 }, { "epoch": 10.81, "learning_rate": 4.459932239657632e-05, "loss": 0.0041, "step": 60600 }, { "epoch": 10.81, "learning_rate": 4.459843081312411e-05, "loss": 0.0069, "step": 60610 }, { "epoch": 10.81, "learning_rate": 4.45975392296719e-05, "loss": 0.0037, "step": 60620 }, { "epoch": 10.81, "learning_rate": 4.4596647646219685e-05, "loss": 0.0062, "step": 60630 }, { "epoch": 10.81, "learning_rate": 4.459575606276748e-05, "loss": 0.0035, "step": 60640 }, { "epoch": 10.81, "learning_rate": 4.459486447931527e-05, "loss": 0.0023, "step": 60650 }, { "epoch": 10.82, "learning_rate": 4.459397289586306e-05, "loss": 0.0044, "step": 60660 }, { "epoch": 10.82, "learning_rate": 4.459308131241084e-05, "loss": 0.004, "step": 60670 }, { "epoch": 10.82, "learning_rate": 4.4592189728958634e-05, "loss": 0.0045, "step": 60680 }, { "epoch": 10.82, "learning_rate": 4.459129814550642e-05, "loss": 0.0047, "step": 60690 }, { "epoch": 10.82, "learning_rate": 4.459040656205421e-05, "loss": 0.0061, "step": 60700 }, { "epoch": 10.83, "learning_rate": 4.4589514978602e-05, "loss": 0.0027, "step": 60710 }, { "epoch": 10.83, "learning_rate": 4.4588623395149786e-05, "loss": 0.0082, "step": 60720 }, { "epoch": 10.83, "learning_rate": 4.458773181169758e-05, "loss": 0.0025, "step": 60730 }, { "epoch": 10.83, "learning_rate": 4.458684022824536e-05, "loss": 0.0046, "step": 60740 }, { "epoch": 10.83, "learning_rate": 4.458594864479316e-05, "loss": 0.0032, "step": 60750 }, { "epoch": 10.83, "learning_rate": 4.4585057061340944e-05, "loss": 0.0052, "step": 60760 }, { "epoch": 10.84, "learning_rate": 4.4584165477888735e-05, "loss": 0.004, "step": 60770 }, { "epoch": 10.84, "learning_rate": 4.458327389443652e-05, "loss": 0.0062, "step": 60780 }, { "epoch": 10.84, "learning_rate": 4.458238231098431e-05, "loss": 0.0038, "step": 60790 }, { "epoch": 10.84, "learning_rate": 4.45814907275321e-05, "loss": 0.0025, "step": 60800 }, { "epoch": 10.84, "learning_rate": 4.4580599144079886e-05, "loss": 0.0036, "step": 60810 }, { "epoch": 10.85, "learning_rate": 4.457970756062768e-05, "loss": 0.0031, "step": 60820 }, { "epoch": 10.85, "learning_rate": 4.457881597717546e-05, "loss": 0.0041, "step": 60830 }, { "epoch": 10.85, "learning_rate": 4.457792439372325e-05, "loss": 0.0085, "step": 60840 }, { "epoch": 10.85, "learning_rate": 4.4577032810271044e-05, "loss": 0.003, "step": 60850 }, { "epoch": 10.85, "learning_rate": 4.4576141226818835e-05, "loss": 0.0028, "step": 60860 }, { "epoch": 10.85, "learning_rate": 4.457524964336663e-05, "loss": 0.0045, "step": 60870 }, { "epoch": 10.86, "learning_rate": 4.457435805991441e-05, "loss": 0.0044, "step": 60880 }, { "epoch": 10.86, "learning_rate": 4.45734664764622e-05, "loss": 0.0048, "step": 60890 }, { "epoch": 10.86, "learning_rate": 4.457257489300999e-05, "loss": 0.0031, "step": 60900 }, { "epoch": 10.86, "learning_rate": 4.457168330955778e-05, "loss": 0.0015, "step": 60910 }, { "epoch": 10.86, "learning_rate": 4.457079172610556e-05, "loss": 0.0039, "step": 60920 }, { "epoch": 10.86, "learning_rate": 4.4569900142653354e-05, "loss": 0.0044, "step": 60930 }, { "epoch": 10.87, "learning_rate": 4.4569008559201145e-05, "loss": 0.0031, "step": 60940 }, { "epoch": 10.87, "learning_rate": 4.456811697574893e-05, "loss": 0.0039, "step": 60950 }, { "epoch": 10.87, "learning_rate": 4.456722539229672e-05, "loss": 0.0037, "step": 60960 }, { "epoch": 10.87, "learning_rate": 4.456633380884451e-05, "loss": 0.0031, "step": 60970 }, { "epoch": 10.87, "learning_rate": 4.45654422253923e-05, "loss": 0.0032, "step": 60980 }, { "epoch": 10.88, "learning_rate": 4.456455064194009e-05, "loss": 0.0039, "step": 60990 }, { "epoch": 10.88, "learning_rate": 4.456365905848788e-05, "loss": 0.0045, "step": 61000 }, { "epoch": 10.88, "learning_rate": 4.456276747503566e-05, "loss": 0.0044, "step": 61010 }, { "epoch": 10.88, "learning_rate": 4.4561875891583454e-05, "loss": 0.0035, "step": 61020 }, { "epoch": 10.88, "learning_rate": 4.4560984308131245e-05, "loss": 0.0035, "step": 61030 }, { "epoch": 10.88, "learning_rate": 4.456009272467903e-05, "loss": 0.0023, "step": 61040 }, { "epoch": 10.89, "learning_rate": 4.455920114122682e-05, "loss": 0.0031, "step": 61050 }, { "epoch": 10.89, "learning_rate": 4.4558309557774605e-05, "loss": 0.0032, "step": 61060 }, { "epoch": 10.89, "learning_rate": 4.45574179743224e-05, "loss": 0.0044, "step": 61070 }, { "epoch": 10.89, "learning_rate": 4.455652639087019e-05, "loss": 0.0038, "step": 61080 }, { "epoch": 10.89, "learning_rate": 4.455563480741798e-05, "loss": 0.0066, "step": 61090 }, { "epoch": 10.9, "learning_rate": 4.455474322396577e-05, "loss": 0.0039, "step": 61100 }, { "epoch": 10.9, "learning_rate": 4.4553851640513555e-05, "loss": 0.0054, "step": 61110 }, { "epoch": 10.9, "learning_rate": 4.4552960057061346e-05, "loss": 0.0044, "step": 61120 }, { "epoch": 10.9, "learning_rate": 4.455206847360913e-05, "loss": 0.0025, "step": 61130 }, { "epoch": 10.9, "learning_rate": 4.455117689015692e-05, "loss": 0.003, "step": 61140 }, { "epoch": 10.9, "learning_rate": 4.4550285306704706e-05, "loss": 0.002, "step": 61150 }, { "epoch": 10.91, "learning_rate": 4.45493937232525e-05, "loss": 0.0044, "step": 61160 }, { "epoch": 10.91, "learning_rate": 4.454850213980029e-05, "loss": 0.0058, "step": 61170 }, { "epoch": 10.91, "learning_rate": 4.454761055634807e-05, "loss": 0.0026, "step": 61180 }, { "epoch": 10.91, "learning_rate": 4.454671897289587e-05, "loss": 0.0064, "step": 61190 }, { "epoch": 10.91, "learning_rate": 4.4545827389443655e-05, "loss": 0.0041, "step": 61200 }, { "epoch": 10.91, "learning_rate": 4.4544935805991447e-05, "loss": 0.0041, "step": 61210 }, { "epoch": 10.92, "learning_rate": 4.454404422253923e-05, "loss": 0.0074, "step": 61220 }, { "epoch": 10.92, "learning_rate": 4.454315263908702e-05, "loss": 0.0064, "step": 61230 }, { "epoch": 10.92, "learning_rate": 4.4542261055634807e-05, "loss": 0.0062, "step": 61240 }, { "epoch": 10.92, "learning_rate": 4.45413694721826e-05, "loss": 0.0056, "step": 61250 }, { "epoch": 10.92, "learning_rate": 4.454047788873039e-05, "loss": 0.0031, "step": 61260 }, { "epoch": 10.93, "learning_rate": 4.4539586305278173e-05, "loss": 0.0029, "step": 61270 }, { "epoch": 10.93, "learning_rate": 4.4538694721825965e-05, "loss": 0.0041, "step": 61280 }, { "epoch": 10.93, "learning_rate": 4.453780313837375e-05, "loss": 0.0026, "step": 61290 }, { "epoch": 10.93, "learning_rate": 4.453691155492155e-05, "loss": 0.0034, "step": 61300 }, { "epoch": 10.93, "learning_rate": 4.453601997146933e-05, "loss": 0.0051, "step": 61310 }, { "epoch": 10.93, "learning_rate": 4.453512838801712e-05, "loss": 0.0042, "step": 61320 }, { "epoch": 10.94, "learning_rate": 4.4534236804564914e-05, "loss": 0.0063, "step": 61330 }, { "epoch": 10.94, "learning_rate": 4.45333452211127e-05, "loss": 0.0052, "step": 61340 }, { "epoch": 10.94, "learning_rate": 4.453245363766049e-05, "loss": 0.0044, "step": 61350 }, { "epoch": 10.94, "learning_rate": 4.4531562054208274e-05, "loss": 0.0056, "step": 61360 }, { "epoch": 10.94, "learning_rate": 4.4530670470756065e-05, "loss": 0.0052, "step": 61370 }, { "epoch": 10.95, "learning_rate": 4.452977888730385e-05, "loss": 0.0038, "step": 61380 }, { "epoch": 10.95, "learning_rate": 4.452888730385164e-05, "loss": 0.0034, "step": 61390 }, { "epoch": 10.95, "learning_rate": 4.452799572039943e-05, "loss": 0.0051, "step": 61400 }, { "epoch": 10.95, "learning_rate": 4.452710413694722e-05, "loss": 0.006, "step": 61410 }, { "epoch": 10.95, "learning_rate": 4.4526212553495014e-05, "loss": 0.0036, "step": 61420 }, { "epoch": 10.95, "learning_rate": 4.45253209700428e-05, "loss": 0.0032, "step": 61430 }, { "epoch": 10.96, "learning_rate": 4.452442938659059e-05, "loss": 0.004, "step": 61440 }, { "epoch": 10.96, "learning_rate": 4.4523537803138375e-05, "loss": 0.0031, "step": 61450 }, { "epoch": 10.96, "learning_rate": 4.4522646219686166e-05, "loss": 0.0039, "step": 61460 }, { "epoch": 10.96, "learning_rate": 4.452175463623395e-05, "loss": 0.0047, "step": 61470 }, { "epoch": 10.96, "learning_rate": 4.452086305278174e-05, "loss": 0.004, "step": 61480 }, { "epoch": 10.96, "learning_rate": 4.451997146932953e-05, "loss": 0.0051, "step": 61490 }, { "epoch": 10.97, "learning_rate": 4.451907988587732e-05, "loss": 0.0014, "step": 61500 }, { "epoch": 10.97, "learning_rate": 4.451818830242511e-05, "loss": 0.0068, "step": 61510 }, { "epoch": 10.97, "learning_rate": 4.45172967189729e-05, "loss": 0.004, "step": 61520 }, { "epoch": 10.97, "learning_rate": 4.451640513552069e-05, "loss": 0.0088, "step": 61530 }, { "epoch": 10.97, "learning_rate": 4.4515513552068475e-05, "loss": 0.0033, "step": 61540 }, { "epoch": 10.98, "learning_rate": 4.4514621968616266e-05, "loss": 0.0042, "step": 61550 }, { "epoch": 10.98, "learning_rate": 4.451373038516405e-05, "loss": 0.0048, "step": 61560 }, { "epoch": 10.98, "learning_rate": 4.451283880171184e-05, "loss": 0.0024, "step": 61570 }, { "epoch": 10.98, "learning_rate": 4.451194721825963e-05, "loss": 0.004, "step": 61580 }, { "epoch": 10.98, "learning_rate": 4.451105563480742e-05, "loss": 0.0043, "step": 61590 }, { "epoch": 10.98, "learning_rate": 4.451016405135521e-05, "loss": 0.0053, "step": 61600 }, { "epoch": 10.99, "learning_rate": 4.450927246790299e-05, "loss": 0.0031, "step": 61610 }, { "epoch": 10.99, "learning_rate": 4.4508380884450784e-05, "loss": 0.0054, "step": 61620 }, { "epoch": 10.99, "learning_rate": 4.4507489300998576e-05, "loss": 0.0033, "step": 61630 }, { "epoch": 10.99, "learning_rate": 4.450659771754637e-05, "loss": 0.004, "step": 61640 }, { "epoch": 10.99, "learning_rate": 4.450570613409416e-05, "loss": 0.0046, "step": 61650 }, { "epoch": 11.0, "learning_rate": 4.450481455064194e-05, "loss": 0.0055, "step": 61660 }, { "epoch": 11.0, "learning_rate": 4.4503922967189734e-05, "loss": 0.0034, "step": 61670 }, { "epoch": 11.0, "learning_rate": 4.450303138373752e-05, "loss": 0.0049, "step": 61680 }, { "epoch": 11.0, "eval_loss": 0.018656417727470398, "eval_runtime": 195.9206, "eval_samples_per_second": 23.678, "eval_steps_per_second": 2.96, "step": 61688 }, { "epoch": 11.0, "learning_rate": 4.450213980028531e-05, "loss": 0.0024, "step": 61690 }, { "epoch": 11.0, "learning_rate": 4.4501248216833094e-05, "loss": 0.0022, "step": 61700 }, { "epoch": 11.0, "learning_rate": 4.4500356633380885e-05, "loss": 0.0028, "step": 61710 }, { "epoch": 11.01, "learning_rate": 4.4499465049928676e-05, "loss": 0.0021, "step": 61720 }, { "epoch": 11.01, "learning_rate": 4.449857346647646e-05, "loss": 0.0048, "step": 61730 }, { "epoch": 11.01, "learning_rate": 4.449768188302426e-05, "loss": 0.0034, "step": 61740 }, { "epoch": 11.01, "learning_rate": 4.449679029957204e-05, "loss": 0.0031, "step": 61750 }, { "epoch": 11.01, "learning_rate": 4.4495898716119834e-05, "loss": 0.0042, "step": 61760 }, { "epoch": 11.01, "learning_rate": 4.449500713266762e-05, "loss": 0.0045, "step": 61770 }, { "epoch": 11.02, "learning_rate": 4.449411554921541e-05, "loss": 0.0026, "step": 61780 }, { "epoch": 11.02, "learning_rate": 4.4493223965763194e-05, "loss": 0.004, "step": 61790 }, { "epoch": 11.02, "learning_rate": 4.4492332382310986e-05, "loss": 0.005, "step": 61800 }, { "epoch": 11.02, "learning_rate": 4.449144079885878e-05, "loss": 0.0025, "step": 61810 }, { "epoch": 11.02, "learning_rate": 4.449054921540656e-05, "loss": 0.0056, "step": 61820 }, { "epoch": 11.03, "learning_rate": 4.448965763195435e-05, "loss": 0.0046, "step": 61830 }, { "epoch": 11.03, "learning_rate": 4.448876604850214e-05, "loss": 0.0039, "step": 61840 }, { "epoch": 11.03, "learning_rate": 4.4487874465049935e-05, "loss": 0.003, "step": 61850 }, { "epoch": 11.03, "learning_rate": 4.448698288159772e-05, "loss": 0.006, "step": 61860 }, { "epoch": 11.03, "learning_rate": 4.448609129814551e-05, "loss": 0.0031, "step": 61870 }, { "epoch": 11.03, "learning_rate": 4.44851997146933e-05, "loss": 0.0028, "step": 61880 }, { "epoch": 11.04, "learning_rate": 4.4484308131241086e-05, "loss": 0.0047, "step": 61890 }, { "epoch": 11.04, "learning_rate": 4.448341654778888e-05, "loss": 0.007, "step": 61900 }, { "epoch": 11.04, "learning_rate": 4.448252496433666e-05, "loss": 0.0035, "step": 61910 }, { "epoch": 11.04, "learning_rate": 4.448163338088445e-05, "loss": 0.0051, "step": 61920 }, { "epoch": 11.04, "learning_rate": 4.448074179743224e-05, "loss": 0.003, "step": 61930 }, { "epoch": 11.04, "learning_rate": 4.447985021398003e-05, "loss": 0.0036, "step": 61940 }, { "epoch": 11.05, "learning_rate": 4.447895863052782e-05, "loss": 0.0057, "step": 61950 }, { "epoch": 11.05, "learning_rate": 4.447806704707561e-05, "loss": 0.0035, "step": 61960 }, { "epoch": 11.05, "learning_rate": 4.44771754636234e-05, "loss": 0.0029, "step": 61970 }, { "epoch": 11.05, "learning_rate": 4.447628388017119e-05, "loss": 0.0036, "step": 61980 }, { "epoch": 11.05, "learning_rate": 4.447539229671898e-05, "loss": 0.0024, "step": 61990 }, { "epoch": 11.06, "learning_rate": 4.447450071326676e-05, "loss": 0.0029, "step": 62000 }, { "epoch": 11.06, "learning_rate": 4.4473609129814554e-05, "loss": 0.003, "step": 62010 }, { "epoch": 11.06, "learning_rate": 4.447271754636234e-05, "loss": 0.0035, "step": 62020 }, { "epoch": 11.06, "learning_rate": 4.447182596291013e-05, "loss": 0.0052, "step": 62030 }, { "epoch": 11.06, "learning_rate": 4.447093437945792e-05, "loss": 0.0036, "step": 62040 }, { "epoch": 11.06, "learning_rate": 4.4470042796005705e-05, "loss": 0.0033, "step": 62050 }, { "epoch": 11.07, "learning_rate": 4.4469151212553496e-05, "loss": 0.0039, "step": 62060 }, { "epoch": 11.07, "learning_rate": 4.446825962910129e-05, "loss": 0.0024, "step": 62070 }, { "epoch": 11.07, "learning_rate": 4.446736804564908e-05, "loss": 0.0051, "step": 62080 }, { "epoch": 11.07, "learning_rate": 4.446647646219686e-05, "loss": 0.003, "step": 62090 }, { "epoch": 11.07, "learning_rate": 4.4465584878744654e-05, "loss": 0.0026, "step": 62100 }, { "epoch": 11.08, "learning_rate": 4.4464693295292445e-05, "loss": 0.006, "step": 62110 }, { "epoch": 11.08, "learning_rate": 4.446380171184023e-05, "loss": 0.0026, "step": 62120 }, { "epoch": 11.08, "learning_rate": 4.446291012838802e-05, "loss": 0.0045, "step": 62130 }, { "epoch": 11.08, "learning_rate": 4.4462018544935805e-05, "loss": 0.0054, "step": 62140 }, { "epoch": 11.08, "learning_rate": 4.44611269614836e-05, "loss": 0.0025, "step": 62150 }, { "epoch": 11.08, "learning_rate": 4.446023537803138e-05, "loss": 0.0036, "step": 62160 }, { "epoch": 11.09, "learning_rate": 4.445934379457917e-05, "loss": 0.0084, "step": 62170 }, { "epoch": 11.09, "learning_rate": 4.4458452211126963e-05, "loss": 0.0021, "step": 62180 }, { "epoch": 11.09, "learning_rate": 4.4457560627674755e-05, "loss": 0.0035, "step": 62190 }, { "epoch": 11.09, "learning_rate": 4.4456669044222546e-05, "loss": 0.0037, "step": 62200 }, { "epoch": 11.09, "learning_rate": 4.445577746077033e-05, "loss": 0.0038, "step": 62210 }, { "epoch": 11.09, "learning_rate": 4.445488587731812e-05, "loss": 0.0044, "step": 62220 }, { "epoch": 11.1, "learning_rate": 4.4453994293865906e-05, "loss": 0.0059, "step": 62230 }, { "epoch": 11.1, "learning_rate": 4.44531027104137e-05, "loss": 0.0028, "step": 62240 }, { "epoch": 11.1, "learning_rate": 4.445221112696148e-05, "loss": 0.0021, "step": 62250 }, { "epoch": 11.1, "learning_rate": 4.445131954350927e-05, "loss": 0.006, "step": 62260 }, { "epoch": 11.1, "learning_rate": 4.4450427960057064e-05, "loss": 0.0034, "step": 62270 }, { "epoch": 11.11, "learning_rate": 4.444953637660485e-05, "loss": 0.0034, "step": 62280 }, { "epoch": 11.11, "learning_rate": 4.4448644793152646e-05, "loss": 0.0017, "step": 62290 }, { "epoch": 11.11, "learning_rate": 4.444775320970043e-05, "loss": 0.0046, "step": 62300 }, { "epoch": 11.11, "learning_rate": 4.444686162624822e-05, "loss": 0.002, "step": 62310 }, { "epoch": 11.11, "learning_rate": 4.4445970042796007e-05, "loss": 0.0037, "step": 62320 }, { "epoch": 11.11, "learning_rate": 4.44450784593438e-05, "loss": 0.0042, "step": 62330 }, { "epoch": 11.12, "learning_rate": 4.444418687589159e-05, "loss": 0.003, "step": 62340 }, { "epoch": 11.12, "learning_rate": 4.4443295292439373e-05, "loss": 0.0033, "step": 62350 }, { "epoch": 11.12, "learning_rate": 4.4442403708987165e-05, "loss": 0.0044, "step": 62360 }, { "epoch": 11.12, "learning_rate": 4.444151212553495e-05, "loss": 0.0032, "step": 62370 }, { "epoch": 11.12, "learning_rate": 4.444062054208274e-05, "loss": 0.005, "step": 62380 }, { "epoch": 11.13, "learning_rate": 4.4439728958630525e-05, "loss": 0.0069, "step": 62390 }, { "epoch": 11.13, "learning_rate": 4.443883737517832e-05, "loss": 0.0038, "step": 62400 }, { "epoch": 11.13, "learning_rate": 4.443794579172611e-05, "loss": 0.0042, "step": 62410 }, { "epoch": 11.13, "learning_rate": 4.44370542082739e-05, "loss": 0.0045, "step": 62420 }, { "epoch": 11.13, "learning_rate": 4.443616262482169e-05, "loss": 0.0026, "step": 62430 }, { "epoch": 11.13, "learning_rate": 4.4435271041369474e-05, "loss": 0.0031, "step": 62440 }, { "epoch": 11.14, "learning_rate": 4.4434379457917265e-05, "loss": 0.0037, "step": 62450 }, { "epoch": 11.14, "learning_rate": 4.443348787446505e-05, "loss": 0.0022, "step": 62460 }, { "epoch": 11.14, "learning_rate": 4.443259629101284e-05, "loss": 0.003, "step": 62470 }, { "epoch": 11.14, "learning_rate": 4.4431704707560625e-05, "loss": 0.0038, "step": 62480 }, { "epoch": 11.14, "learning_rate": 4.4430813124108416e-05, "loss": 0.0045, "step": 62490 }, { "epoch": 11.14, "learning_rate": 4.442992154065621e-05, "loss": 0.0031, "step": 62500 }, { "epoch": 11.15, "learning_rate": 4.4429029957204e-05, "loss": 0.003, "step": 62510 }, { "epoch": 11.15, "learning_rate": 4.442813837375179e-05, "loss": 0.0021, "step": 62520 }, { "epoch": 11.15, "learning_rate": 4.4427246790299575e-05, "loss": 0.0052, "step": 62530 }, { "epoch": 11.15, "learning_rate": 4.4426355206847366e-05, "loss": 0.003, "step": 62540 }, { "epoch": 11.15, "learning_rate": 4.442546362339515e-05, "loss": 0.0042, "step": 62550 }, { "epoch": 11.16, "learning_rate": 4.442457203994294e-05, "loss": 0.0071, "step": 62560 }, { "epoch": 11.16, "learning_rate": 4.442368045649073e-05, "loss": 0.0032, "step": 62570 }, { "epoch": 11.16, "learning_rate": 4.442278887303852e-05, "loss": 0.0068, "step": 62580 }, { "epoch": 11.16, "learning_rate": 4.442189728958631e-05, "loss": 0.0079, "step": 62590 }, { "epoch": 11.16, "learning_rate": 4.442100570613409e-05, "loss": 0.0033, "step": 62600 }, { "epoch": 11.16, "learning_rate": 4.4420114122681884e-05, "loss": 0.0032, "step": 62610 }, { "epoch": 11.17, "learning_rate": 4.441922253922967e-05, "loss": 0.0046, "step": 62620 }, { "epoch": 11.17, "learning_rate": 4.4418330955777466e-05, "loss": 0.0033, "step": 62630 }, { "epoch": 11.17, "learning_rate": 4.441743937232525e-05, "loss": 0.0028, "step": 62640 }, { "epoch": 11.17, "learning_rate": 4.441654778887304e-05, "loss": 0.0014, "step": 62650 }, { "epoch": 11.17, "learning_rate": 4.441565620542083e-05, "loss": 0.0042, "step": 62660 }, { "epoch": 11.18, "learning_rate": 4.441476462196862e-05, "loss": 0.0051, "step": 62670 }, { "epoch": 11.18, "learning_rate": 4.441387303851641e-05, "loss": 0.0023, "step": 62680 }, { "epoch": 11.18, "learning_rate": 4.441298145506419e-05, "loss": 0.0052, "step": 62690 }, { "epoch": 11.18, "learning_rate": 4.4412089871611984e-05, "loss": 0.0055, "step": 62700 }, { "epoch": 11.18, "learning_rate": 4.441119828815977e-05, "loss": 0.0037, "step": 62710 }, { "epoch": 11.18, "learning_rate": 4.441030670470756e-05, "loss": 0.0028, "step": 62720 }, { "epoch": 11.19, "learning_rate": 4.440941512125535e-05, "loss": 0.0032, "step": 62730 }, { "epoch": 11.19, "learning_rate": 4.440852353780314e-05, "loss": 0.0037, "step": 62740 }, { "epoch": 11.19, "learning_rate": 4.4407631954350934e-05, "loss": 0.0035, "step": 62750 }, { "epoch": 11.19, "learning_rate": 4.440674037089872e-05, "loss": 0.0041, "step": 62760 }, { "epoch": 11.19, "learning_rate": 4.440584878744651e-05, "loss": 0.0032, "step": 62770 }, { "epoch": 11.19, "learning_rate": 4.4404957203994294e-05, "loss": 0.0034, "step": 62780 }, { "epoch": 11.2, "learning_rate": 4.4404065620542085e-05, "loss": 0.0063, "step": 62790 }, { "epoch": 11.2, "learning_rate": 4.4403174037089876e-05, "loss": 0.0026, "step": 62800 }, { "epoch": 11.2, "learning_rate": 4.440228245363766e-05, "loss": 0.0017, "step": 62810 }, { "epoch": 11.2, "learning_rate": 4.440139087018545e-05, "loss": 0.0015, "step": 62820 }, { "epoch": 11.2, "learning_rate": 4.4400499286733236e-05, "loss": 0.0045, "step": 62830 }, { "epoch": 11.21, "learning_rate": 4.439960770328103e-05, "loss": 0.002, "step": 62840 }, { "epoch": 11.21, "learning_rate": 4.439871611982882e-05, "loss": 0.0019, "step": 62850 }, { "epoch": 11.21, "learning_rate": 4.439782453637661e-05, "loss": 0.0029, "step": 62860 }, { "epoch": 11.21, "learning_rate": 4.4396932952924394e-05, "loss": 0.004, "step": 62870 }, { "epoch": 11.21, "learning_rate": 4.4396041369472186e-05, "loss": 0.0035, "step": 62880 }, { "epoch": 11.21, "learning_rate": 4.439514978601998e-05, "loss": 0.0029, "step": 62890 }, { "epoch": 11.22, "learning_rate": 4.439425820256776e-05, "loss": 0.0031, "step": 62900 }, { "epoch": 11.22, "learning_rate": 4.439336661911555e-05, "loss": 0.0031, "step": 62910 }, { "epoch": 11.22, "learning_rate": 4.439247503566334e-05, "loss": 0.0034, "step": 62920 }, { "epoch": 11.22, "learning_rate": 4.439158345221113e-05, "loss": 0.0062, "step": 62930 }, { "epoch": 11.22, "learning_rate": 4.439069186875891e-05, "loss": 0.003, "step": 62940 }, { "epoch": 11.23, "learning_rate": 4.4389800285306704e-05, "loss": 0.0024, "step": 62950 }, { "epoch": 11.23, "learning_rate": 4.4388908701854495e-05, "loss": 0.0038, "step": 62960 }, { "epoch": 11.23, "learning_rate": 4.4388017118402286e-05, "loss": 0.0025, "step": 62970 }, { "epoch": 11.23, "learning_rate": 4.438712553495008e-05, "loss": 0.004, "step": 62980 }, { "epoch": 11.23, "learning_rate": 4.438623395149786e-05, "loss": 0.0043, "step": 62990 }, { "epoch": 11.23, "learning_rate": 4.438534236804565e-05, "loss": 0.0031, "step": 63000 }, { "epoch": 11.24, "learning_rate": 4.438453994293866e-05, "loss": 0.0029, "step": 63010 }, { "epoch": 11.24, "learning_rate": 4.438364835948645e-05, "loss": 0.0057, "step": 63020 }, { "epoch": 11.24, "learning_rate": 4.438275677603424e-05, "loss": 0.0038, "step": 63030 }, { "epoch": 11.24, "learning_rate": 4.438186519258203e-05, "loss": 0.0038, "step": 63040 }, { "epoch": 11.24, "learning_rate": 4.438097360912982e-05, "loss": 0.0052, "step": 63050 }, { "epoch": 11.24, "learning_rate": 4.4380082025677603e-05, "loss": 0.0041, "step": 63060 }, { "epoch": 11.25, "learning_rate": 4.4379190442225395e-05, "loss": 0.0035, "step": 63070 }, { "epoch": 11.25, "learning_rate": 4.437829885877318e-05, "loss": 0.0049, "step": 63080 }, { "epoch": 11.25, "learning_rate": 4.437740727532097e-05, "loss": 0.0018, "step": 63090 }, { "epoch": 11.25, "learning_rate": 4.437651569186876e-05, "loss": 0.0061, "step": 63100 }, { "epoch": 11.25, "learning_rate": 4.437562410841655e-05, "loss": 0.0041, "step": 63110 }, { "epoch": 11.26, "learning_rate": 4.4374732524964344e-05, "loss": 0.0041, "step": 63120 }, { "epoch": 11.26, "learning_rate": 4.437384094151213e-05, "loss": 0.0034, "step": 63130 }, { "epoch": 11.26, "learning_rate": 4.437294935805992e-05, "loss": 0.0052, "step": 63140 }, { "epoch": 11.26, "learning_rate": 4.4372057774607704e-05, "loss": 0.0023, "step": 63150 }, { "epoch": 11.26, "learning_rate": 4.4371166191155495e-05, "loss": 0.0035, "step": 63160 }, { "epoch": 11.26, "learning_rate": 4.437027460770328e-05, "loss": 0.0032, "step": 63170 }, { "epoch": 11.27, "learning_rate": 4.436938302425107e-05, "loss": 0.0037, "step": 63180 }, { "epoch": 11.27, "learning_rate": 4.436849144079886e-05, "loss": 0.0043, "step": 63190 }, { "epoch": 11.27, "learning_rate": 4.4367599857346647e-05, "loss": 0.0064, "step": 63200 }, { "epoch": 11.27, "learning_rate": 4.436670827389444e-05, "loss": 0.003, "step": 63210 }, { "epoch": 11.27, "learning_rate": 4.436581669044223e-05, "loss": 0.0016, "step": 63220 }, { "epoch": 11.27, "learning_rate": 4.436492510699002e-05, "loss": 0.0033, "step": 63230 }, { "epoch": 11.28, "learning_rate": 4.4364033523537805e-05, "loss": 0.007, "step": 63240 }, { "epoch": 11.28, "learning_rate": 4.4363141940085596e-05, "loss": 0.0063, "step": 63250 }, { "epoch": 11.28, "learning_rate": 4.436225035663338e-05, "loss": 0.0019, "step": 63260 }, { "epoch": 11.28, "learning_rate": 4.436135877318117e-05, "loss": 0.0041, "step": 63270 }, { "epoch": 11.28, "learning_rate": 4.436046718972896e-05, "loss": 0.008, "step": 63280 }, { "epoch": 11.29, "learning_rate": 4.435957560627675e-05, "loss": 0.0024, "step": 63290 }, { "epoch": 11.29, "learning_rate": 4.435868402282454e-05, "loss": 0.0042, "step": 63300 }, { "epoch": 11.29, "learning_rate": 4.435779243937232e-05, "loss": 0.0052, "step": 63310 }, { "epoch": 11.29, "learning_rate": 4.4356900855920114e-05, "loss": 0.0041, "step": 63320 }, { "epoch": 11.29, "learning_rate": 4.4356009272467905e-05, "loss": 0.0072, "step": 63330 }, { "epoch": 11.29, "learning_rate": 4.4355117689015696e-05, "loss": 0.0055, "step": 63340 }, { "epoch": 11.3, "learning_rate": 4.435422610556349e-05, "loss": 0.006, "step": 63350 }, { "epoch": 11.3, "learning_rate": 4.435333452211127e-05, "loss": 0.0035, "step": 63360 }, { "epoch": 11.3, "learning_rate": 4.435244293865906e-05, "loss": 0.003, "step": 63370 }, { "epoch": 11.3, "learning_rate": 4.435155135520685e-05, "loss": 0.0049, "step": 63380 }, { "epoch": 11.3, "learning_rate": 4.435065977175464e-05, "loss": 0.003, "step": 63390 }, { "epoch": 11.31, "learning_rate": 4.434976818830242e-05, "loss": 0.0028, "step": 63400 }, { "epoch": 11.31, "learning_rate": 4.4348876604850215e-05, "loss": 0.0051, "step": 63410 }, { "epoch": 11.31, "learning_rate": 4.4347985021398006e-05, "loss": 0.003, "step": 63420 }, { "epoch": 11.31, "learning_rate": 4.434709343794579e-05, "loss": 0.0024, "step": 63430 }, { "epoch": 11.31, "learning_rate": 4.434620185449359e-05, "loss": 0.0021, "step": 63440 }, { "epoch": 11.31, "learning_rate": 4.434531027104137e-05, "loss": 0.0037, "step": 63450 }, { "epoch": 11.32, "learning_rate": 4.4344418687589164e-05, "loss": 0.0033, "step": 63460 }, { "epoch": 11.32, "learning_rate": 4.434352710413695e-05, "loss": 0.0034, "step": 63470 }, { "epoch": 11.32, "learning_rate": 4.434263552068474e-05, "loss": 0.0044, "step": 63480 }, { "epoch": 11.32, "learning_rate": 4.4341743937232524e-05, "loss": 0.0023, "step": 63490 }, { "epoch": 11.32, "learning_rate": 4.4340852353780315e-05, "loss": 0.0026, "step": 63500 }, { "epoch": 11.32, "learning_rate": 4.4339960770328106e-05, "loss": 0.0045, "step": 63510 }, { "epoch": 11.33, "learning_rate": 4.433906918687589e-05, "loss": 0.0028, "step": 63520 }, { "epoch": 11.33, "learning_rate": 4.433817760342368e-05, "loss": 0.0035, "step": 63530 }, { "epoch": 11.33, "learning_rate": 4.4337286019971466e-05, "loss": 0.0022, "step": 63540 }, { "epoch": 11.33, "learning_rate": 4.4336394436519264e-05, "loss": 0.0037, "step": 63550 }, { "epoch": 11.33, "learning_rate": 4.433550285306705e-05, "loss": 0.0086, "step": 63560 }, { "epoch": 11.34, "learning_rate": 4.433461126961484e-05, "loss": 0.0078, "step": 63570 }, { "epoch": 11.34, "learning_rate": 4.433371968616263e-05, "loss": 0.0037, "step": 63580 }, { "epoch": 11.34, "learning_rate": 4.4332828102710416e-05, "loss": 0.006, "step": 63590 }, { "epoch": 11.34, "learning_rate": 4.433193651925821e-05, "loss": 0.003, "step": 63600 }, { "epoch": 11.34, "learning_rate": 4.433104493580599e-05, "loss": 0.0038, "step": 63610 }, { "epoch": 11.34, "learning_rate": 4.433015335235378e-05, "loss": 0.0028, "step": 63620 }, { "epoch": 11.35, "learning_rate": 4.432926176890157e-05, "loss": 0.0048, "step": 63630 }, { "epoch": 11.35, "learning_rate": 4.432837018544936e-05, "loss": 0.0029, "step": 63640 }, { "epoch": 11.35, "learning_rate": 4.432747860199715e-05, "loss": 0.0038, "step": 63650 }, { "epoch": 11.35, "learning_rate": 4.432658701854494e-05, "loss": 0.0024, "step": 63660 }, { "epoch": 11.35, "learning_rate": 4.432569543509273e-05, "loss": 0.0031, "step": 63670 }, { "epoch": 11.36, "learning_rate": 4.4324803851640516e-05, "loss": 0.0043, "step": 63680 }, { "epoch": 11.36, "learning_rate": 4.432391226818831e-05, "loss": 0.0035, "step": 63690 }, { "epoch": 11.36, "learning_rate": 4.432302068473609e-05, "loss": 0.0022, "step": 63700 }, { "epoch": 11.36, "learning_rate": 4.432212910128388e-05, "loss": 0.0033, "step": 63710 }, { "epoch": 11.36, "learning_rate": 4.432123751783167e-05, "loss": 0.0029, "step": 63720 }, { "epoch": 11.36, "learning_rate": 4.432034593437946e-05, "loss": 0.0029, "step": 63730 }, { "epoch": 11.37, "learning_rate": 4.431945435092725e-05, "loss": 0.003, "step": 63740 }, { "epoch": 11.37, "learning_rate": 4.4318562767475034e-05, "loss": 0.0037, "step": 63750 }, { "epoch": 11.37, "learning_rate": 4.4317671184022826e-05, "loss": 0.0031, "step": 63760 }, { "epoch": 11.37, "learning_rate": 4.431677960057062e-05, "loss": 0.0043, "step": 63770 }, { "epoch": 11.37, "learning_rate": 4.431588801711841e-05, "loss": 0.0031, "step": 63780 }, { "epoch": 11.37, "learning_rate": 4.431499643366619e-05, "loss": 0.0033, "step": 63790 }, { "epoch": 11.38, "learning_rate": 4.4314104850213984e-05, "loss": 0.0039, "step": 63800 }, { "epoch": 11.38, "learning_rate": 4.4313213266761775e-05, "loss": 0.0043, "step": 63810 }, { "epoch": 11.38, "learning_rate": 4.431232168330956e-05, "loss": 0.0021, "step": 63820 }, { "epoch": 11.38, "learning_rate": 4.431143009985735e-05, "loss": 0.0014, "step": 63830 }, { "epoch": 11.38, "learning_rate": 4.4310538516405135e-05, "loss": 0.0026, "step": 63840 }, { "epoch": 11.39, "learning_rate": 4.4309646932952926e-05, "loss": 0.0049, "step": 63850 }, { "epoch": 11.39, "learning_rate": 4.430875534950071e-05, "loss": 0.0055, "step": 63860 }, { "epoch": 11.39, "learning_rate": 4.43078637660485e-05, "loss": 0.0033, "step": 63870 }, { "epoch": 11.39, "learning_rate": 4.430697218259629e-05, "loss": 0.0029, "step": 63880 }, { "epoch": 11.39, "learning_rate": 4.4306080599144084e-05, "loss": 0.004, "step": 63890 }, { "epoch": 11.39, "learning_rate": 4.4305189015691875e-05, "loss": 0.0038, "step": 63900 }, { "epoch": 11.4, "learning_rate": 4.430429743223966e-05, "loss": 0.0032, "step": 63910 }, { "epoch": 11.4, "learning_rate": 4.430340584878745e-05, "loss": 0.0022, "step": 63920 }, { "epoch": 11.4, "learning_rate": 4.4302514265335235e-05, "loss": 0.0032, "step": 63930 }, { "epoch": 11.4, "learning_rate": 4.430162268188303e-05, "loss": 0.003, "step": 63940 }, { "epoch": 11.4, "learning_rate": 4.430073109843081e-05, "loss": 0.0015, "step": 63950 }, { "epoch": 11.41, "learning_rate": 4.42998395149786e-05, "loss": 0.004, "step": 63960 }, { "epoch": 11.41, "learning_rate": 4.4298947931526394e-05, "loss": 0.0027, "step": 63970 }, { "epoch": 11.41, "learning_rate": 4.429805634807418e-05, "loss": 0.002, "step": 63980 }, { "epoch": 11.41, "learning_rate": 4.4297164764621976e-05, "loss": 0.0021, "step": 63990 }, { "epoch": 11.41, "learning_rate": 4.429627318116976e-05, "loss": 0.0065, "step": 64000 }, { "epoch": 11.41, "learning_rate": 4.429538159771755e-05, "loss": 0.0031, "step": 64010 }, { "epoch": 11.42, "learning_rate": 4.4294490014265336e-05, "loss": 0.005, "step": 64020 }, { "epoch": 11.42, "learning_rate": 4.429359843081313e-05, "loss": 0.0041, "step": 64030 }, { "epoch": 11.42, "learning_rate": 4.429270684736092e-05, "loss": 0.0041, "step": 64040 }, { "epoch": 11.42, "learning_rate": 4.42918152639087e-05, "loss": 0.0044, "step": 64050 }, { "epoch": 11.42, "learning_rate": 4.4290923680456494e-05, "loss": 0.0039, "step": 64060 }, { "epoch": 11.42, "learning_rate": 4.429003209700428e-05, "loss": 0.0039, "step": 64070 }, { "epoch": 11.43, "learning_rate": 4.428914051355207e-05, "loss": 0.0042, "step": 64080 }, { "epoch": 11.43, "learning_rate": 4.4288248930099854e-05, "loss": 0.0037, "step": 64090 }, { "epoch": 11.43, "learning_rate": 4.428735734664765e-05, "loss": 0.0051, "step": 64100 }, { "epoch": 11.43, "learning_rate": 4.4286465763195437e-05, "loss": 0.0054, "step": 64110 }, { "epoch": 11.43, "learning_rate": 4.428557417974323e-05, "loss": 0.0038, "step": 64120 }, { "epoch": 11.44, "learning_rate": 4.428468259629102e-05, "loss": 0.0047, "step": 64130 }, { "epoch": 11.44, "learning_rate": 4.4283791012838803e-05, "loss": 0.0054, "step": 64140 }, { "epoch": 11.44, "learning_rate": 4.4282899429386595e-05, "loss": 0.0029, "step": 64150 }, { "epoch": 11.44, "learning_rate": 4.428200784593438e-05, "loss": 0.0045, "step": 64160 }, { "epoch": 11.44, "learning_rate": 4.428111626248217e-05, "loss": 0.004, "step": 64170 }, { "epoch": 11.44, "learning_rate": 4.4280224679029955e-05, "loss": 0.0048, "step": 64180 }, { "epoch": 11.45, "learning_rate": 4.4279333095577746e-05, "loss": 0.0014, "step": 64190 }, { "epoch": 11.45, "learning_rate": 4.427844151212554e-05, "loss": 0.0028, "step": 64200 }, { "epoch": 11.45, "learning_rate": 4.427754992867333e-05, "loss": 0.0024, "step": 64210 }, { "epoch": 11.45, "learning_rate": 4.427665834522112e-05, "loss": 0.0065, "step": 64220 }, { "epoch": 11.45, "learning_rate": 4.4275766761768904e-05, "loss": 0.0037, "step": 64230 }, { "epoch": 11.46, "learning_rate": 4.4274875178316695e-05, "loss": 0.0037, "step": 64240 }, { "epoch": 11.46, "learning_rate": 4.427398359486448e-05, "loss": 0.0028, "step": 64250 }, { "epoch": 11.46, "learning_rate": 4.427309201141227e-05, "loss": 0.0026, "step": 64260 }, { "epoch": 11.46, "learning_rate": 4.427220042796006e-05, "loss": 0.0046, "step": 64270 }, { "epoch": 11.46, "learning_rate": 4.4271308844507847e-05, "loss": 0.0035, "step": 64280 }, { "epoch": 11.46, "learning_rate": 4.427041726105564e-05, "loss": 0.0027, "step": 64290 }, { "epoch": 11.47, "learning_rate": 4.426952567760342e-05, "loss": 0.0037, "step": 64300 }, { "epoch": 11.47, "learning_rate": 4.426863409415121e-05, "loss": 0.0038, "step": 64310 }, { "epoch": 11.47, "learning_rate": 4.4267742510699005e-05, "loss": 0.0035, "step": 64320 }, { "epoch": 11.47, "learning_rate": 4.4266850927246796e-05, "loss": 0.007, "step": 64330 }, { "epoch": 11.47, "learning_rate": 4.426595934379458e-05, "loss": 0.0028, "step": 64340 }, { "epoch": 11.47, "learning_rate": 4.426506776034237e-05, "loss": 0.0032, "step": 64350 }, { "epoch": 11.48, "learning_rate": 4.426417617689016e-05, "loss": 0.0027, "step": 64360 }, { "epoch": 11.48, "learning_rate": 4.426328459343795e-05, "loss": 0.0033, "step": 64370 }, { "epoch": 11.48, "learning_rate": 4.426239300998574e-05, "loss": 0.0045, "step": 64380 }, { "epoch": 11.48, "learning_rate": 4.426150142653352e-05, "loss": 0.0027, "step": 64390 }, { "epoch": 11.48, "learning_rate": 4.4260609843081314e-05, "loss": 0.0027, "step": 64400 }, { "epoch": 11.49, "learning_rate": 4.42597182596291e-05, "loss": 0.0045, "step": 64410 }, { "epoch": 11.49, "learning_rate": 4.425882667617689e-05, "loss": 0.0031, "step": 64420 }, { "epoch": 11.49, "learning_rate": 4.425793509272468e-05, "loss": 0.005, "step": 64430 }, { "epoch": 11.49, "learning_rate": 4.425704350927247e-05, "loss": 0.0029, "step": 64440 }, { "epoch": 11.49, "learning_rate": 4.425615192582026e-05, "loss": 0.0026, "step": 64450 }, { "epoch": 11.49, "learning_rate": 4.425526034236805e-05, "loss": 0.0028, "step": 64460 }, { "epoch": 11.5, "learning_rate": 4.425436875891584e-05, "loss": 0.006, "step": 64470 }, { "epoch": 11.5, "learning_rate": 4.425347717546362e-05, "loss": 0.0018, "step": 64480 }, { "epoch": 11.5, "learning_rate": 4.4252585592011414e-05, "loss": 0.0057, "step": 64490 }, { "epoch": 11.5, "learning_rate": 4.4251694008559206e-05, "loss": 0.0032, "step": 64500 }, { "epoch": 11.5, "learning_rate": 4.425080242510699e-05, "loss": 0.0031, "step": 64510 }, { "epoch": 11.5, "learning_rate": 4.424991084165478e-05, "loss": 0.0026, "step": 64520 }, { "epoch": 11.51, "learning_rate": 4.4249019258202566e-05, "loss": 0.0045, "step": 64530 }, { "epoch": 11.51, "learning_rate": 4.4248127674750364e-05, "loss": 0.007, "step": 64540 }, { "epoch": 11.51, "learning_rate": 4.424723609129815e-05, "loss": 0.0035, "step": 64550 }, { "epoch": 11.51, "learning_rate": 4.424634450784594e-05, "loss": 0.0052, "step": 64560 }, { "epoch": 11.51, "learning_rate": 4.4245452924393724e-05, "loss": 0.0089, "step": 64570 }, { "epoch": 11.52, "learning_rate": 4.4244561340941515e-05, "loss": 0.0031, "step": 64580 }, { "epoch": 11.52, "learning_rate": 4.4243669757489306e-05, "loss": 0.0059, "step": 64590 }, { "epoch": 11.52, "learning_rate": 4.424277817403709e-05, "loss": 0.0046, "step": 64600 }, { "epoch": 11.52, "learning_rate": 4.424188659058488e-05, "loss": 0.002, "step": 64610 }, { "epoch": 11.52, "learning_rate": 4.4240995007132666e-05, "loss": 0.0054, "step": 64620 }, { "epoch": 11.52, "learning_rate": 4.424010342368046e-05, "loss": 0.0031, "step": 64630 }, { "epoch": 11.53, "learning_rate": 4.423921184022824e-05, "loss": 0.0041, "step": 64640 }, { "epoch": 11.53, "learning_rate": 4.423832025677604e-05, "loss": 0.0045, "step": 64650 }, { "epoch": 11.53, "learning_rate": 4.4237428673323824e-05, "loss": 0.0032, "step": 64660 }, { "epoch": 11.53, "learning_rate": 4.4236537089871616e-05, "loss": 0.0056, "step": 64670 }, { "epoch": 11.53, "learning_rate": 4.423564550641941e-05, "loss": 0.0059, "step": 64680 }, { "epoch": 11.54, "learning_rate": 4.423475392296719e-05, "loss": 0.0062, "step": 64690 }, { "epoch": 11.54, "learning_rate": 4.423386233951498e-05, "loss": 0.0029, "step": 64700 }, { "epoch": 11.54, "learning_rate": 4.423297075606277e-05, "loss": 0.0049, "step": 64710 }, { "epoch": 11.54, "learning_rate": 4.423207917261056e-05, "loss": 0.0023, "step": 64720 }, { "epoch": 11.54, "learning_rate": 4.423118758915835e-05, "loss": 0.0035, "step": 64730 }, { "epoch": 11.54, "learning_rate": 4.4230296005706134e-05, "loss": 0.0028, "step": 64740 }, { "epoch": 11.55, "learning_rate": 4.4229404422253925e-05, "loss": 0.0025, "step": 64750 }, { "epoch": 11.55, "learning_rate": 4.4228512838801716e-05, "loss": 0.0033, "step": 64760 }, { "epoch": 11.55, "learning_rate": 4.422762125534951e-05, "loss": 0.0027, "step": 64770 }, { "epoch": 11.55, "learning_rate": 4.422672967189729e-05, "loss": 0.0039, "step": 64780 }, { "epoch": 11.55, "learning_rate": 4.422583808844508e-05, "loss": 0.0056, "step": 64790 }, { "epoch": 11.55, "learning_rate": 4.422494650499287e-05, "loss": 0.0035, "step": 64800 }, { "epoch": 11.56, "learning_rate": 4.422405492154066e-05, "loss": 0.0036, "step": 64810 }, { "epoch": 11.56, "learning_rate": 4.422316333808845e-05, "loss": 0.0028, "step": 64820 }, { "epoch": 11.56, "learning_rate": 4.4222271754636234e-05, "loss": 0.0023, "step": 64830 }, { "epoch": 11.56, "learning_rate": 4.4221380171184026e-05, "loss": 0.007, "step": 64840 }, { "epoch": 11.56, "learning_rate": 4.422048858773181e-05, "loss": 0.0049, "step": 64850 }, { "epoch": 11.57, "learning_rate": 4.42195970042796e-05, "loss": 0.004, "step": 64860 }, { "epoch": 11.57, "learning_rate": 4.421870542082739e-05, "loss": 0.0046, "step": 64870 }, { "epoch": 11.57, "learning_rate": 4.4217813837375184e-05, "loss": 0.0042, "step": 64880 }, { "epoch": 11.57, "learning_rate": 4.421692225392297e-05, "loss": 0.0024, "step": 64890 }, { "epoch": 11.57, "learning_rate": 4.421603067047076e-05, "loss": 0.004, "step": 64900 }, { "epoch": 11.57, "learning_rate": 4.421513908701855e-05, "loss": 0.0036, "step": 64910 }, { "epoch": 11.58, "learning_rate": 4.4214247503566335e-05, "loss": 0.0034, "step": 64920 }, { "epoch": 11.58, "learning_rate": 4.4213355920114126e-05, "loss": 0.0033, "step": 64930 }, { "epoch": 11.58, "learning_rate": 4.421246433666191e-05, "loss": 0.0036, "step": 64940 }, { "epoch": 11.58, "learning_rate": 4.42115727532097e-05, "loss": 0.0022, "step": 64950 }, { "epoch": 11.58, "learning_rate": 4.421068116975749e-05, "loss": 0.0034, "step": 64960 }, { "epoch": 11.59, "learning_rate": 4.420978958630528e-05, "loss": 0.0046, "step": 64970 }, { "epoch": 11.59, "learning_rate": 4.420889800285307e-05, "loss": 0.0027, "step": 64980 }, { "epoch": 11.59, "learning_rate": 4.420800641940086e-05, "loss": 0.0046, "step": 64990 }, { "epoch": 11.59, "learning_rate": 4.420711483594865e-05, "loss": 0.0042, "step": 65000 }, { "epoch": 11.59, "learning_rate": 4.4206223252496435e-05, "loss": 0.0028, "step": 65010 }, { "epoch": 11.59, "learning_rate": 4.420533166904423e-05, "loss": 0.0062, "step": 65020 }, { "epoch": 11.6, "learning_rate": 4.420444008559201e-05, "loss": 0.0035, "step": 65030 }, { "epoch": 11.6, "learning_rate": 4.42035485021398e-05, "loss": 0.0031, "step": 65040 }, { "epoch": 11.6, "learning_rate": 4.4202656918687594e-05, "loss": 0.0034, "step": 65050 }, { "epoch": 11.6, "learning_rate": 4.420176533523538e-05, "loss": 0.0034, "step": 65060 }, { "epoch": 11.6, "learning_rate": 4.420096291012839e-05, "loss": 0.0032, "step": 65070 }, { "epoch": 11.6, "learning_rate": 4.420007132667618e-05, "loss": 0.0045, "step": 65080 }, { "epoch": 11.61, "learning_rate": 4.419917974322397e-05, "loss": 0.0025, "step": 65090 }, { "epoch": 11.61, "learning_rate": 4.419828815977175e-05, "loss": 0.0031, "step": 65100 }, { "epoch": 11.61, "learning_rate": 4.4197396576319544e-05, "loss": 0.0036, "step": 65110 }, { "epoch": 11.61, "learning_rate": 4.4196504992867335e-05, "loss": 0.0021, "step": 65120 }, { "epoch": 11.61, "learning_rate": 4.419561340941512e-05, "loss": 0.0048, "step": 65130 }, { "epoch": 11.62, "learning_rate": 4.419472182596292e-05, "loss": 0.0031, "step": 65140 }, { "epoch": 11.62, "learning_rate": 4.41938302425107e-05, "loss": 0.004, "step": 65150 }, { "epoch": 11.62, "learning_rate": 4.419293865905849e-05, "loss": 0.0074, "step": 65160 }, { "epoch": 11.62, "learning_rate": 4.419204707560628e-05, "loss": 0.0053, "step": 65170 }, { "epoch": 11.62, "learning_rate": 4.419115549215407e-05, "loss": 0.0033, "step": 65180 }, { "epoch": 11.62, "learning_rate": 4.419026390870185e-05, "loss": 0.0038, "step": 65190 }, { "epoch": 11.63, "learning_rate": 4.4189372325249645e-05, "loss": 0.0038, "step": 65200 }, { "epoch": 11.63, "learning_rate": 4.4188480741797436e-05, "loss": 0.0046, "step": 65210 }, { "epoch": 11.63, "learning_rate": 4.418758915834522e-05, "loss": 0.0032, "step": 65220 }, { "epoch": 11.63, "learning_rate": 4.418669757489301e-05, "loss": 0.0036, "step": 65230 }, { "epoch": 11.63, "learning_rate": 4.4185805991440796e-05, "loss": 0.0045, "step": 65240 }, { "epoch": 11.64, "learning_rate": 4.4184914407988594e-05, "loss": 0.004, "step": 65250 }, { "epoch": 11.64, "learning_rate": 4.418402282453638e-05, "loss": 0.0059, "step": 65260 }, { "epoch": 11.64, "learning_rate": 4.418313124108417e-05, "loss": 0.0039, "step": 65270 }, { "epoch": 11.64, "learning_rate": 4.418223965763196e-05, "loss": 0.0028, "step": 65280 }, { "epoch": 11.64, "learning_rate": 4.4181348074179745e-05, "loss": 0.0049, "step": 65290 }, { "epoch": 11.64, "learning_rate": 4.4180456490727536e-05, "loss": 0.0034, "step": 65300 }, { "epoch": 11.65, "learning_rate": 4.417956490727532e-05, "loss": 0.0036, "step": 65310 }, { "epoch": 11.65, "learning_rate": 4.417867332382311e-05, "loss": 0.004, "step": 65320 }, { "epoch": 11.65, "learning_rate": 4.4177781740370896e-05, "loss": 0.0035, "step": 65330 }, { "epoch": 11.65, "learning_rate": 4.417689015691869e-05, "loss": 0.0046, "step": 65340 }, { "epoch": 11.65, "learning_rate": 4.417599857346648e-05, "loss": 0.0036, "step": 65350 }, { "epoch": 11.65, "learning_rate": 4.417510699001427e-05, "loss": 0.0038, "step": 65360 }, { "epoch": 11.66, "learning_rate": 4.417421540656206e-05, "loss": 0.0031, "step": 65370 }, { "epoch": 11.66, "learning_rate": 4.4173323823109846e-05, "loss": 0.003, "step": 65380 }, { "epoch": 11.66, "learning_rate": 4.417243223965764e-05, "loss": 0.005, "step": 65390 }, { "epoch": 11.66, "learning_rate": 4.417154065620542e-05, "loss": 0.003, "step": 65400 }, { "epoch": 11.66, "learning_rate": 4.417064907275321e-05, "loss": 0.0061, "step": 65410 }, { "epoch": 11.67, "learning_rate": 4.4169757489301e-05, "loss": 0.004, "step": 65420 }, { "epoch": 11.67, "learning_rate": 4.416886590584879e-05, "loss": 0.004, "step": 65430 }, { "epoch": 11.67, "learning_rate": 4.416797432239658e-05, "loss": 0.0044, "step": 65440 }, { "epoch": 11.67, "learning_rate": 4.4167082738944364e-05, "loss": 0.0032, "step": 65450 }, { "epoch": 11.67, "learning_rate": 4.4166191155492155e-05, "loss": 0.0022, "step": 65460 }, { "epoch": 11.67, "learning_rate": 4.4165299572039946e-05, "loss": 0.0032, "step": 65470 }, { "epoch": 11.68, "learning_rate": 4.416440798858774e-05, "loss": 0.0062, "step": 65480 }, { "epoch": 11.68, "learning_rate": 4.416351640513552e-05, "loss": 0.0065, "step": 65490 }, { "epoch": 11.68, "learning_rate": 4.416262482168331e-05, "loss": 0.003, "step": 65500 }, { "epoch": 11.68, "learning_rate": 4.4161733238231104e-05, "loss": 0.0036, "step": 65510 }, { "epoch": 11.68, "learning_rate": 4.416084165477889e-05, "loss": 0.0033, "step": 65520 }, { "epoch": 11.69, "learning_rate": 4.415995007132668e-05, "loss": 0.006, "step": 65530 }, { "epoch": 11.69, "learning_rate": 4.4159058487874464e-05, "loss": 0.0034, "step": 65540 }, { "epoch": 11.69, "learning_rate": 4.4158166904422256e-05, "loss": 0.0056, "step": 65550 }, { "epoch": 11.69, "learning_rate": 4.415727532097004e-05, "loss": 0.0041, "step": 65560 }, { "epoch": 11.69, "learning_rate": 4.415638373751783e-05, "loss": 0.0051, "step": 65570 }, { "epoch": 11.69, "learning_rate": 4.415549215406562e-05, "loss": 0.0027, "step": 65580 }, { "epoch": 11.7, "learning_rate": 4.4154600570613414e-05, "loss": 0.0058, "step": 65590 }, { "epoch": 11.7, "learning_rate": 4.4153708987161205e-05, "loss": 0.0036, "step": 65600 }, { "epoch": 11.7, "learning_rate": 4.415281740370899e-05, "loss": 0.0049, "step": 65610 }, { "epoch": 11.7, "learning_rate": 4.415192582025678e-05, "loss": 0.0035, "step": 65620 }, { "epoch": 11.7, "learning_rate": 4.4151034236804565e-05, "loss": 0.0027, "step": 65630 }, { "epoch": 11.7, "learning_rate": 4.4150142653352356e-05, "loss": 0.0024, "step": 65640 }, { "epoch": 11.71, "learning_rate": 4.414925106990014e-05, "loss": 0.0043, "step": 65650 }, { "epoch": 11.71, "learning_rate": 4.414835948644793e-05, "loss": 0.0028, "step": 65660 }, { "epoch": 11.71, "learning_rate": 4.414746790299572e-05, "loss": 0.0045, "step": 65670 }, { "epoch": 11.71, "learning_rate": 4.414657631954351e-05, "loss": 0.0057, "step": 65680 }, { "epoch": 11.71, "learning_rate": 4.4145684736091305e-05, "loss": 0.0041, "step": 65690 }, { "epoch": 11.72, "learning_rate": 4.414479315263909e-05, "loss": 0.0037, "step": 65700 }, { "epoch": 11.72, "learning_rate": 4.414390156918688e-05, "loss": 0.0042, "step": 65710 }, { "epoch": 11.72, "learning_rate": 4.4143009985734666e-05, "loss": 0.0036, "step": 65720 }, { "epoch": 11.72, "learning_rate": 4.414211840228246e-05, "loss": 0.006, "step": 65730 }, { "epoch": 11.72, "learning_rate": 4.414122681883025e-05, "loss": 0.0048, "step": 65740 }, { "epoch": 11.72, "learning_rate": 4.414033523537803e-05, "loss": 0.0052, "step": 65750 }, { "epoch": 11.73, "learning_rate": 4.4139443651925824e-05, "loss": 0.0027, "step": 65760 }, { "epoch": 11.73, "learning_rate": 4.413855206847361e-05, "loss": 0.0029, "step": 65770 }, { "epoch": 11.73, "learning_rate": 4.41376604850214e-05, "loss": 0.0053, "step": 65780 }, { "epoch": 11.73, "learning_rate": 4.4136768901569184e-05, "loss": 0.0017, "step": 65790 }, { "epoch": 11.73, "learning_rate": 4.413587731811698e-05, "loss": 0.003, "step": 65800 }, { "epoch": 11.74, "learning_rate": 4.4134985734664766e-05, "loss": 0.0046, "step": 65810 }, { "epoch": 11.74, "learning_rate": 4.413409415121256e-05, "loss": 0.003, "step": 65820 }, { "epoch": 11.74, "learning_rate": 4.413320256776035e-05, "loss": 0.003, "step": 65830 }, { "epoch": 11.74, "learning_rate": 4.413231098430813e-05, "loss": 0.0072, "step": 65840 }, { "epoch": 11.74, "learning_rate": 4.4131419400855924e-05, "loss": 0.0064, "step": 65850 }, { "epoch": 11.74, "learning_rate": 4.413052781740371e-05, "loss": 0.0032, "step": 65860 }, { "epoch": 11.75, "learning_rate": 4.41296362339515e-05, "loss": 0.0057, "step": 65870 }, { "epoch": 11.75, "learning_rate": 4.4128744650499284e-05, "loss": 0.0028, "step": 65880 }, { "epoch": 11.75, "learning_rate": 4.4127853067047075e-05, "loss": 0.0033, "step": 65890 }, { "epoch": 11.75, "learning_rate": 4.412696148359487e-05, "loss": 0.0045, "step": 65900 }, { "epoch": 11.75, "learning_rate": 4.412606990014266e-05, "loss": 0.0033, "step": 65910 }, { "epoch": 11.75, "learning_rate": 4.412517831669045e-05, "loss": 0.0027, "step": 65920 }, { "epoch": 11.76, "learning_rate": 4.4124286733238234e-05, "loss": 0.0063, "step": 65930 }, { "epoch": 11.76, "learning_rate": 4.4123395149786025e-05, "loss": 0.0036, "step": 65940 }, { "epoch": 11.76, "learning_rate": 4.412250356633381e-05, "loss": 0.0045, "step": 65950 }, { "epoch": 11.76, "learning_rate": 4.41216119828816e-05, "loss": 0.0033, "step": 65960 }, { "epoch": 11.76, "learning_rate": 4.412072039942939e-05, "loss": 0.0018, "step": 65970 }, { "epoch": 11.77, "learning_rate": 4.4119828815977176e-05, "loss": 0.0035, "step": 65980 }, { "epoch": 11.77, "learning_rate": 4.411893723252497e-05, "loss": 0.006, "step": 65990 }, { "epoch": 11.77, "learning_rate": 4.411804564907275e-05, "loss": 0.005, "step": 66000 }, { "epoch": 11.77, "learning_rate": 4.411715406562054e-05, "loss": 0.0035, "step": 66010 }, { "epoch": 11.77, "learning_rate": 4.4116262482168334e-05, "loss": 0.0049, "step": 66020 }, { "epoch": 11.77, "learning_rate": 4.4115370898716125e-05, "loss": 0.0043, "step": 66030 }, { "epoch": 11.78, "learning_rate": 4.411447931526391e-05, "loss": 0.0041, "step": 66040 }, { "epoch": 11.78, "learning_rate": 4.41135877318117e-05, "loss": 0.004, "step": 66050 }, { "epoch": 11.78, "learning_rate": 4.411269614835949e-05, "loss": 0.0023, "step": 66060 }, { "epoch": 11.78, "learning_rate": 4.4111804564907277e-05, "loss": 0.004, "step": 66070 }, { "epoch": 11.78, "learning_rate": 4.411091298145507e-05, "loss": 0.0049, "step": 66080 }, { "epoch": 11.78, "learning_rate": 4.411002139800285e-05, "loss": 0.0046, "step": 66090 }, { "epoch": 11.79, "learning_rate": 4.4109129814550643e-05, "loss": 0.0041, "step": 66100 }, { "epoch": 11.79, "learning_rate": 4.410823823109843e-05, "loss": 0.0027, "step": 66110 }, { "epoch": 11.79, "learning_rate": 4.410734664764622e-05, "loss": 0.0042, "step": 66120 }, { "epoch": 11.79, "learning_rate": 4.410645506419401e-05, "loss": 0.0028, "step": 66130 }, { "epoch": 11.79, "learning_rate": 4.41055634807418e-05, "loss": 0.0042, "step": 66140 }, { "epoch": 11.8, "learning_rate": 4.410467189728959e-05, "loss": 0.0069, "step": 66150 }, { "epoch": 11.8, "learning_rate": 4.410378031383738e-05, "loss": 0.0029, "step": 66160 }, { "epoch": 11.8, "learning_rate": 4.410288873038517e-05, "loss": 0.0041, "step": 66170 }, { "epoch": 11.8, "learning_rate": 4.410199714693295e-05, "loss": 0.0032, "step": 66180 }, { "epoch": 11.8, "learning_rate": 4.4101105563480744e-05, "loss": 0.0026, "step": 66190 }, { "epoch": 11.8, "learning_rate": 4.4100213980028535e-05, "loss": 0.0056, "step": 66200 }, { "epoch": 11.81, "learning_rate": 4.409932239657632e-05, "loss": 0.0021, "step": 66210 }, { "epoch": 11.81, "learning_rate": 4.409843081312411e-05, "loss": 0.0029, "step": 66220 }, { "epoch": 11.81, "learning_rate": 4.4097539229671895e-05, "loss": 0.0058, "step": 66230 }, { "epoch": 11.81, "learning_rate": 4.409664764621969e-05, "loss": 0.0021, "step": 66240 }, { "epoch": 11.81, "learning_rate": 4.409575606276748e-05, "loss": 0.0035, "step": 66250 }, { "epoch": 11.82, "learning_rate": 4.409486447931527e-05, "loss": 0.006, "step": 66260 }, { "epoch": 11.82, "learning_rate": 4.409397289586305e-05, "loss": 0.0033, "step": 66270 }, { "epoch": 11.82, "learning_rate": 4.4093081312410845e-05, "loss": 0.0029, "step": 66280 }, { "epoch": 11.82, "learning_rate": 4.4092189728958636e-05, "loss": 0.0041, "step": 66290 }, { "epoch": 11.82, "learning_rate": 4.409129814550642e-05, "loss": 0.0018, "step": 66300 }, { "epoch": 11.82, "learning_rate": 4.409040656205421e-05, "loss": 0.004, "step": 66310 }, { "epoch": 11.83, "learning_rate": 4.4089514978601996e-05, "loss": 0.0031, "step": 66320 }, { "epoch": 11.83, "learning_rate": 4.408862339514979e-05, "loss": 0.0034, "step": 66330 }, { "epoch": 11.83, "learning_rate": 4.408773181169757e-05, "loss": 0.0039, "step": 66340 }, { "epoch": 11.83, "learning_rate": 4.408684022824537e-05, "loss": 0.0017, "step": 66350 }, { "epoch": 11.83, "learning_rate": 4.4085948644793154e-05, "loss": 0.004, "step": 66360 }, { "epoch": 11.83, "learning_rate": 4.4085057061340945e-05, "loss": 0.0048, "step": 66370 }, { "epoch": 11.84, "learning_rate": 4.4084165477888736e-05, "loss": 0.0064, "step": 66380 }, { "epoch": 11.84, "learning_rate": 4.408327389443652e-05, "loss": 0.0028, "step": 66390 }, { "epoch": 11.84, "learning_rate": 4.408238231098431e-05, "loss": 0.0049, "step": 66400 }, { "epoch": 11.84, "learning_rate": 4.4081490727532096e-05, "loss": 0.0039, "step": 66410 }, { "epoch": 11.84, "learning_rate": 4.408059914407989e-05, "loss": 0.0045, "step": 66420 }, { "epoch": 11.85, "learning_rate": 4.407970756062768e-05, "loss": 0.0044, "step": 66430 }, { "epoch": 11.85, "learning_rate": 4.407881597717546e-05, "loss": 0.0046, "step": 66440 }, { "epoch": 11.85, "learning_rate": 4.4077924393723254e-05, "loss": 0.0049, "step": 66450 }, { "epoch": 11.85, "learning_rate": 4.4077032810271046e-05, "loss": 0.0045, "step": 66460 }, { "epoch": 11.85, "learning_rate": 4.407614122681884e-05, "loss": 0.003, "step": 66470 }, { "epoch": 11.85, "learning_rate": 4.407524964336662e-05, "loss": 0.0044, "step": 66480 }, { "epoch": 11.86, "learning_rate": 4.407435805991441e-05, "loss": 0.0033, "step": 66490 }, { "epoch": 11.86, "learning_rate": 4.40734664764622e-05, "loss": 0.0035, "step": 66500 }, { "epoch": 11.86, "learning_rate": 4.407257489300999e-05, "loss": 0.0044, "step": 66510 }, { "epoch": 11.86, "learning_rate": 4.407168330955778e-05, "loss": 0.0052, "step": 66520 }, { "epoch": 11.86, "learning_rate": 4.4070791726105564e-05, "loss": 0.0041, "step": 66530 }, { "epoch": 11.87, "learning_rate": 4.4069900142653355e-05, "loss": 0.0061, "step": 66540 }, { "epoch": 11.87, "learning_rate": 4.406900855920114e-05, "loss": 0.0044, "step": 66550 }, { "epoch": 11.87, "learning_rate": 4.406811697574893e-05, "loss": 0.006, "step": 66560 }, { "epoch": 11.87, "learning_rate": 4.406722539229672e-05, "loss": 0.0027, "step": 66570 }, { "epoch": 11.87, "learning_rate": 4.406642296718973e-05, "loss": 0.0032, "step": 66580 }, { "epoch": 11.87, "learning_rate": 4.406553138373752e-05, "loss": 0.0049, "step": 66590 }, { "epoch": 11.88, "learning_rate": 4.4064639800285306e-05, "loss": 0.0047, "step": 66600 }, { "epoch": 11.88, "learning_rate": 4.40637482168331e-05, "loss": 0.0031, "step": 66610 }, { "epoch": 11.88, "learning_rate": 4.406285663338089e-05, "loss": 0.0061, "step": 66620 }, { "epoch": 11.88, "learning_rate": 4.406196504992868e-05, "loss": 0.0023, "step": 66630 }, { "epoch": 11.88, "learning_rate": 4.4061073466476464e-05, "loss": 0.0051, "step": 66640 }, { "epoch": 11.88, "learning_rate": 4.4060181883024255e-05, "loss": 0.0025, "step": 66650 }, { "epoch": 11.89, "learning_rate": 4.405929029957204e-05, "loss": 0.0063, "step": 66660 }, { "epoch": 11.89, "learning_rate": 4.405839871611983e-05, "loss": 0.0049, "step": 66670 }, { "epoch": 11.89, "learning_rate": 4.405750713266762e-05, "loss": 0.0048, "step": 66680 }, { "epoch": 11.89, "learning_rate": 4.4056615549215406e-05, "loss": 0.0043, "step": 66690 }, { "epoch": 11.89, "learning_rate": 4.40557239657632e-05, "loss": 0.0028, "step": 66700 }, { "epoch": 11.9, "learning_rate": 4.405483238231098e-05, "loss": 0.004, "step": 66710 }, { "epoch": 11.9, "learning_rate": 4.405394079885877e-05, "loss": 0.0039, "step": 66720 }, { "epoch": 11.9, "learning_rate": 4.4053049215406564e-05, "loss": 0.0029, "step": 66730 }, { "epoch": 11.9, "learning_rate": 4.4052157631954355e-05, "loss": 0.0052, "step": 66740 }, { "epoch": 11.9, "learning_rate": 4.4051266048502147e-05, "loss": 0.0025, "step": 66750 }, { "epoch": 11.9, "learning_rate": 4.405037446504993e-05, "loss": 0.0039, "step": 66760 }, { "epoch": 11.91, "learning_rate": 4.404948288159772e-05, "loss": 0.0012, "step": 66770 }, { "epoch": 11.91, "learning_rate": 4.404859129814551e-05, "loss": 0.0032, "step": 66780 }, { "epoch": 11.91, "learning_rate": 4.40476997146933e-05, "loss": 0.0048, "step": 66790 }, { "epoch": 11.91, "learning_rate": 4.404680813124108e-05, "loss": 0.0029, "step": 66800 }, { "epoch": 11.91, "learning_rate": 4.4045916547788874e-05, "loss": 0.0023, "step": 66810 }, { "epoch": 11.92, "learning_rate": 4.4045024964336665e-05, "loss": 0.003, "step": 66820 }, { "epoch": 11.92, "learning_rate": 4.404413338088445e-05, "loss": 0.0062, "step": 66830 }, { "epoch": 11.92, "learning_rate": 4.404324179743225e-05, "loss": 0.0036, "step": 66840 }, { "epoch": 11.92, "learning_rate": 4.404235021398003e-05, "loss": 0.0089, "step": 66850 }, { "epoch": 11.92, "learning_rate": 4.404145863052782e-05, "loss": 0.0032, "step": 66860 }, { "epoch": 11.92, "learning_rate": 4.404056704707561e-05, "loss": 0.0034, "step": 66870 }, { "epoch": 11.93, "learning_rate": 4.40396754636234e-05, "loss": 0.0035, "step": 66880 }, { "epoch": 11.93, "learning_rate": 4.403878388017118e-05, "loss": 0.0023, "step": 66890 }, { "epoch": 11.93, "learning_rate": 4.4037892296718974e-05, "loss": 0.0022, "step": 66900 }, { "epoch": 11.93, "learning_rate": 4.4037000713266765e-05, "loss": 0.003, "step": 66910 }, { "epoch": 11.93, "learning_rate": 4.403610912981455e-05, "loss": 0.0059, "step": 66920 }, { "epoch": 11.93, "learning_rate": 4.403521754636234e-05, "loss": 0.0052, "step": 66930 }, { "epoch": 11.94, "learning_rate": 4.4034325962910125e-05, "loss": 0.0041, "step": 66940 }, { "epoch": 11.94, "learning_rate": 4.403343437945792e-05, "loss": 0.0053, "step": 66950 }, { "epoch": 11.94, "learning_rate": 4.403254279600571e-05, "loss": 0.0035, "step": 66960 }, { "epoch": 11.94, "learning_rate": 4.40316512125535e-05, "loss": 0.005, "step": 66970 }, { "epoch": 11.94, "learning_rate": 4.403075962910129e-05, "loss": 0.0027, "step": 66980 }, { "epoch": 11.95, "learning_rate": 4.4029868045649075e-05, "loss": 0.0047, "step": 66990 }, { "epoch": 11.95, "learning_rate": 4.4028976462196866e-05, "loss": 0.0034, "step": 67000 }, { "epoch": 11.95, "learning_rate": 4.402808487874465e-05, "loss": 0.0025, "step": 67010 }, { "epoch": 11.95, "learning_rate": 4.402719329529244e-05, "loss": 0.0037, "step": 67020 }, { "epoch": 11.95, "learning_rate": 4.4026301711840226e-05, "loss": 0.0033, "step": 67030 }, { "epoch": 11.95, "learning_rate": 4.402541012838802e-05, "loss": 0.004, "step": 67040 }, { "epoch": 11.96, "learning_rate": 4.402451854493581e-05, "loss": 0.0031, "step": 67050 }, { "epoch": 11.96, "learning_rate": 4.40236269614836e-05, "loss": 0.0043, "step": 67060 }, { "epoch": 11.96, "learning_rate": 4.402273537803139e-05, "loss": 0.0044, "step": 67070 }, { "epoch": 11.96, "learning_rate": 4.4021843794579175e-05, "loss": 0.0041, "step": 67080 }, { "epoch": 11.96, "learning_rate": 4.4020952211126966e-05, "loss": 0.0037, "step": 67090 }, { "epoch": 11.97, "learning_rate": 4.402006062767475e-05, "loss": 0.0047, "step": 67100 }, { "epoch": 11.97, "learning_rate": 4.401916904422254e-05, "loss": 0.0019, "step": 67110 }, { "epoch": 11.97, "learning_rate": 4.4018277460770326e-05, "loss": 0.0035, "step": 67120 }, { "epoch": 11.97, "learning_rate": 4.401738587731812e-05, "loss": 0.0026, "step": 67130 }, { "epoch": 11.97, "learning_rate": 4.401649429386591e-05, "loss": 0.0026, "step": 67140 }, { "epoch": 11.97, "learning_rate": 4.401560271041369e-05, "loss": 0.0038, "step": 67150 }, { "epoch": 11.98, "learning_rate": 4.4014711126961485e-05, "loss": 0.0069, "step": 67160 }, { "epoch": 11.98, "learning_rate": 4.4013819543509276e-05, "loss": 0.0051, "step": 67170 }, { "epoch": 11.98, "learning_rate": 4.401292796005707e-05, "loss": 0.0058, "step": 67180 }, { "epoch": 11.98, "learning_rate": 4.401203637660485e-05, "loss": 0.0025, "step": 67190 }, { "epoch": 11.98, "learning_rate": 4.401114479315264e-05, "loss": 0.0051, "step": 67200 }, { "epoch": 11.98, "learning_rate": 4.4010253209700434e-05, "loss": 0.0029, "step": 67210 }, { "epoch": 11.99, "learning_rate": 4.400936162624822e-05, "loss": 0.0035, "step": 67220 }, { "epoch": 11.99, "learning_rate": 4.400847004279601e-05, "loss": 0.0027, "step": 67230 }, { "epoch": 11.99, "learning_rate": 4.4007578459343794e-05, "loss": 0.0032, "step": 67240 }, { "epoch": 11.99, "learning_rate": 4.4006686875891585e-05, "loss": 0.0017, "step": 67250 }, { "epoch": 11.99, "learning_rate": 4.400579529243937e-05, "loss": 0.006, "step": 67260 }, { "epoch": 12.0, "learning_rate": 4.400490370898716e-05, "loss": 0.0044, "step": 67270 }, { "epoch": 12.0, "learning_rate": 4.400401212553495e-05, "loss": 0.0032, "step": 67280 }, { "epoch": 12.0, "learning_rate": 4.400312054208274e-05, "loss": 0.0031, "step": 67290 }, { "epoch": 12.0, "eval_loss": 0.01598210446536541, "eval_runtime": 195.6628, "eval_samples_per_second": 23.709, "eval_steps_per_second": 2.964, "step": 67296 }, { "epoch": 12.0, "learning_rate": 4.4002228958630534e-05, "loss": 0.0026, "step": 67300 }, { "epoch": 12.0, "learning_rate": 4.400133737517832e-05, "loss": 0.0024, "step": 67310 }, { "epoch": 12.0, "learning_rate": 4.400044579172611e-05, "loss": 0.0027, "step": 67320 }, { "epoch": 12.01, "learning_rate": 4.3999554208273894e-05, "loss": 0.0022, "step": 67330 }, { "epoch": 12.01, "learning_rate": 4.3998662624821686e-05, "loss": 0.004, "step": 67340 }, { "epoch": 12.01, "learning_rate": 4.399777104136947e-05, "loss": 0.0037, "step": 67350 }, { "epoch": 12.01, "learning_rate": 4.399687945791726e-05, "loss": 0.0026, "step": 67360 }, { "epoch": 12.01, "learning_rate": 4.399598787446505e-05, "loss": 0.0036, "step": 67370 }, { "epoch": 12.01, "learning_rate": 4.399509629101284e-05, "loss": 0.0032, "step": 67380 }, { "epoch": 12.02, "learning_rate": 4.3994204707560635e-05, "loss": 0.0032, "step": 67390 }, { "epoch": 12.02, "learning_rate": 4.399331312410842e-05, "loss": 0.0039, "step": 67400 }, { "epoch": 12.02, "learning_rate": 4.399242154065621e-05, "loss": 0.0019, "step": 67410 }, { "epoch": 12.02, "learning_rate": 4.3991529957203995e-05, "loss": 0.0034, "step": 67420 }, { "epoch": 12.02, "learning_rate": 4.3990638373751786e-05, "loss": 0.0031, "step": 67430 }, { "epoch": 12.03, "learning_rate": 4.398974679029958e-05, "loss": 0.0062, "step": 67440 }, { "epoch": 12.03, "learning_rate": 4.398885520684736e-05, "loss": 0.0045, "step": 67450 }, { "epoch": 12.03, "learning_rate": 4.398796362339515e-05, "loss": 0.0048, "step": 67460 }, { "epoch": 12.03, "learning_rate": 4.398707203994294e-05, "loss": 0.0037, "step": 67470 }, { "epoch": 12.03, "learning_rate": 4.398618045649073e-05, "loss": 0.0035, "step": 67480 }, { "epoch": 12.03, "learning_rate": 4.398528887303851e-05, "loss": 0.0044, "step": 67490 }, { "epoch": 12.04, "learning_rate": 4.398448644793153e-05, "loss": 0.0055, "step": 67500 }, { "epoch": 12.04, "learning_rate": 4.398359486447932e-05, "loss": 0.0035, "step": 67510 }, { "epoch": 12.04, "learning_rate": 4.3982703281027104e-05, "loss": 0.0037, "step": 67520 }, { "epoch": 12.04, "learning_rate": 4.3981811697574895e-05, "loss": 0.0031, "step": 67530 }, { "epoch": 12.04, "learning_rate": 4.3980920114122686e-05, "loss": 0.0027, "step": 67540 }, { "epoch": 12.05, "learning_rate": 4.398002853067048e-05, "loss": 0.0036, "step": 67550 }, { "epoch": 12.05, "learning_rate": 4.397913694721826e-05, "loss": 0.0033, "step": 67560 }, { "epoch": 12.05, "learning_rate": 4.397824536376605e-05, "loss": 0.0015, "step": 67570 }, { "epoch": 12.05, "learning_rate": 4.397735378031384e-05, "loss": 0.0024, "step": 67580 }, { "epoch": 12.05, "learning_rate": 4.397646219686163e-05, "loss": 0.0051, "step": 67590 }, { "epoch": 12.05, "learning_rate": 4.397557061340942e-05, "loss": 0.0028, "step": 67600 }, { "epoch": 12.06, "learning_rate": 4.3974679029957204e-05, "loss": 0.0018, "step": 67610 }, { "epoch": 12.06, "learning_rate": 4.3973787446504995e-05, "loss": 0.0035, "step": 67620 }, { "epoch": 12.06, "learning_rate": 4.397289586305278e-05, "loss": 0.0042, "step": 67630 }, { "epoch": 12.06, "learning_rate": 4.397200427960057e-05, "loss": 0.0059, "step": 67640 }, { "epoch": 12.06, "learning_rate": 4.3971112696148355e-05, "loss": 0.003, "step": 67650 }, { "epoch": 12.06, "learning_rate": 4.3970221112696153e-05, "loss": 0.0021, "step": 67660 }, { "epoch": 12.07, "learning_rate": 4.396932952924394e-05, "loss": 0.0037, "step": 67670 }, { "epoch": 12.07, "learning_rate": 4.396843794579173e-05, "loss": 0.0036, "step": 67680 }, { "epoch": 12.07, "learning_rate": 4.396754636233952e-05, "loss": 0.0047, "step": 67690 }, { "epoch": 12.07, "learning_rate": 4.3966654778887305e-05, "loss": 0.0051, "step": 67700 }, { "epoch": 12.07, "learning_rate": 4.3965763195435096e-05, "loss": 0.0027, "step": 67710 }, { "epoch": 12.08, "learning_rate": 4.396487161198288e-05, "loss": 0.0034, "step": 67720 }, { "epoch": 12.08, "learning_rate": 4.396398002853067e-05, "loss": 0.0032, "step": 67730 }, { "epoch": 12.08, "learning_rate": 4.396308844507846e-05, "loss": 0.0028, "step": 67740 }, { "epoch": 12.08, "learning_rate": 4.396219686162625e-05, "loss": 0.0036, "step": 67750 }, { "epoch": 12.08, "learning_rate": 4.396130527817404e-05, "loss": 0.0041, "step": 67760 }, { "epoch": 12.08, "learning_rate": 4.396041369472183e-05, "loss": 0.0025, "step": 67770 }, { "epoch": 12.09, "learning_rate": 4.395952211126962e-05, "loss": 0.0046, "step": 67780 }, { "epoch": 12.09, "learning_rate": 4.3958630527817405e-05, "loss": 0.0026, "step": 67790 }, { "epoch": 12.09, "learning_rate": 4.3957738944365197e-05, "loss": 0.003, "step": 67800 }, { "epoch": 12.09, "learning_rate": 4.395684736091298e-05, "loss": 0.0062, "step": 67810 }, { "epoch": 12.09, "learning_rate": 4.395595577746077e-05, "loss": 0.0038, "step": 67820 }, { "epoch": 12.1, "learning_rate": 4.395506419400856e-05, "loss": 0.0033, "step": 67830 }, { "epoch": 12.1, "learning_rate": 4.395417261055635e-05, "loss": 0.0027, "step": 67840 }, { "epoch": 12.1, "learning_rate": 4.395328102710414e-05, "loss": 0.0035, "step": 67850 }, { "epoch": 12.1, "learning_rate": 4.3952389443651923e-05, "loss": 0.0028, "step": 67860 }, { "epoch": 12.1, "learning_rate": 4.3951497860199715e-05, "loss": 0.0039, "step": 67870 }, { "epoch": 12.1, "learning_rate": 4.3950606276747506e-05, "loss": 0.0025, "step": 67880 }, { "epoch": 12.11, "learning_rate": 4.39497146932953e-05, "loss": 0.0054, "step": 67890 }, { "epoch": 12.11, "learning_rate": 4.394882310984308e-05, "loss": 0.0039, "step": 67900 }, { "epoch": 12.11, "learning_rate": 4.394793152639087e-05, "loss": 0.0046, "step": 67910 }, { "epoch": 12.11, "learning_rate": 4.3947039942938664e-05, "loss": 0.0026, "step": 67920 }, { "epoch": 12.11, "learning_rate": 4.394614835948645e-05, "loss": 0.0043, "step": 67930 }, { "epoch": 12.11, "learning_rate": 4.394525677603424e-05, "loss": 0.0071, "step": 67940 }, { "epoch": 12.12, "learning_rate": 4.3944365192582024e-05, "loss": 0.0022, "step": 67950 }, { "epoch": 12.12, "learning_rate": 4.3943473609129815e-05, "loss": 0.0021, "step": 67960 }, { "epoch": 12.12, "learning_rate": 4.3942582025677606e-05, "loss": 0.0019, "step": 67970 }, { "epoch": 12.12, "learning_rate": 4.394169044222539e-05, "loss": 0.0053, "step": 67980 }, { "epoch": 12.12, "learning_rate": 4.394079885877319e-05, "loss": 0.0031, "step": 67990 }, { "epoch": 12.13, "learning_rate": 4.393990727532097e-05, "loss": 0.0027, "step": 68000 }, { "epoch": 12.13, "learning_rate": 4.3939015691868764e-05, "loss": 0.0035, "step": 68010 }, { "epoch": 12.13, "learning_rate": 4.393812410841655e-05, "loss": 0.0048, "step": 68020 }, { "epoch": 12.13, "learning_rate": 4.393723252496434e-05, "loss": 0.0015, "step": 68030 }, { "epoch": 12.13, "learning_rate": 4.3936340941512125e-05, "loss": 0.0016, "step": 68040 }, { "epoch": 12.13, "learning_rate": 4.3935449358059916e-05, "loss": 0.0027, "step": 68050 }, { "epoch": 12.14, "learning_rate": 4.393455777460771e-05, "loss": 0.0015, "step": 68060 }, { "epoch": 12.14, "learning_rate": 4.393366619115549e-05, "loss": 0.0024, "step": 68070 }, { "epoch": 12.14, "learning_rate": 4.393277460770328e-05, "loss": 0.0037, "step": 68080 }, { "epoch": 12.14, "learning_rate": 4.393188302425107e-05, "loss": 0.0036, "step": 68090 }, { "epoch": 12.14, "learning_rate": 4.3930991440798865e-05, "loss": 0.0029, "step": 68100 }, { "epoch": 12.15, "learning_rate": 4.393009985734665e-05, "loss": 0.0028, "step": 68110 }, { "epoch": 12.15, "learning_rate": 4.392920827389444e-05, "loss": 0.0072, "step": 68120 }, { "epoch": 12.15, "learning_rate": 4.3928316690442225e-05, "loss": 0.008, "step": 68130 }, { "epoch": 12.15, "learning_rate": 4.3927425106990016e-05, "loss": 0.0037, "step": 68140 }, { "epoch": 12.15, "learning_rate": 4.392653352353781e-05, "loss": 0.0054, "step": 68150 }, { "epoch": 12.15, "learning_rate": 4.392564194008559e-05, "loss": 0.0031, "step": 68160 }, { "epoch": 12.16, "learning_rate": 4.392475035663338e-05, "loss": 0.0033, "step": 68170 }, { "epoch": 12.16, "learning_rate": 4.392385877318117e-05, "loss": 0.0026, "step": 68180 }, { "epoch": 12.16, "learning_rate": 4.392296718972896e-05, "loss": 0.0027, "step": 68190 }, { "epoch": 12.16, "learning_rate": 4.392207560627675e-05, "loss": 0.0025, "step": 68200 }, { "epoch": 12.16, "learning_rate": 4.392118402282454e-05, "loss": 0.0042, "step": 68210 }, { "epoch": 12.16, "learning_rate": 4.392029243937233e-05, "loss": 0.0057, "step": 68220 }, { "epoch": 12.17, "learning_rate": 4.391940085592012e-05, "loss": 0.0024, "step": 68230 }, { "epoch": 12.17, "learning_rate": 4.391850927246791e-05, "loss": 0.0036, "step": 68240 }, { "epoch": 12.17, "learning_rate": 4.391761768901569e-05, "loss": 0.0021, "step": 68250 }, { "epoch": 12.17, "learning_rate": 4.3916726105563484e-05, "loss": 0.0059, "step": 68260 }, { "epoch": 12.17, "learning_rate": 4.391583452211127e-05, "loss": 0.0061, "step": 68270 }, { "epoch": 12.18, "learning_rate": 4.391494293865906e-05, "loss": 0.0032, "step": 68280 }, { "epoch": 12.18, "learning_rate": 4.391405135520685e-05, "loss": 0.0027, "step": 68290 }, { "epoch": 12.18, "learning_rate": 4.3913159771754635e-05, "loss": 0.0046, "step": 68300 }, { "epoch": 12.18, "learning_rate": 4.3912268188302426e-05, "loss": 0.0026, "step": 68310 }, { "epoch": 12.18, "learning_rate": 4.391137660485022e-05, "loss": 0.0022, "step": 68320 }, { "epoch": 12.18, "learning_rate": 4.391048502139801e-05, "loss": 0.0047, "step": 68330 }, { "epoch": 12.19, "learning_rate": 4.390959343794579e-05, "loss": 0.0039, "step": 68340 }, { "epoch": 12.19, "learning_rate": 4.3908701854493584e-05, "loss": 0.0017, "step": 68350 }, { "epoch": 12.19, "learning_rate": 4.390781027104137e-05, "loss": 0.0027, "step": 68360 }, { "epoch": 12.19, "learning_rate": 4.390691868758916e-05, "loss": 0.0033, "step": 68370 }, { "epoch": 12.19, "learning_rate": 4.390602710413695e-05, "loss": 0.0041, "step": 68380 }, { "epoch": 12.2, "learning_rate": 4.3905135520684736e-05, "loss": 0.0027, "step": 68390 }, { "epoch": 12.2, "learning_rate": 4.390424393723253e-05, "loss": 0.0027, "step": 68400 }, { "epoch": 12.2, "learning_rate": 4.390335235378031e-05, "loss": 0.0082, "step": 68410 }, { "epoch": 12.2, "learning_rate": 4.39024607703281e-05, "loss": 0.0021, "step": 68420 }, { "epoch": 12.2, "learning_rate": 4.3901569186875894e-05, "loss": 0.0021, "step": 68430 }, { "epoch": 12.2, "learning_rate": 4.3900677603423685e-05, "loss": 0.0022, "step": 68440 }, { "epoch": 12.21, "learning_rate": 4.3899786019971476e-05, "loss": 0.0034, "step": 68450 }, { "epoch": 12.21, "learning_rate": 4.389889443651926e-05, "loss": 0.0024, "step": 68460 }, { "epoch": 12.21, "learning_rate": 4.389800285306705e-05, "loss": 0.0029, "step": 68470 }, { "epoch": 12.21, "learning_rate": 4.3897111269614836e-05, "loss": 0.0011, "step": 68480 }, { "epoch": 12.21, "learning_rate": 4.389621968616263e-05, "loss": 0.0042, "step": 68490 }, { "epoch": 12.21, "learning_rate": 4.389532810271041e-05, "loss": 0.0039, "step": 68500 }, { "epoch": 12.22, "learning_rate": 4.38944365192582e-05, "loss": 0.0029, "step": 68510 }, { "epoch": 12.22, "learning_rate": 4.3893544935805994e-05, "loss": 0.0064, "step": 68520 }, { "epoch": 12.22, "learning_rate": 4.389265335235378e-05, "loss": 0.0042, "step": 68530 }, { "epoch": 12.22, "learning_rate": 4.389176176890158e-05, "loss": 0.0043, "step": 68540 }, { "epoch": 12.22, "learning_rate": 4.389087018544936e-05, "loss": 0.0041, "step": 68550 }, { "epoch": 12.23, "learning_rate": 4.388997860199715e-05, "loss": 0.0045, "step": 68560 }, { "epoch": 12.23, "learning_rate": 4.388908701854494e-05, "loss": 0.004, "step": 68570 }, { "epoch": 12.23, "learning_rate": 4.388819543509273e-05, "loss": 0.0032, "step": 68580 }, { "epoch": 12.23, "learning_rate": 4.388730385164051e-05, "loss": 0.0066, "step": 68590 }, { "epoch": 12.23, "learning_rate": 4.3886412268188304e-05, "loss": 0.0079, "step": 68600 }, { "epoch": 12.23, "learning_rate": 4.3885520684736095e-05, "loss": 0.0039, "step": 68610 }, { "epoch": 12.24, "learning_rate": 4.388462910128388e-05, "loss": 0.0031, "step": 68620 }, { "epoch": 12.24, "learning_rate": 4.388373751783167e-05, "loss": 0.0044, "step": 68630 }, { "epoch": 12.24, "learning_rate": 4.3882845934379455e-05, "loss": 0.0047, "step": 68640 }, { "epoch": 12.24, "learning_rate": 4.388195435092725e-05, "loss": 0.0019, "step": 68650 }, { "epoch": 12.24, "learning_rate": 4.388106276747504e-05, "loss": 0.0037, "step": 68660 }, { "epoch": 12.25, "learning_rate": 4.388017118402283e-05, "loss": 0.0033, "step": 68670 }, { "epoch": 12.25, "learning_rate": 4.387927960057062e-05, "loss": 0.0038, "step": 68680 }, { "epoch": 12.25, "learning_rate": 4.3878388017118404e-05, "loss": 0.0028, "step": 68690 }, { "epoch": 12.25, "learning_rate": 4.3877496433666195e-05, "loss": 0.0048, "step": 68700 }, { "epoch": 12.25, "learning_rate": 4.387660485021398e-05, "loss": 0.0033, "step": 68710 }, { "epoch": 12.25, "learning_rate": 4.387571326676177e-05, "loss": 0.0025, "step": 68720 }, { "epoch": 12.26, "learning_rate": 4.3874821683309555e-05, "loss": 0.0045, "step": 68730 }, { "epoch": 12.26, "learning_rate": 4.387393009985735e-05, "loss": 0.0018, "step": 68740 }, { "epoch": 12.26, "learning_rate": 4.387303851640514e-05, "loss": 0.0024, "step": 68750 }, { "epoch": 12.26, "learning_rate": 4.387214693295293e-05, "loss": 0.0018, "step": 68760 }, { "epoch": 12.26, "learning_rate": 4.387125534950072e-05, "loss": 0.0038, "step": 68770 }, { "epoch": 12.26, "learning_rate": 4.3870363766048505e-05, "loss": 0.0026, "step": 68780 }, { "epoch": 12.27, "learning_rate": 4.3869472182596296e-05, "loss": 0.0022, "step": 68790 }, { "epoch": 12.27, "learning_rate": 4.386858059914408e-05, "loss": 0.0017, "step": 68800 }, { "epoch": 12.27, "learning_rate": 4.386768901569187e-05, "loss": 0.0046, "step": 68810 }, { "epoch": 12.27, "learning_rate": 4.3866797432239656e-05, "loss": 0.0035, "step": 68820 }, { "epoch": 12.27, "learning_rate": 4.386590584878745e-05, "loss": 0.0032, "step": 68830 }, { "epoch": 12.28, "learning_rate": 4.386501426533524e-05, "loss": 0.0038, "step": 68840 }, { "epoch": 12.28, "learning_rate": 4.386412268188302e-05, "loss": 0.0033, "step": 68850 }, { "epoch": 12.28, "learning_rate": 4.3863231098430814e-05, "loss": 0.0038, "step": 68860 }, { "epoch": 12.28, "learning_rate": 4.3862339514978605e-05, "loss": 0.0043, "step": 68870 }, { "epoch": 12.28, "learning_rate": 4.3861447931526396e-05, "loss": 0.0019, "step": 68880 }, { "epoch": 12.28, "learning_rate": 4.386055634807418e-05, "loss": 0.0021, "step": 68890 }, { "epoch": 12.29, "learning_rate": 4.385966476462197e-05, "loss": 0.0039, "step": 68900 }, { "epoch": 12.29, "learning_rate": 4.385877318116976e-05, "loss": 0.0027, "step": 68910 }, { "epoch": 12.29, "learning_rate": 4.385788159771755e-05, "loss": 0.0029, "step": 68920 }, { "epoch": 12.29, "learning_rate": 4.385699001426534e-05, "loss": 0.0031, "step": 68930 }, { "epoch": 12.29, "learning_rate": 4.3856098430813123e-05, "loss": 0.0059, "step": 68940 }, { "epoch": 12.29, "learning_rate": 4.3855206847360915e-05, "loss": 0.0037, "step": 68950 }, { "epoch": 12.3, "learning_rate": 4.38543152639087e-05, "loss": 0.0044, "step": 68960 }, { "epoch": 12.3, "learning_rate": 4.385342368045649e-05, "loss": 0.0055, "step": 68970 }, { "epoch": 12.3, "learning_rate": 4.385253209700428e-05, "loss": 0.0037, "step": 68980 }, { "epoch": 12.3, "learning_rate": 4.385164051355207e-05, "loss": 0.0038, "step": 68990 }, { "epoch": 12.3, "learning_rate": 4.3850748930099864e-05, "loss": 0.0041, "step": 69000 }, { "epoch": 12.31, "learning_rate": 4.384985734664765e-05, "loss": 0.0025, "step": 69010 }, { "epoch": 12.31, "learning_rate": 4.384896576319544e-05, "loss": 0.0063, "step": 69020 }, { "epoch": 12.31, "learning_rate": 4.3848074179743224e-05, "loss": 0.0036, "step": 69030 }, { "epoch": 12.31, "learning_rate": 4.3847182596291015e-05, "loss": 0.0066, "step": 69040 }, { "epoch": 12.31, "learning_rate": 4.38462910128388e-05, "loss": 0.0036, "step": 69050 }, { "epoch": 12.31, "learning_rate": 4.384539942938659e-05, "loss": 0.0054, "step": 69060 }, { "epoch": 12.32, "learning_rate": 4.384450784593438e-05, "loss": 0.0061, "step": 69070 }, { "epoch": 12.32, "learning_rate": 4.3843616262482166e-05, "loss": 0.0038, "step": 69080 }, { "epoch": 12.32, "learning_rate": 4.3842724679029964e-05, "loss": 0.0046, "step": 69090 }, { "epoch": 12.32, "learning_rate": 4.384183309557775e-05, "loss": 0.0028, "step": 69100 }, { "epoch": 12.32, "learning_rate": 4.384094151212554e-05, "loss": 0.0028, "step": 69110 }, { "epoch": 12.33, "learning_rate": 4.3840049928673325e-05, "loss": 0.0037, "step": 69120 }, { "epoch": 12.33, "learning_rate": 4.3839158345221116e-05, "loss": 0.0043, "step": 69130 }, { "epoch": 12.33, "learning_rate": 4.383826676176891e-05, "loss": 0.003, "step": 69140 }, { "epoch": 12.33, "learning_rate": 4.383737517831669e-05, "loss": 0.0038, "step": 69150 }, { "epoch": 12.33, "learning_rate": 4.383648359486448e-05, "loss": 0.0063, "step": 69160 }, { "epoch": 12.33, "learning_rate": 4.383559201141227e-05, "loss": 0.0041, "step": 69170 }, { "epoch": 12.34, "learning_rate": 4.383470042796006e-05, "loss": 0.0022, "step": 69180 }, { "epoch": 12.34, "learning_rate": 4.383380884450784e-05, "loss": 0.0027, "step": 69190 }, { "epoch": 12.34, "learning_rate": 4.383291726105564e-05, "loss": 0.0034, "step": 69200 }, { "epoch": 12.34, "learning_rate": 4.3832025677603425e-05, "loss": 0.0029, "step": 69210 }, { "epoch": 12.34, "learning_rate": 4.3831134094151216e-05, "loss": 0.0028, "step": 69220 }, { "epoch": 12.34, "learning_rate": 4.383024251069901e-05, "loss": 0.0048, "step": 69230 }, { "epoch": 12.35, "learning_rate": 4.382935092724679e-05, "loss": 0.0031, "step": 69240 }, { "epoch": 12.35, "learning_rate": 4.382845934379458e-05, "loss": 0.0069, "step": 69250 }, { "epoch": 12.35, "learning_rate": 4.382756776034237e-05, "loss": 0.0062, "step": 69260 }, { "epoch": 12.35, "learning_rate": 4.382667617689016e-05, "loss": 0.0025, "step": 69270 }, { "epoch": 12.35, "learning_rate": 4.382578459343794e-05, "loss": 0.0042, "step": 69280 }, { "epoch": 12.36, "learning_rate": 4.3824893009985734e-05, "loss": 0.0024, "step": 69290 }, { "epoch": 12.36, "learning_rate": 4.3824001426533526e-05, "loss": 0.0028, "step": 69300 }, { "epoch": 12.36, "learning_rate": 4.382310984308132e-05, "loss": 0.0027, "step": 69310 }, { "epoch": 12.36, "learning_rate": 4.382221825962911e-05, "loss": 0.0036, "step": 69320 }, { "epoch": 12.36, "learning_rate": 4.382132667617689e-05, "loss": 0.0017, "step": 69330 }, { "epoch": 12.36, "learning_rate": 4.3820435092724684e-05, "loss": 0.0033, "step": 69340 }, { "epoch": 12.37, "learning_rate": 4.381954350927247e-05, "loss": 0.0037, "step": 69350 }, { "epoch": 12.37, "learning_rate": 4.381865192582026e-05, "loss": 0.0017, "step": 69360 }, { "epoch": 12.37, "learning_rate": 4.381776034236805e-05, "loss": 0.003, "step": 69370 }, { "epoch": 12.37, "learning_rate": 4.3816868758915835e-05, "loss": 0.0053, "step": 69380 }, { "epoch": 12.37, "learning_rate": 4.3815977175463626e-05, "loss": 0.0055, "step": 69390 }, { "epoch": 12.38, "learning_rate": 4.381508559201141e-05, "loss": 0.0031, "step": 69400 }, { "epoch": 12.38, "learning_rate": 4.38141940085592e-05, "loss": 0.005, "step": 69410 }, { "epoch": 12.38, "learning_rate": 4.381330242510699e-05, "loss": 0.0041, "step": 69420 }, { "epoch": 12.38, "learning_rate": 4.3812410841654784e-05, "loss": 0.0062, "step": 69430 }, { "epoch": 12.38, "learning_rate": 4.381151925820257e-05, "loss": 0.005, "step": 69440 }, { "epoch": 12.38, "learning_rate": 4.381062767475036e-05, "loss": 0.0054, "step": 69450 }, { "epoch": 12.39, "learning_rate": 4.380973609129815e-05, "loss": 0.0015, "step": 69460 }, { "epoch": 12.39, "learning_rate": 4.3808844507845936e-05, "loss": 0.0079, "step": 69470 }, { "epoch": 12.39, "learning_rate": 4.380795292439373e-05, "loss": 0.005, "step": 69480 }, { "epoch": 12.39, "learning_rate": 4.380706134094151e-05, "loss": 0.0018, "step": 69490 }, { "epoch": 12.39, "learning_rate": 4.38061697574893e-05, "loss": 0.0035, "step": 69500 }, { "epoch": 12.39, "learning_rate": 4.380527817403709e-05, "loss": 0.0024, "step": 69510 }, { "epoch": 12.4, "learning_rate": 4.380438659058488e-05, "loss": 0.0023, "step": 69520 }, { "epoch": 12.4, "learning_rate": 4.380349500713267e-05, "loss": 0.0042, "step": 69530 }, { "epoch": 12.4, "learning_rate": 4.380260342368046e-05, "loss": 0.0026, "step": 69540 }, { "epoch": 12.4, "learning_rate": 4.380171184022825e-05, "loss": 0.0034, "step": 69550 }, { "epoch": 12.4, "learning_rate": 4.3800820256776036e-05, "loss": 0.0048, "step": 69560 }, { "epoch": 12.41, "learning_rate": 4.379992867332383e-05, "loss": 0.0028, "step": 69570 }, { "epoch": 12.41, "learning_rate": 4.379903708987161e-05, "loss": 0.0042, "step": 69580 }, { "epoch": 12.41, "learning_rate": 4.37981455064194e-05, "loss": 0.0039, "step": 69590 }, { "epoch": 12.41, "learning_rate": 4.379725392296719e-05, "loss": 0.0045, "step": 69600 }, { "epoch": 12.41, "learning_rate": 4.379636233951498e-05, "loss": 0.004, "step": 69610 }, { "epoch": 12.41, "learning_rate": 4.379547075606277e-05, "loss": 0.0028, "step": 69620 }, { "epoch": 12.42, "learning_rate": 4.3794579172610554e-05, "loss": 0.0043, "step": 69630 }, { "epoch": 12.42, "learning_rate": 4.379368758915835e-05, "loss": 0.0023, "step": 69640 }, { "epoch": 12.42, "learning_rate": 4.379279600570614e-05, "loss": 0.0032, "step": 69650 }, { "epoch": 12.42, "learning_rate": 4.379190442225393e-05, "loss": 0.0017, "step": 69660 }, { "epoch": 12.42, "learning_rate": 4.379101283880171e-05, "loss": 0.0029, "step": 69670 }, { "epoch": 12.43, "learning_rate": 4.3790121255349504e-05, "loss": 0.0066, "step": 69680 }, { "epoch": 12.43, "learning_rate": 4.3789229671897295e-05, "loss": 0.0034, "step": 69690 }, { "epoch": 12.43, "learning_rate": 4.378833808844508e-05, "loss": 0.0037, "step": 69700 }, { "epoch": 12.43, "learning_rate": 4.378744650499287e-05, "loss": 0.0032, "step": 69710 }, { "epoch": 12.43, "learning_rate": 4.3786554921540655e-05, "loss": 0.0043, "step": 69720 }, { "epoch": 12.43, "learning_rate": 4.3785663338088446e-05, "loss": 0.0036, "step": 69730 }, { "epoch": 12.44, "learning_rate": 4.378477175463623e-05, "loss": 0.0048, "step": 69740 }, { "epoch": 12.44, "learning_rate": 4.378388017118403e-05, "loss": 0.0033, "step": 69750 }, { "epoch": 12.44, "learning_rate": 4.378298858773181e-05, "loss": 0.0031, "step": 69760 }, { "epoch": 12.44, "learning_rate": 4.3782097004279604e-05, "loss": 0.004, "step": 69770 }, { "epoch": 12.44, "learning_rate": 4.3781205420827395e-05, "loss": 0.0046, "step": 69780 }, { "epoch": 12.44, "learning_rate": 4.378031383737518e-05, "loss": 0.0035, "step": 69790 }, { "epoch": 12.45, "learning_rate": 4.377942225392297e-05, "loss": 0.0023, "step": 69800 }, { "epoch": 12.45, "learning_rate": 4.3778530670470755e-05, "loss": 0.0044, "step": 69810 }, { "epoch": 12.45, "learning_rate": 4.3777639087018547e-05, "loss": 0.0026, "step": 69820 }, { "epoch": 12.45, "learning_rate": 4.377674750356633e-05, "loss": 0.0031, "step": 69830 }, { "epoch": 12.45, "learning_rate": 4.377585592011412e-05, "loss": 0.0019, "step": 69840 }, { "epoch": 12.46, "learning_rate": 4.3774964336661913e-05, "loss": 0.0031, "step": 69850 }, { "epoch": 12.46, "learning_rate": 4.3774072753209705e-05, "loss": 0.0035, "step": 69860 }, { "epoch": 12.46, "learning_rate": 4.3773181169757496e-05, "loss": 0.0047, "step": 69870 }, { "epoch": 12.46, "learning_rate": 4.377228958630528e-05, "loss": 0.002, "step": 69880 }, { "epoch": 12.46, "learning_rate": 4.377139800285307e-05, "loss": 0.0019, "step": 69890 }, { "epoch": 12.46, "learning_rate": 4.3770506419400856e-05, "loss": 0.005, "step": 69900 }, { "epoch": 12.47, "learning_rate": 4.376961483594865e-05, "loss": 0.0027, "step": 69910 }, { "epoch": 12.47, "learning_rate": 4.376872325249644e-05, "loss": 0.0028, "step": 69920 }, { "epoch": 12.47, "learning_rate": 4.376783166904422e-05, "loss": 0.0037, "step": 69930 }, { "epoch": 12.47, "learning_rate": 4.3766940085592014e-05, "loss": 0.0019, "step": 69940 }, { "epoch": 12.47, "learning_rate": 4.37660485021398e-05, "loss": 0.0061, "step": 69950 }, { "epoch": 12.48, "learning_rate": 4.376515691868759e-05, "loss": 0.0028, "step": 69960 }, { "epoch": 12.48, "learning_rate": 4.376426533523538e-05, "loss": 0.0028, "step": 69970 }, { "epoch": 12.48, "learning_rate": 4.376337375178317e-05, "loss": 0.0014, "step": 69980 }, { "epoch": 12.48, "learning_rate": 4.3762482168330956e-05, "loss": 0.004, "step": 69990 }, { "epoch": 12.48, "learning_rate": 4.376159058487875e-05, "loss": 0.0022, "step": 70000 }, { "epoch": 12.48, "learning_rate": 4.376069900142654e-05, "loss": 0.0053, "step": 70010 }, { "epoch": 12.49, "learning_rate": 4.375980741797432e-05, "loss": 0.0047, "step": 70020 }, { "epoch": 12.49, "learning_rate": 4.3758915834522115e-05, "loss": 0.0021, "step": 70030 }, { "epoch": 12.49, "learning_rate": 4.37580242510699e-05, "loss": 0.0035, "step": 70040 }, { "epoch": 12.49, "learning_rate": 4.375713266761769e-05, "loss": 0.0041, "step": 70050 }, { "epoch": 12.49, "learning_rate": 4.3756241084165475e-05, "loss": 0.0032, "step": 70060 }, { "epoch": 12.49, "learning_rate": 4.3755349500713266e-05, "loss": 0.0036, "step": 70070 }, { "epoch": 12.5, "learning_rate": 4.375445791726106e-05, "loss": 0.0031, "step": 70080 }, { "epoch": 12.5, "learning_rate": 4.375356633380885e-05, "loss": 0.0034, "step": 70090 }, { "epoch": 12.5, "learning_rate": 4.375267475035664e-05, "loss": 0.0031, "step": 70100 }, { "epoch": 12.5, "learning_rate": 4.3751783166904424e-05, "loss": 0.0065, "step": 70110 }, { "epoch": 12.5, "learning_rate": 4.3750891583452215e-05, "loss": 0.007, "step": 70120 }, { "epoch": 12.51, "learning_rate": 4.375e-05, "loss": 0.0029, "step": 70130 }, { "epoch": 12.51, "learning_rate": 4.374910841654779e-05, "loss": 0.0041, "step": 70140 }, { "epoch": 12.51, "learning_rate": 4.374821683309558e-05, "loss": 0.0047, "step": 70150 }, { "epoch": 12.51, "learning_rate": 4.3747325249643366e-05, "loss": 0.0034, "step": 70160 }, { "epoch": 12.51, "learning_rate": 4.374643366619116e-05, "loss": 0.0065, "step": 70170 }, { "epoch": 12.51, "learning_rate": 4.374554208273894e-05, "loss": 0.0038, "step": 70180 }, { "epoch": 12.52, "learning_rate": 4.374465049928674e-05, "loss": 0.0058, "step": 70190 }, { "epoch": 12.52, "learning_rate": 4.3743758915834524e-05, "loss": 0.0027, "step": 70200 }, { "epoch": 12.52, "learning_rate": 4.3742867332382316e-05, "loss": 0.0038, "step": 70210 }, { "epoch": 12.52, "learning_rate": 4.37419757489301e-05, "loss": 0.0036, "step": 70220 }, { "epoch": 12.52, "learning_rate": 4.374108416547789e-05, "loss": 0.0051, "step": 70230 }, { "epoch": 12.52, "learning_rate": 4.374019258202568e-05, "loss": 0.0027, "step": 70240 }, { "epoch": 12.53, "learning_rate": 4.373930099857347e-05, "loss": 0.005, "step": 70250 }, { "epoch": 12.53, "learning_rate": 4.373840941512126e-05, "loss": 0.0031, "step": 70260 }, { "epoch": 12.53, "learning_rate": 4.373751783166904e-05, "loss": 0.003, "step": 70270 }, { "epoch": 12.53, "learning_rate": 4.3736626248216834e-05, "loss": 0.003, "step": 70280 }, { "epoch": 12.53, "learning_rate": 4.373573466476462e-05, "loss": 0.0047, "step": 70290 }, { "epoch": 12.54, "learning_rate": 4.3734843081312416e-05, "loss": 0.0037, "step": 70300 }, { "epoch": 12.54, "learning_rate": 4.37339514978602e-05, "loss": 0.0034, "step": 70310 }, { "epoch": 12.54, "learning_rate": 4.373305991440799e-05, "loss": 0.0043, "step": 70320 }, { "epoch": 12.54, "learning_rate": 4.373216833095578e-05, "loss": 0.0025, "step": 70330 }, { "epoch": 12.54, "learning_rate": 4.373127674750357e-05, "loss": 0.0019, "step": 70340 }, { "epoch": 12.54, "learning_rate": 4.373038516405136e-05, "loss": 0.0037, "step": 70350 }, { "epoch": 12.55, "learning_rate": 4.372949358059914e-05, "loss": 0.0032, "step": 70360 }, { "epoch": 12.55, "learning_rate": 4.3728601997146934e-05, "loss": 0.0019, "step": 70370 }, { "epoch": 12.55, "learning_rate": 4.3727710413694726e-05, "loss": 0.0039, "step": 70380 }, { "epoch": 12.55, "learning_rate": 4.372681883024251e-05, "loss": 0.004, "step": 70390 }, { "epoch": 12.55, "learning_rate": 4.37259272467903e-05, "loss": 0.0041, "step": 70400 }, { "epoch": 12.56, "learning_rate": 4.372503566333809e-05, "loss": 0.0027, "step": 70410 }, { "epoch": 12.56, "learning_rate": 4.3724144079885884e-05, "loss": 0.0059, "step": 70420 }, { "epoch": 12.56, "learning_rate": 4.372325249643367e-05, "loss": 0.0037, "step": 70430 }, { "epoch": 12.56, "learning_rate": 4.372236091298146e-05, "loss": 0.0039, "step": 70440 }, { "epoch": 12.56, "learning_rate": 4.3721469329529244e-05, "loss": 0.0037, "step": 70450 }, { "epoch": 12.56, "learning_rate": 4.3720577746077035e-05, "loss": 0.0017, "step": 70460 }, { "epoch": 12.57, "learning_rate": 4.3719686162624826e-05, "loss": 0.0062, "step": 70470 }, { "epoch": 12.57, "learning_rate": 4.371879457917261e-05, "loss": 0.0042, "step": 70480 }, { "epoch": 12.57, "learning_rate": 4.37179029957204e-05, "loss": 0.003, "step": 70490 }, { "epoch": 12.57, "learning_rate": 4.3717011412268186e-05, "loss": 0.0039, "step": 70500 }, { "epoch": 12.57, "learning_rate": 4.371611982881598e-05, "loss": 0.0029, "step": 70510 }, { "epoch": 12.57, "learning_rate": 4.371522824536377e-05, "loss": 0.0053, "step": 70520 }, { "epoch": 12.58, "learning_rate": 4.371433666191156e-05, "loss": 0.0049, "step": 70530 }, { "epoch": 12.58, "learning_rate": 4.3713445078459344e-05, "loss": 0.0044, "step": 70540 }, { "epoch": 12.58, "learning_rate": 4.3712553495007136e-05, "loss": 0.0041, "step": 70550 }, { "epoch": 12.58, "learning_rate": 4.371166191155493e-05, "loss": 0.0066, "step": 70560 }, { "epoch": 12.58, "learning_rate": 4.371077032810271e-05, "loss": 0.0032, "step": 70570 }, { "epoch": 12.59, "learning_rate": 4.37098787446505e-05, "loss": 0.0051, "step": 70580 }, { "epoch": 12.59, "learning_rate": 4.370898716119829e-05, "loss": 0.0039, "step": 70590 }, { "epoch": 12.59, "learning_rate": 4.370809557774608e-05, "loss": 0.0067, "step": 70600 }, { "epoch": 12.59, "learning_rate": 4.370720399429387e-05, "loss": 0.0024, "step": 70610 }, { "epoch": 12.59, "learning_rate": 4.3706312410841654e-05, "loss": 0.0024, "step": 70620 }, { "epoch": 12.59, "learning_rate": 4.370542082738945e-05, "loss": 0.0038, "step": 70630 }, { "epoch": 12.6, "learning_rate": 4.3704529243937236e-05, "loss": 0.0022, "step": 70640 }, { "epoch": 12.6, "learning_rate": 4.370363766048503e-05, "loss": 0.0028, "step": 70650 }, { "epoch": 12.6, "learning_rate": 4.370274607703281e-05, "loss": 0.0022, "step": 70660 }, { "epoch": 12.6, "learning_rate": 4.37018544935806e-05, "loss": 0.0031, "step": 70670 }, { "epoch": 12.6, "learning_rate": 4.370096291012839e-05, "loss": 0.0025, "step": 70680 }, { "epoch": 12.61, "learning_rate": 4.370007132667618e-05, "loss": 0.0032, "step": 70690 }, { "epoch": 12.61, "learning_rate": 4.369917974322397e-05, "loss": 0.006, "step": 70700 }, { "epoch": 12.61, "learning_rate": 4.3698288159771754e-05, "loss": 0.0046, "step": 70710 }, { "epoch": 12.61, "learning_rate": 4.3697396576319545e-05, "loss": 0.0045, "step": 70720 }, { "epoch": 12.61, "learning_rate": 4.369650499286733e-05, "loss": 0.0049, "step": 70730 }, { "epoch": 12.61, "learning_rate": 4.369561340941513e-05, "loss": 0.0032, "step": 70740 }, { "epoch": 12.62, "learning_rate": 4.369472182596291e-05, "loss": 0.005, "step": 70750 }, { "epoch": 12.62, "learning_rate": 4.3693830242510703e-05, "loss": 0.0029, "step": 70760 }, { "epoch": 12.62, "learning_rate": 4.369293865905849e-05, "loss": 0.0035, "step": 70770 }, { "epoch": 12.62, "learning_rate": 4.369204707560628e-05, "loss": 0.0045, "step": 70780 }, { "epoch": 12.62, "learning_rate": 4.369115549215407e-05, "loss": 0.0053, "step": 70790 }, { "epoch": 12.62, "learning_rate": 4.3690263908701855e-05, "loss": 0.0072, "step": 70800 }, { "epoch": 12.63, "learning_rate": 4.3689372325249646e-05, "loss": 0.0033, "step": 70810 }, { "epoch": 12.63, "learning_rate": 4.368848074179743e-05, "loss": 0.0042, "step": 70820 }, { "epoch": 12.63, "learning_rate": 4.368758915834522e-05, "loss": 0.0049, "step": 70830 }, { "epoch": 12.63, "learning_rate": 4.368669757489301e-05, "loss": 0.0078, "step": 70840 }, { "epoch": 12.63, "learning_rate": 4.3685805991440804e-05, "loss": 0.0033, "step": 70850 }, { "epoch": 12.64, "learning_rate": 4.3684914407988595e-05, "loss": 0.002, "step": 70860 }, { "epoch": 12.64, "learning_rate": 4.368402282453638e-05, "loss": 0.0022, "step": 70870 }, { "epoch": 12.64, "learning_rate": 4.368313124108417e-05, "loss": 0.0038, "step": 70880 }, { "epoch": 12.64, "learning_rate": 4.3682239657631955e-05, "loss": 0.0049, "step": 70890 }, { "epoch": 12.64, "learning_rate": 4.3681348074179747e-05, "loss": 0.0028, "step": 70900 }, { "epoch": 12.64, "learning_rate": 4.368045649072753e-05, "loss": 0.0027, "step": 70910 }, { "epoch": 12.65, "learning_rate": 4.367956490727532e-05, "loss": 0.0024, "step": 70920 }, { "epoch": 12.65, "learning_rate": 4.3678673323823113e-05, "loss": 0.003, "step": 70930 }, { "epoch": 12.65, "learning_rate": 4.36777817403709e-05, "loss": 0.0032, "step": 70940 }, { "epoch": 12.65, "learning_rate": 4.367689015691869e-05, "loss": 0.003, "step": 70950 }, { "epoch": 12.65, "learning_rate": 4.367599857346648e-05, "loss": 0.0043, "step": 70960 }, { "epoch": 12.66, "learning_rate": 4.367510699001427e-05, "loss": 0.0023, "step": 70970 }, { "epoch": 12.66, "learning_rate": 4.3674215406562056e-05, "loss": 0.0067, "step": 70980 }, { "epoch": 12.66, "learning_rate": 4.367332382310985e-05, "loss": 0.003, "step": 70990 }, { "epoch": 12.66, "learning_rate": 4.367243223965763e-05, "loss": 0.0048, "step": 71000 }, { "epoch": 12.66, "learning_rate": 4.367154065620542e-05, "loss": 0.0024, "step": 71010 }, { "epoch": 12.66, "learning_rate": 4.3670649072753214e-05, "loss": 0.0041, "step": 71020 }, { "epoch": 12.67, "learning_rate": 4.3669757489301e-05, "loss": 0.0046, "step": 71030 }, { "epoch": 12.67, "learning_rate": 4.366886590584879e-05, "loss": 0.0037, "step": 71040 }, { "epoch": 12.67, "learning_rate": 4.3667974322396574e-05, "loss": 0.003, "step": 71050 }, { "epoch": 12.67, "learning_rate": 4.3667082738944365e-05, "loss": 0.0032, "step": 71060 }, { "epoch": 12.67, "learning_rate": 4.3666191155492156e-05, "loss": 0.0029, "step": 71070 }, { "epoch": 12.67, "learning_rate": 4.366529957203995e-05, "loss": 0.0039, "step": 71080 }, { "epoch": 12.68, "learning_rate": 4.366440798858774e-05, "loss": 0.0041, "step": 71090 }, { "epoch": 12.68, "learning_rate": 4.366351640513552e-05, "loss": 0.0061, "step": 71100 }, { "epoch": 12.68, "learning_rate": 4.3662624821683315e-05, "loss": 0.0025, "step": 71110 }, { "epoch": 12.68, "learning_rate": 4.36617332382311e-05, "loss": 0.004, "step": 71120 }, { "epoch": 12.68, "learning_rate": 4.366084165477889e-05, "loss": 0.0028, "step": 71130 }, { "epoch": 12.69, "learning_rate": 4.3659950071326675e-05, "loss": 0.0022, "step": 71140 }, { "epoch": 12.69, "learning_rate": 4.3659058487874466e-05, "loss": 0.0019, "step": 71150 }, { "epoch": 12.69, "learning_rate": 4.365816690442226e-05, "loss": 0.0036, "step": 71160 }, { "epoch": 12.69, "learning_rate": 4.365727532097004e-05, "loss": 0.0026, "step": 71170 }, { "epoch": 12.69, "learning_rate": 4.365638373751784e-05, "loss": 0.0022, "step": 71180 }, { "epoch": 12.69, "learning_rate": 4.3655492154065624e-05, "loss": 0.0058, "step": 71190 }, { "epoch": 12.7, "learning_rate": 4.3654600570613415e-05, "loss": 0.0041, "step": 71200 }, { "epoch": 12.7, "learning_rate": 4.36537089871612e-05, "loss": 0.0031, "step": 71210 }, { "epoch": 12.7, "learning_rate": 4.365281740370899e-05, "loss": 0.0025, "step": 71220 }, { "epoch": 12.7, "learning_rate": 4.3651925820256775e-05, "loss": 0.0046, "step": 71230 }, { "epoch": 12.7, "learning_rate": 4.3651034236804566e-05, "loss": 0.0045, "step": 71240 }, { "epoch": 12.71, "learning_rate": 4.365014265335236e-05, "loss": 0.0043, "step": 71250 }, { "epoch": 12.71, "learning_rate": 4.364925106990014e-05, "loss": 0.0058, "step": 71260 }, { "epoch": 12.71, "learning_rate": 4.364835948644793e-05, "loss": 0.0045, "step": 71270 }, { "epoch": 12.71, "learning_rate": 4.364746790299572e-05, "loss": 0.0035, "step": 71280 }, { "epoch": 12.71, "learning_rate": 4.3646576319543516e-05, "loss": 0.0023, "step": 71290 }, { "epoch": 12.71, "learning_rate": 4.36456847360913e-05, "loss": 0.0055, "step": 71300 }, { "epoch": 12.72, "learning_rate": 4.364479315263909e-05, "loss": 0.0031, "step": 71310 }, { "epoch": 12.72, "learning_rate": 4.364390156918688e-05, "loss": 0.0012, "step": 71320 }, { "epoch": 12.72, "learning_rate": 4.364300998573467e-05, "loss": 0.0024, "step": 71330 }, { "epoch": 12.72, "learning_rate": 4.364211840228246e-05, "loss": 0.002, "step": 71340 }, { "epoch": 12.72, "learning_rate": 4.364122681883024e-05, "loss": 0.0028, "step": 71350 }, { "epoch": 12.72, "learning_rate": 4.3640335235378034e-05, "loss": 0.0052, "step": 71360 }, { "epoch": 12.73, "learning_rate": 4.363944365192582e-05, "loss": 0.0048, "step": 71370 }, { "epoch": 12.73, "learning_rate": 4.363855206847361e-05, "loss": 0.0027, "step": 71380 }, { "epoch": 12.73, "learning_rate": 4.36376604850214e-05, "loss": 0.0021, "step": 71390 }, { "epoch": 12.73, "learning_rate": 4.363676890156919e-05, "loss": 0.0044, "step": 71400 }, { "epoch": 12.73, "learning_rate": 4.363587731811698e-05, "loss": 0.0054, "step": 71410 }, { "epoch": 12.74, "learning_rate": 4.363498573466477e-05, "loss": 0.008, "step": 71420 }, { "epoch": 12.74, "learning_rate": 4.363409415121256e-05, "loss": 0.0044, "step": 71430 }, { "epoch": 12.74, "learning_rate": 4.363320256776034e-05, "loss": 0.0065, "step": 71440 }, { "epoch": 12.74, "learning_rate": 4.3632310984308134e-05, "loss": 0.0039, "step": 71450 }, { "epoch": 12.74, "learning_rate": 4.363141940085592e-05, "loss": 0.0028, "step": 71460 }, { "epoch": 12.74, "learning_rate": 4.363052781740371e-05, "loss": 0.0021, "step": 71470 }, { "epoch": 12.75, "learning_rate": 4.36296362339515e-05, "loss": 0.006, "step": 71480 }, { "epoch": 12.75, "learning_rate": 4.3628744650499286e-05, "loss": 0.0021, "step": 71490 }, { "epoch": 12.75, "learning_rate": 4.362785306704708e-05, "loss": 0.0035, "step": 71500 }, { "epoch": 12.75, "learning_rate": 4.362696148359487e-05, "loss": 0.0058, "step": 71510 }, { "epoch": 12.75, "learning_rate": 4.362606990014266e-05, "loss": 0.003, "step": 71520 }, { "epoch": 12.75, "learning_rate": 4.3625178316690444e-05, "loss": 0.0045, "step": 71530 }, { "epoch": 12.76, "learning_rate": 4.3624286733238235e-05, "loss": 0.0038, "step": 71540 }, { "epoch": 12.76, "learning_rate": 4.362339514978602e-05, "loss": 0.0036, "step": 71550 }, { "epoch": 12.76, "learning_rate": 4.362250356633381e-05, "loss": 0.0047, "step": 71560 }, { "epoch": 12.76, "learning_rate": 4.36216119828816e-05, "loss": 0.0052, "step": 71570 }, { "epoch": 12.76, "learning_rate": 4.3620720399429386e-05, "loss": 0.0053, "step": 71580 }, { "epoch": 12.77, "learning_rate": 4.361982881597718e-05, "loss": 0.0049, "step": 71590 }, { "epoch": 12.77, "learning_rate": 4.361893723252496e-05, "loss": 0.0066, "step": 71600 }, { "epoch": 12.77, "learning_rate": 4.361804564907275e-05, "loss": 0.0036, "step": 71610 }, { "epoch": 12.77, "learning_rate": 4.3617154065620544e-05, "loss": 0.0044, "step": 71620 }, { "epoch": 12.77, "learning_rate": 4.3616262482168335e-05, "loss": 0.003, "step": 71630 }, { "epoch": 12.77, "learning_rate": 4.361537089871613e-05, "loss": 0.0029, "step": 71640 }, { "epoch": 12.78, "learning_rate": 4.361447931526391e-05, "loss": 0.003, "step": 71650 }, { "epoch": 12.78, "learning_rate": 4.36135877318117e-05, "loss": 0.0037, "step": 71660 }, { "epoch": 12.78, "learning_rate": 4.361269614835949e-05, "loss": 0.0028, "step": 71670 }, { "epoch": 12.78, "learning_rate": 4.361180456490728e-05, "loss": 0.0049, "step": 71680 }, { "epoch": 12.78, "learning_rate": 4.361091298145506e-05, "loss": 0.0053, "step": 71690 }, { "epoch": 12.79, "learning_rate": 4.3610021398002854e-05, "loss": 0.0029, "step": 71700 }, { "epoch": 12.79, "learning_rate": 4.3609129814550645e-05, "loss": 0.0046, "step": 71710 }, { "epoch": 12.79, "learning_rate": 4.360823823109843e-05, "loss": 0.0015, "step": 71720 }, { "epoch": 12.79, "learning_rate": 4.360734664764623e-05, "loss": 0.0057, "step": 71730 }, { "epoch": 12.79, "learning_rate": 4.360645506419401e-05, "loss": 0.0062, "step": 71740 }, { "epoch": 12.79, "learning_rate": 4.36055634807418e-05, "loss": 0.0025, "step": 71750 }, { "epoch": 12.8, "learning_rate": 4.360467189728959e-05, "loss": 0.0051, "step": 71760 }, { "epoch": 12.8, "learning_rate": 4.360378031383738e-05, "loss": 0.002, "step": 71770 }, { "epoch": 12.8, "learning_rate": 4.360288873038516e-05, "loss": 0.0037, "step": 71780 }, { "epoch": 12.8, "learning_rate": 4.3601997146932954e-05, "loss": 0.0041, "step": 71790 }, { "epoch": 12.8, "learning_rate": 4.3601105563480745e-05, "loss": 0.0054, "step": 71800 }, { "epoch": 12.8, "learning_rate": 4.360021398002853e-05, "loss": 0.0047, "step": 71810 }, { "epoch": 12.81, "learning_rate": 4.359932239657632e-05, "loss": 0.002, "step": 71820 }, { "epoch": 12.81, "learning_rate": 4.3598430813124105e-05, "loss": 0.0044, "step": 71830 }, { "epoch": 12.81, "learning_rate": 4.3597539229671903e-05, "loss": 0.0029, "step": 71840 }, { "epoch": 12.81, "learning_rate": 4.359664764621969e-05, "loss": 0.0044, "step": 71850 }, { "epoch": 12.81, "learning_rate": 4.359575606276748e-05, "loss": 0.0031, "step": 71860 }, { "epoch": 12.82, "learning_rate": 4.359486447931527e-05, "loss": 0.0034, "step": 71870 }, { "epoch": 12.82, "learning_rate": 4.3593972895863055e-05, "loss": 0.0042, "step": 71880 }, { "epoch": 12.82, "learning_rate": 4.3593081312410846e-05, "loss": 0.003, "step": 71890 }, { "epoch": 12.82, "learning_rate": 4.359218972895863e-05, "loss": 0.0031, "step": 71900 }, { "epoch": 12.82, "learning_rate": 4.359129814550642e-05, "loss": 0.0022, "step": 71910 }, { "epoch": 12.82, "learning_rate": 4.3590406562054206e-05, "loss": 0.0018, "step": 71920 }, { "epoch": 12.83, "learning_rate": 4.3589514978602e-05, "loss": 0.0016, "step": 71930 }, { "epoch": 12.83, "learning_rate": 4.358862339514979e-05, "loss": 0.004, "step": 71940 }, { "epoch": 12.83, "learning_rate": 4.358773181169758e-05, "loss": 0.0036, "step": 71950 }, { "epoch": 12.83, "learning_rate": 4.358684022824537e-05, "loss": 0.0049, "step": 71960 }, { "epoch": 12.83, "learning_rate": 4.3585948644793155e-05, "loss": 0.0059, "step": 71970 }, { "epoch": 12.84, "learning_rate": 4.3585057061340947e-05, "loss": 0.0054, "step": 71980 }, { "epoch": 12.84, "learning_rate": 4.358416547788873e-05, "loss": 0.003, "step": 71990 }, { "epoch": 12.84, "learning_rate": 4.358327389443652e-05, "loss": 0.0041, "step": 72000 }, { "epoch": 12.84, "learning_rate": 4.3582382310984307e-05, "loss": 0.0028, "step": 72010 }, { "epoch": 12.84, "learning_rate": 4.35814907275321e-05, "loss": 0.0043, "step": 72020 }, { "epoch": 12.84, "learning_rate": 4.358059914407989e-05, "loss": 0.0018, "step": 72030 }, { "epoch": 12.85, "learning_rate": 4.3579707560627673e-05, "loss": 0.002, "step": 72040 }, { "epoch": 12.85, "learning_rate": 4.3578815977175465e-05, "loss": 0.0028, "step": 72050 }, { "epoch": 12.85, "learning_rate": 4.3577924393723256e-05, "loss": 0.0033, "step": 72060 }, { "epoch": 12.85, "learning_rate": 4.357703281027105e-05, "loss": 0.0032, "step": 72070 }, { "epoch": 12.85, "learning_rate": 4.357614122681883e-05, "loss": 0.0024, "step": 72080 }, { "epoch": 12.85, "learning_rate": 4.357524964336662e-05, "loss": 0.0026, "step": 72090 }, { "epoch": 12.86, "learning_rate": 4.3574358059914414e-05, "loss": 0.004, "step": 72100 }, { "epoch": 12.86, "learning_rate": 4.35734664764622e-05, "loss": 0.0059, "step": 72110 }, { "epoch": 12.86, "learning_rate": 4.357257489300999e-05, "loss": 0.0017, "step": 72120 }, { "epoch": 12.86, "learning_rate": 4.3571683309557774e-05, "loss": 0.003, "step": 72130 }, { "epoch": 12.86, "learning_rate": 4.3570791726105565e-05, "loss": 0.0054, "step": 72140 }, { "epoch": 12.87, "learning_rate": 4.356990014265335e-05, "loss": 0.0031, "step": 72150 }, { "epoch": 12.87, "learning_rate": 4.356900855920114e-05, "loss": 0.0033, "step": 72160 }, { "epoch": 12.87, "learning_rate": 4.356811697574893e-05, "loss": 0.0043, "step": 72170 }, { "epoch": 12.87, "learning_rate": 4.356722539229672e-05, "loss": 0.0029, "step": 72180 }, { "epoch": 12.87, "learning_rate": 4.3566333808844514e-05, "loss": 0.0031, "step": 72190 }, { "epoch": 12.87, "learning_rate": 4.35654422253923e-05, "loss": 0.0028, "step": 72200 }, { "epoch": 12.88, "learning_rate": 4.356455064194009e-05, "loss": 0.0048, "step": 72210 }, { "epoch": 12.88, "learning_rate": 4.3563659058487875e-05, "loss": 0.0031, "step": 72220 }, { "epoch": 12.88, "learning_rate": 4.3562767475035666e-05, "loss": 0.0047, "step": 72230 }, { "epoch": 12.88, "learning_rate": 4.356187589158345e-05, "loss": 0.0032, "step": 72240 }, { "epoch": 12.88, "learning_rate": 4.356098430813124e-05, "loss": 0.0035, "step": 72250 }, { "epoch": 12.89, "learning_rate": 4.356009272467903e-05, "loss": 0.0028, "step": 72260 }, { "epoch": 12.89, "learning_rate": 4.355920114122682e-05, "loss": 0.0037, "step": 72270 }, { "epoch": 12.89, "learning_rate": 4.3558309557774615e-05, "loss": 0.002, "step": 72280 }, { "epoch": 12.89, "learning_rate": 4.35574179743224e-05, "loss": 0.0018, "step": 72290 }, { "epoch": 12.89, "learning_rate": 4.355652639087019e-05, "loss": 0.0056, "step": 72300 }, { "epoch": 12.89, "learning_rate": 4.3555634807417975e-05, "loss": 0.0025, "step": 72310 }, { "epoch": 12.9, "learning_rate": 4.3554743223965766e-05, "loss": 0.0024, "step": 72320 }, { "epoch": 12.9, "learning_rate": 4.355385164051356e-05, "loss": 0.0055, "step": 72330 }, { "epoch": 12.9, "learning_rate": 4.355296005706134e-05, "loss": 0.0038, "step": 72340 }, { "epoch": 12.9, "learning_rate": 4.355206847360913e-05, "loss": 0.0049, "step": 72350 }, { "epoch": 12.9, "learning_rate": 4.355117689015692e-05, "loss": 0.0037, "step": 72360 }, { "epoch": 12.9, "learning_rate": 4.355028530670471e-05, "loss": 0.0033, "step": 72370 }, { "epoch": 12.91, "learning_rate": 4.354939372325249e-05, "loss": 0.0024, "step": 72380 }, { "epoch": 12.91, "learning_rate": 4.354850213980029e-05, "loss": 0.0018, "step": 72390 }, { "epoch": 12.91, "learning_rate": 4.3547610556348076e-05, "loss": 0.0032, "step": 72400 }, { "epoch": 12.91, "learning_rate": 4.354671897289587e-05, "loss": 0.0031, "step": 72410 }, { "epoch": 12.91, "learning_rate": 4.354582738944366e-05, "loss": 0.0021, "step": 72420 }, { "epoch": 12.92, "learning_rate": 4.354493580599144e-05, "loss": 0.006, "step": 72430 }, { "epoch": 12.92, "learning_rate": 4.3544044222539234e-05, "loss": 0.0076, "step": 72440 }, { "epoch": 12.92, "learning_rate": 4.354315263908702e-05, "loss": 0.0054, "step": 72450 }, { "epoch": 12.92, "learning_rate": 4.354226105563481e-05, "loss": 0.0026, "step": 72460 }, { "epoch": 12.92, "learning_rate": 4.3541369472182594e-05, "loss": 0.0036, "step": 72470 }, { "epoch": 12.92, "learning_rate": 4.3540477888730385e-05, "loss": 0.0031, "step": 72480 }, { "epoch": 12.93, "learning_rate": 4.3539586305278176e-05, "loss": 0.0038, "step": 72490 }, { "epoch": 12.93, "learning_rate": 4.353869472182597e-05, "loss": 0.0019, "step": 72500 }, { "epoch": 12.93, "learning_rate": 4.353780313837376e-05, "loss": 0.0045, "step": 72510 }, { "epoch": 12.93, "learning_rate": 4.353691155492154e-05, "loss": 0.0028, "step": 72520 }, { "epoch": 12.93, "learning_rate": 4.3536019971469334e-05, "loss": 0.0046, "step": 72530 }, { "epoch": 12.94, "learning_rate": 4.353512838801712e-05, "loss": 0.0044, "step": 72540 }, { "epoch": 12.94, "learning_rate": 4.353423680456491e-05, "loss": 0.0036, "step": 72550 }, { "epoch": 12.94, "learning_rate": 4.35333452211127e-05, "loss": 0.0036, "step": 72560 }, { "epoch": 12.94, "learning_rate": 4.3532453637660486e-05, "loss": 0.0024, "step": 72570 }, { "epoch": 12.94, "learning_rate": 4.353156205420828e-05, "loss": 0.0043, "step": 72580 }, { "epoch": 12.94, "learning_rate": 4.353067047075606e-05, "loss": 0.004, "step": 72590 }, { "epoch": 12.95, "learning_rate": 4.352977888730385e-05, "loss": 0.0026, "step": 72600 }, { "epoch": 12.95, "learning_rate": 4.3528887303851644e-05, "loss": 0.0028, "step": 72610 }, { "epoch": 12.95, "learning_rate": 4.3527995720399435e-05, "loss": 0.0059, "step": 72620 }, { "epoch": 12.95, "learning_rate": 4.352710413694722e-05, "loss": 0.003, "step": 72630 }, { "epoch": 12.95, "learning_rate": 4.352621255349501e-05, "loss": 0.0021, "step": 72640 }, { "epoch": 12.95, "learning_rate": 4.35253209700428e-05, "loss": 0.0068, "step": 72650 }, { "epoch": 12.96, "learning_rate": 4.3524429386590586e-05, "loss": 0.0046, "step": 72660 }, { "epoch": 12.96, "learning_rate": 4.352353780313838e-05, "loss": 0.0024, "step": 72670 }, { "epoch": 12.96, "learning_rate": 4.352264621968616e-05, "loss": 0.005, "step": 72680 }, { "epoch": 12.96, "learning_rate": 4.352175463623395e-05, "loss": 0.0039, "step": 72690 }, { "epoch": 12.96, "learning_rate": 4.352086305278174e-05, "loss": 0.0042, "step": 72700 }, { "epoch": 12.97, "learning_rate": 4.351997146932953e-05, "loss": 0.0047, "step": 72710 }, { "epoch": 12.97, "learning_rate": 4.351907988587732e-05, "loss": 0.0027, "step": 72720 }, { "epoch": 12.97, "learning_rate": 4.351818830242511e-05, "loss": 0.0023, "step": 72730 }, { "epoch": 12.97, "learning_rate": 4.35172967189729e-05, "loss": 0.0029, "step": 72740 }, { "epoch": 12.97, "learning_rate": 4.351640513552069e-05, "loss": 0.0029, "step": 72750 }, { "epoch": 12.97, "learning_rate": 4.351551355206848e-05, "loss": 0.0061, "step": 72760 }, { "epoch": 12.98, "learning_rate": 4.351462196861626e-05, "loss": 0.0032, "step": 72770 }, { "epoch": 12.98, "learning_rate": 4.3513730385164054e-05, "loss": 0.0042, "step": 72780 }, { "epoch": 12.98, "learning_rate": 4.3512838801711845e-05, "loss": 0.0022, "step": 72790 }, { "epoch": 12.98, "learning_rate": 4.351194721825963e-05, "loss": 0.0025, "step": 72800 }, { "epoch": 12.98, "learning_rate": 4.351105563480742e-05, "loss": 0.0023, "step": 72810 }, { "epoch": 12.99, "learning_rate": 4.3510164051355205e-05, "loss": 0.0042, "step": 72820 }, { "epoch": 12.99, "learning_rate": 4.3509272467902996e-05, "loss": 0.0037, "step": 72830 }, { "epoch": 12.99, "learning_rate": 4.350838088445079e-05, "loss": 0.0046, "step": 72840 }, { "epoch": 12.99, "learning_rate": 4.350748930099858e-05, "loss": 0.0034, "step": 72850 }, { "epoch": 12.99, "learning_rate": 4.350659771754636e-05, "loss": 0.0035, "step": 72860 }, { "epoch": 12.99, "learning_rate": 4.3505706134094154e-05, "loss": 0.0039, "step": 72870 }, { "epoch": 13.0, "learning_rate": 4.3504814550641945e-05, "loss": 0.0047, "step": 72880 }, { "epoch": 13.0, "learning_rate": 4.350392296718973e-05, "loss": 0.0039, "step": 72890 }, { "epoch": 13.0, "learning_rate": 4.350303138373752e-05, "loss": 0.0038, "step": 72900 }, { "epoch": 13.0, "eval_loss": 0.02078627049922943, "eval_runtime": 196.3604, "eval_samples_per_second": 23.625, "eval_steps_per_second": 2.954, "step": 72904 }, { "epoch": 13.0, "learning_rate": 4.3502139800285305e-05, "loss": 0.0032, "step": 72910 }, { "epoch": 13.0, "learning_rate": 4.35012482168331e-05, "loss": 0.0034, "step": 72920 }, { "epoch": 13.0, "learning_rate": 4.350035663338088e-05, "loss": 0.0011, "step": 72930 }, { "epoch": 13.01, "learning_rate": 4.349946504992867e-05, "loss": 0.0058, "step": 72940 }, { "epoch": 13.01, "learning_rate": 4.3498573466476463e-05, "loss": 0.0044, "step": 72950 }, { "epoch": 13.01, "learning_rate": 4.3497681883024255e-05, "loss": 0.0044, "step": 72960 }, { "epoch": 13.01, "learning_rate": 4.3496790299572046e-05, "loss": 0.0037, "step": 72970 }, { "epoch": 13.01, "learning_rate": 4.349589871611983e-05, "loss": 0.0027, "step": 72980 }, { "epoch": 13.02, "learning_rate": 4.349500713266762e-05, "loss": 0.0046, "step": 72990 }, { "epoch": 13.02, "learning_rate": 4.3494115549215406e-05, "loss": 0.0045, "step": 73000 }, { "epoch": 13.02, "learning_rate": 4.34932239657632e-05, "loss": 0.0028, "step": 73010 }, { "epoch": 13.02, "learning_rate": 4.349233238231099e-05, "loss": 0.0048, "step": 73020 }, { "epoch": 13.02, "learning_rate": 4.349144079885877e-05, "loss": 0.0033, "step": 73030 }, { "epoch": 13.02, "learning_rate": 4.3490549215406564e-05, "loss": 0.0022, "step": 73040 }, { "epoch": 13.03, "learning_rate": 4.348965763195435e-05, "loss": 0.0033, "step": 73050 }, { "epoch": 13.03, "learning_rate": 4.3488766048502146e-05, "loss": 0.003, "step": 73060 }, { "epoch": 13.03, "learning_rate": 4.348787446504993e-05, "loss": 0.0026, "step": 73070 }, { "epoch": 13.03, "learning_rate": 4.348698288159772e-05, "loss": 0.0034, "step": 73080 }, { "epoch": 13.03, "learning_rate": 4.3486091298145507e-05, "loss": 0.0031, "step": 73090 }, { "epoch": 13.03, "learning_rate": 4.34851997146933e-05, "loss": 0.0028, "step": 73100 }, { "epoch": 13.04, "learning_rate": 4.348430813124109e-05, "loss": 0.0031, "step": 73110 }, { "epoch": 13.04, "learning_rate": 4.3483416547788873e-05, "loss": 0.0013, "step": 73120 }, { "epoch": 13.04, "learning_rate": 4.3482524964336665e-05, "loss": 0.0029, "step": 73130 }, { "epoch": 13.04, "learning_rate": 4.348163338088445e-05, "loss": 0.0049, "step": 73140 }, { "epoch": 13.04, "learning_rate": 4.348074179743224e-05, "loss": 0.0034, "step": 73150 }, { "epoch": 13.05, "learning_rate": 4.3479850213980025e-05, "loss": 0.0032, "step": 73160 }, { "epoch": 13.05, "learning_rate": 4.347895863052782e-05, "loss": 0.003, "step": 73170 }, { "epoch": 13.05, "learning_rate": 4.347806704707561e-05, "loss": 0.0018, "step": 73180 }, { "epoch": 13.05, "learning_rate": 4.34771754636234e-05, "loss": 0.0014, "step": 73190 }, { "epoch": 13.05, "learning_rate": 4.347628388017119e-05, "loss": 0.0038, "step": 73200 }, { "epoch": 13.05, "learning_rate": 4.3475392296718974e-05, "loss": 0.0044, "step": 73210 }, { "epoch": 13.06, "learning_rate": 4.3474500713266765e-05, "loss": 0.0021, "step": 73220 }, { "epoch": 13.06, "learning_rate": 4.347369828815977e-05, "loss": 0.0077, "step": 73230 }, { "epoch": 13.06, "learning_rate": 4.3472806704707564e-05, "loss": 0.0055, "step": 73240 }, { "epoch": 13.06, "learning_rate": 4.347191512125535e-05, "loss": 0.0037, "step": 73250 }, { "epoch": 13.06, "learning_rate": 4.347102353780314e-05, "loss": 0.0026, "step": 73260 }, { "epoch": 13.07, "learning_rate": 4.347013195435093e-05, "loss": 0.0031, "step": 73270 }, { "epoch": 13.07, "learning_rate": 4.3469240370898716e-05, "loss": 0.0026, "step": 73280 }, { "epoch": 13.07, "learning_rate": 4.346834878744651e-05, "loss": 0.0033, "step": 73290 }, { "epoch": 13.07, "learning_rate": 4.346745720399429e-05, "loss": 0.0028, "step": 73300 }, { "epoch": 13.07, "learning_rate": 4.346656562054208e-05, "loss": 0.0059, "step": 73310 }, { "epoch": 13.07, "learning_rate": 4.3465674037089874e-05, "loss": 0.0029, "step": 73320 }, { "epoch": 13.08, "learning_rate": 4.3464782453637665e-05, "loss": 0.0013, "step": 73330 }, { "epoch": 13.08, "learning_rate": 4.3463890870185456e-05, "loss": 0.0032, "step": 73340 }, { "epoch": 13.08, "learning_rate": 4.346299928673324e-05, "loss": 0.0036, "step": 73350 }, { "epoch": 13.08, "learning_rate": 4.346210770328103e-05, "loss": 0.0025, "step": 73360 }, { "epoch": 13.08, "learning_rate": 4.3461216119828816e-05, "loss": 0.0029, "step": 73370 }, { "epoch": 13.08, "learning_rate": 4.346032453637661e-05, "loss": 0.003, "step": 73380 }, { "epoch": 13.09, "learning_rate": 4.345943295292439e-05, "loss": 0.0025, "step": 73390 }, { "epoch": 13.09, "learning_rate": 4.345854136947218e-05, "loss": 0.0036, "step": 73400 }, { "epoch": 13.09, "learning_rate": 4.3457649786019974e-05, "loss": 0.0027, "step": 73410 }, { "epoch": 13.09, "learning_rate": 4.345675820256776e-05, "loss": 0.0027, "step": 73420 }, { "epoch": 13.09, "learning_rate": 4.345586661911556e-05, "loss": 0.0044, "step": 73430 }, { "epoch": 13.1, "learning_rate": 4.345497503566334e-05, "loss": 0.0019, "step": 73440 }, { "epoch": 13.1, "learning_rate": 4.345408345221113e-05, "loss": 0.0045, "step": 73450 }, { "epoch": 13.1, "learning_rate": 4.345319186875892e-05, "loss": 0.002, "step": 73460 }, { "epoch": 13.1, "learning_rate": 4.345230028530671e-05, "loss": 0.0031, "step": 73470 }, { "epoch": 13.1, "learning_rate": 4.345140870185449e-05, "loss": 0.0038, "step": 73480 }, { "epoch": 13.1, "learning_rate": 4.3450517118402284e-05, "loss": 0.0039, "step": 73490 }, { "epoch": 13.11, "learning_rate": 4.3449625534950075e-05, "loss": 0.0034, "step": 73500 }, { "epoch": 13.11, "learning_rate": 4.344873395149786e-05, "loss": 0.0022, "step": 73510 }, { "epoch": 13.11, "learning_rate": 4.344784236804565e-05, "loss": 0.0026, "step": 73520 }, { "epoch": 13.11, "learning_rate": 4.3446950784593435e-05, "loss": 0.002, "step": 73530 }, { "epoch": 13.11, "learning_rate": 4.344605920114123e-05, "loss": 0.004, "step": 73540 }, { "epoch": 13.12, "learning_rate": 4.344516761768902e-05, "loss": 0.0024, "step": 73550 }, { "epoch": 13.12, "learning_rate": 4.344427603423681e-05, "loss": 0.0064, "step": 73560 }, { "epoch": 13.12, "learning_rate": 4.34433844507846e-05, "loss": 0.0039, "step": 73570 }, { "epoch": 13.12, "learning_rate": 4.3442492867332384e-05, "loss": 0.0021, "step": 73580 }, { "epoch": 13.12, "learning_rate": 4.3441601283880175e-05, "loss": 0.0031, "step": 73590 }, { "epoch": 13.12, "learning_rate": 4.344070970042796e-05, "loss": 0.0035, "step": 73600 }, { "epoch": 13.13, "learning_rate": 4.343981811697575e-05, "loss": 0.0023, "step": 73610 }, { "epoch": 13.13, "learning_rate": 4.3438926533523536e-05, "loss": 0.0048, "step": 73620 }, { "epoch": 13.13, "learning_rate": 4.343803495007133e-05, "loss": 0.0035, "step": 73630 }, { "epoch": 13.13, "learning_rate": 4.343714336661912e-05, "loss": 0.0025, "step": 73640 }, { "epoch": 13.13, "learning_rate": 4.343625178316691e-05, "loss": 0.002, "step": 73650 }, { "epoch": 13.13, "learning_rate": 4.34353601997147e-05, "loss": 0.0027, "step": 73660 }, { "epoch": 13.14, "learning_rate": 4.3434468616262485e-05, "loss": 0.0022, "step": 73670 }, { "epoch": 13.14, "learning_rate": 4.3433577032810276e-05, "loss": 0.0027, "step": 73680 }, { "epoch": 13.14, "learning_rate": 4.343268544935806e-05, "loss": 0.0014, "step": 73690 }, { "epoch": 13.14, "learning_rate": 4.343179386590585e-05, "loss": 0.0046, "step": 73700 }, { "epoch": 13.14, "learning_rate": 4.3430902282453636e-05, "loss": 0.0037, "step": 73710 }, { "epoch": 13.15, "learning_rate": 4.343001069900143e-05, "loss": 0.0029, "step": 73720 }, { "epoch": 13.15, "learning_rate": 4.342911911554922e-05, "loss": 0.0038, "step": 73730 }, { "epoch": 13.15, "learning_rate": 4.3428227532097e-05, "loss": 0.002, "step": 73740 }, { "epoch": 13.15, "learning_rate": 4.3427335948644794e-05, "loss": 0.0018, "step": 73750 }, { "epoch": 13.15, "learning_rate": 4.3426444365192585e-05, "loss": 0.0019, "step": 73760 }, { "epoch": 13.15, "learning_rate": 4.3425552781740377e-05, "loss": 0.0039, "step": 73770 }, { "epoch": 13.16, "learning_rate": 4.342466119828816e-05, "loss": 0.0039, "step": 73780 }, { "epoch": 13.16, "learning_rate": 4.342376961483595e-05, "loss": 0.003, "step": 73790 }, { "epoch": 13.16, "learning_rate": 4.3422878031383743e-05, "loss": 0.0036, "step": 73800 }, { "epoch": 13.16, "learning_rate": 4.342198644793153e-05, "loss": 0.0025, "step": 73810 }, { "epoch": 13.16, "learning_rate": 4.342109486447932e-05, "loss": 0.0035, "step": 73820 }, { "epoch": 13.17, "learning_rate": 4.3420203281027103e-05, "loss": 0.0027, "step": 73830 }, { "epoch": 13.17, "learning_rate": 4.3419311697574895e-05, "loss": 0.0035, "step": 73840 }, { "epoch": 13.17, "learning_rate": 4.341842011412268e-05, "loss": 0.0043, "step": 73850 }, { "epoch": 13.17, "learning_rate": 4.341752853067047e-05, "loss": 0.0019, "step": 73860 }, { "epoch": 13.17, "learning_rate": 4.341663694721826e-05, "loss": 0.0046, "step": 73870 }, { "epoch": 13.17, "learning_rate": 4.341574536376605e-05, "loss": 0.0021, "step": 73880 }, { "epoch": 13.18, "learning_rate": 4.3414853780313844e-05, "loss": 0.0037, "step": 73890 }, { "epoch": 13.18, "learning_rate": 4.341396219686163e-05, "loss": 0.0045, "step": 73900 }, { "epoch": 13.18, "learning_rate": 4.341307061340942e-05, "loss": 0.0035, "step": 73910 }, { "epoch": 13.18, "learning_rate": 4.3412179029957204e-05, "loss": 0.0049, "step": 73920 }, { "epoch": 13.18, "learning_rate": 4.3411287446504995e-05, "loss": 0.0037, "step": 73930 }, { "epoch": 13.18, "learning_rate": 4.341039586305278e-05, "loss": 0.0024, "step": 73940 }, { "epoch": 13.19, "learning_rate": 4.340950427960057e-05, "loss": 0.003, "step": 73950 }, { "epoch": 13.19, "learning_rate": 4.340861269614836e-05, "loss": 0.0027, "step": 73960 }, { "epoch": 13.19, "learning_rate": 4.3407721112696147e-05, "loss": 0.0032, "step": 73970 }, { "epoch": 13.19, "learning_rate": 4.3406829529243945e-05, "loss": 0.0018, "step": 73980 }, { "epoch": 13.19, "learning_rate": 4.340593794579173e-05, "loss": 0.0027, "step": 73990 }, { "epoch": 13.2, "learning_rate": 4.340504636233952e-05, "loss": 0.0045, "step": 74000 }, { "epoch": 13.2, "learning_rate": 4.3404154778887305e-05, "loss": 0.0026, "step": 74010 }, { "epoch": 13.2, "learning_rate": 4.3403263195435096e-05, "loss": 0.0031, "step": 74020 }, { "epoch": 13.2, "learning_rate": 4.340237161198289e-05, "loss": 0.0014, "step": 74030 }, { "epoch": 13.2, "learning_rate": 4.340148002853067e-05, "loss": 0.0049, "step": 74040 }, { "epoch": 13.2, "learning_rate": 4.340058844507846e-05, "loss": 0.0035, "step": 74050 }, { "epoch": 13.21, "learning_rate": 4.339969686162625e-05, "loss": 0.0036, "step": 74060 }, { "epoch": 13.21, "learning_rate": 4.339880527817404e-05, "loss": 0.0068, "step": 74070 }, { "epoch": 13.21, "learning_rate": 4.339791369472182e-05, "loss": 0.0033, "step": 74080 }, { "epoch": 13.21, "learning_rate": 4.339702211126962e-05, "loss": 0.0037, "step": 74090 }, { "epoch": 13.21, "learning_rate": 4.3396130527817405e-05, "loss": 0.0027, "step": 74100 }, { "epoch": 13.22, "learning_rate": 4.3395238944365196e-05, "loss": 0.0027, "step": 74110 }, { "epoch": 13.22, "learning_rate": 4.339434736091299e-05, "loss": 0.0039, "step": 74120 }, { "epoch": 13.22, "learning_rate": 4.339345577746077e-05, "loss": 0.0027, "step": 74130 }, { "epoch": 13.22, "learning_rate": 4.339256419400856e-05, "loss": 0.0032, "step": 74140 }, { "epoch": 13.22, "learning_rate": 4.339167261055635e-05, "loss": 0.0025, "step": 74150 }, { "epoch": 13.22, "learning_rate": 4.339078102710414e-05, "loss": 0.0022, "step": 74160 }, { "epoch": 13.23, "learning_rate": 4.338988944365192e-05, "loss": 0.0026, "step": 74170 }, { "epoch": 13.23, "learning_rate": 4.3388997860199715e-05, "loss": 0.0037, "step": 74180 }, { "epoch": 13.23, "learning_rate": 4.3388106276747506e-05, "loss": 0.0031, "step": 74190 }, { "epoch": 13.23, "learning_rate": 4.33872146932953e-05, "loss": 0.0035, "step": 74200 }, { "epoch": 13.23, "learning_rate": 4.338632310984309e-05, "loss": 0.0037, "step": 74210 }, { "epoch": 13.23, "learning_rate": 4.338543152639087e-05, "loss": 0.0013, "step": 74220 }, { "epoch": 13.24, "learning_rate": 4.3384539942938664e-05, "loss": 0.0023, "step": 74230 }, { "epoch": 13.24, "learning_rate": 4.338364835948645e-05, "loss": 0.0037, "step": 74240 }, { "epoch": 13.24, "learning_rate": 4.338275677603424e-05, "loss": 0.0023, "step": 74250 }, { "epoch": 13.24, "learning_rate": 4.338186519258203e-05, "loss": 0.0014, "step": 74260 }, { "epoch": 13.24, "learning_rate": 4.3380973609129815e-05, "loss": 0.0029, "step": 74270 }, { "epoch": 13.25, "learning_rate": 4.3380082025677606e-05, "loss": 0.0027, "step": 74280 }, { "epoch": 13.25, "learning_rate": 4.337919044222539e-05, "loss": 0.0046, "step": 74290 }, { "epoch": 13.25, "learning_rate": 4.337829885877318e-05, "loss": 0.001, "step": 74300 }, { "epoch": 13.25, "learning_rate": 4.337740727532097e-05, "loss": 0.0025, "step": 74310 }, { "epoch": 13.25, "learning_rate": 4.3376515691868764e-05, "loss": 0.0026, "step": 74320 }, { "epoch": 13.25, "learning_rate": 4.337562410841655e-05, "loss": 0.0012, "step": 74330 }, { "epoch": 13.26, "learning_rate": 4.337473252496434e-05, "loss": 0.0012, "step": 74340 }, { "epoch": 13.26, "learning_rate": 4.337384094151213e-05, "loss": 0.0039, "step": 74350 }, { "epoch": 13.26, "learning_rate": 4.3372949358059916e-05, "loss": 0.0044, "step": 74360 }, { "epoch": 13.26, "learning_rate": 4.337205777460771e-05, "loss": 0.0035, "step": 74370 }, { "epoch": 13.26, "learning_rate": 4.337116619115549e-05, "loss": 0.0036, "step": 74380 }, { "epoch": 13.26, "learning_rate": 4.337027460770328e-05, "loss": 0.0058, "step": 74390 }, { "epoch": 13.27, "learning_rate": 4.336938302425107e-05, "loss": 0.0035, "step": 74400 }, { "epoch": 13.27, "learning_rate": 4.336849144079886e-05, "loss": 0.0038, "step": 74410 }, { "epoch": 13.27, "learning_rate": 4.336759985734665e-05, "loss": 0.003, "step": 74420 }, { "epoch": 13.27, "learning_rate": 4.336670827389444e-05, "loss": 0.0028, "step": 74430 }, { "epoch": 13.27, "learning_rate": 4.336581669044223e-05, "loss": 0.004, "step": 74440 }, { "epoch": 13.28, "learning_rate": 4.3364925106990016e-05, "loss": 0.003, "step": 74450 }, { "epoch": 13.28, "learning_rate": 4.336403352353781e-05, "loss": 0.0048, "step": 74460 }, { "epoch": 13.28, "learning_rate": 4.336314194008559e-05, "loss": 0.005, "step": 74470 }, { "epoch": 13.28, "learning_rate": 4.336225035663338e-05, "loss": 0.0021, "step": 74480 }, { "epoch": 13.28, "learning_rate": 4.3361358773181174e-05, "loss": 0.006, "step": 74490 }, { "epoch": 13.28, "learning_rate": 4.336046718972896e-05, "loss": 0.0031, "step": 74500 }, { "epoch": 13.29, "learning_rate": 4.335957560627675e-05, "loss": 0.0024, "step": 74510 }, { "epoch": 13.29, "learning_rate": 4.3358684022824534e-05, "loss": 0.0047, "step": 74520 }, { "epoch": 13.29, "learning_rate": 4.335779243937233e-05, "loss": 0.0032, "step": 74530 }, { "epoch": 13.29, "learning_rate": 4.335690085592012e-05, "loss": 0.0026, "step": 74540 }, { "epoch": 13.29, "learning_rate": 4.335600927246791e-05, "loss": 0.002, "step": 74550 }, { "epoch": 13.3, "learning_rate": 4.335511768901569e-05, "loss": 0.0032, "step": 74560 }, { "epoch": 13.3, "learning_rate": 4.3354226105563484e-05, "loss": 0.0028, "step": 74570 }, { "epoch": 13.3, "learning_rate": 4.3353334522111275e-05, "loss": 0.0038, "step": 74580 }, { "epoch": 13.3, "learning_rate": 4.335244293865906e-05, "loss": 0.0034, "step": 74590 }, { "epoch": 13.3, "learning_rate": 4.335155135520685e-05, "loss": 0.0021, "step": 74600 }, { "epoch": 13.3, "learning_rate": 4.3350659771754635e-05, "loss": 0.0043, "step": 74610 }, { "epoch": 13.31, "learning_rate": 4.3349768188302426e-05, "loss": 0.0053, "step": 74620 }, { "epoch": 13.31, "learning_rate": 4.334887660485021e-05, "loss": 0.005, "step": 74630 }, { "epoch": 13.31, "learning_rate": 4.334798502139801e-05, "loss": 0.0032, "step": 74640 }, { "epoch": 13.31, "learning_rate": 4.334709343794579e-05, "loss": 0.003, "step": 74650 }, { "epoch": 13.31, "learning_rate": 4.3346201854493584e-05, "loss": 0.0057, "step": 74660 }, { "epoch": 13.31, "learning_rate": 4.3345310271041375e-05, "loss": 0.0017, "step": 74670 }, { "epoch": 13.32, "learning_rate": 4.334441868758916e-05, "loss": 0.0027, "step": 74680 }, { "epoch": 13.32, "learning_rate": 4.334352710413695e-05, "loss": 0.0028, "step": 74690 }, { "epoch": 13.32, "learning_rate": 4.3342635520684735e-05, "loss": 0.0018, "step": 74700 }, { "epoch": 13.32, "learning_rate": 4.334174393723253e-05, "loss": 0.0056, "step": 74710 }, { "epoch": 13.32, "learning_rate": 4.334085235378032e-05, "loss": 0.0012, "step": 74720 }, { "epoch": 13.33, "learning_rate": 4.33399607703281e-05, "loss": 0.0024, "step": 74730 }, { "epoch": 13.33, "learning_rate": 4.3339069186875894e-05, "loss": 0.004, "step": 74740 }, { "epoch": 13.33, "learning_rate": 4.3338177603423685e-05, "loss": 0.0022, "step": 74750 }, { "epoch": 13.33, "learning_rate": 4.3337286019971476e-05, "loss": 0.0035, "step": 74760 }, { "epoch": 13.33, "learning_rate": 4.333639443651926e-05, "loss": 0.0041, "step": 74770 }, { "epoch": 13.33, "learning_rate": 4.333550285306705e-05, "loss": 0.0029, "step": 74780 }, { "epoch": 13.34, "learning_rate": 4.3334611269614836e-05, "loss": 0.0037, "step": 74790 }, { "epoch": 13.34, "learning_rate": 4.333371968616263e-05, "loss": 0.0034, "step": 74800 }, { "epoch": 13.34, "learning_rate": 4.333282810271042e-05, "loss": 0.0031, "step": 74810 }, { "epoch": 13.34, "learning_rate": 4.33319365192582e-05, "loss": 0.0034, "step": 74820 }, { "epoch": 13.34, "learning_rate": 4.3331044935805994e-05, "loss": 0.0023, "step": 74830 }, { "epoch": 13.35, "learning_rate": 4.333015335235378e-05, "loss": 0.0052, "step": 74840 }, { "epoch": 13.35, "learning_rate": 4.332926176890157e-05, "loss": 0.0028, "step": 74850 }, { "epoch": 13.35, "learning_rate": 4.332837018544936e-05, "loss": 0.0022, "step": 74860 }, { "epoch": 13.35, "learning_rate": 4.332747860199715e-05, "loss": 0.0023, "step": 74870 }, { "epoch": 13.35, "learning_rate": 4.3326587018544937e-05, "loss": 0.0024, "step": 74880 }, { "epoch": 13.35, "learning_rate": 4.332569543509273e-05, "loss": 0.0022, "step": 74890 }, { "epoch": 13.36, "learning_rate": 4.332480385164052e-05, "loss": 0.0034, "step": 74900 }, { "epoch": 13.36, "learning_rate": 4.3323912268188303e-05, "loss": 0.0027, "step": 74910 }, { "epoch": 13.36, "learning_rate": 4.3323020684736095e-05, "loss": 0.005, "step": 74920 }, { "epoch": 13.36, "learning_rate": 4.332212910128388e-05, "loss": 0.0037, "step": 74930 }, { "epoch": 13.36, "learning_rate": 4.332123751783167e-05, "loss": 0.0034, "step": 74940 }, { "epoch": 13.36, "learning_rate": 4.332034593437946e-05, "loss": 0.0033, "step": 74950 }, { "epoch": 13.37, "learning_rate": 4.3319454350927246e-05, "loss": 0.0042, "step": 74960 }, { "epoch": 13.37, "learning_rate": 4.331856276747504e-05, "loss": 0.0038, "step": 74970 }, { "epoch": 13.37, "learning_rate": 4.331767118402283e-05, "loss": 0.003, "step": 74980 }, { "epoch": 13.37, "learning_rate": 4.331677960057062e-05, "loss": 0.0023, "step": 74990 }, { "epoch": 13.37, "learning_rate": 4.3315888017118404e-05, "loss": 0.0025, "step": 75000 }, { "epoch": 13.38, "learning_rate": 4.3314996433666195e-05, "loss": 0.0021, "step": 75010 }, { "epoch": 13.38, "learning_rate": 4.331410485021398e-05, "loss": 0.0049, "step": 75020 }, { "epoch": 13.38, "learning_rate": 4.331321326676177e-05, "loss": 0.0041, "step": 75030 }, { "epoch": 13.38, "learning_rate": 4.331232168330956e-05, "loss": 0.004, "step": 75040 }, { "epoch": 13.38, "learning_rate": 4.3311430099857347e-05, "loss": 0.0011, "step": 75050 }, { "epoch": 13.38, "learning_rate": 4.331053851640514e-05, "loss": 0.0031, "step": 75060 }, { "epoch": 13.39, "learning_rate": 4.330964693295292e-05, "loss": 0.0037, "step": 75070 }, { "epoch": 13.39, "learning_rate": 4.330875534950072e-05, "loss": 0.0043, "step": 75080 }, { "epoch": 13.39, "learning_rate": 4.3307863766048505e-05, "loss": 0.0042, "step": 75090 }, { "epoch": 13.39, "learning_rate": 4.3306972182596296e-05, "loss": 0.0027, "step": 75100 }, { "epoch": 13.39, "learning_rate": 4.330608059914408e-05, "loss": 0.0021, "step": 75110 }, { "epoch": 13.4, "learning_rate": 4.330518901569187e-05, "loss": 0.0033, "step": 75120 }, { "epoch": 13.4, "learning_rate": 4.330429743223966e-05, "loss": 0.0018, "step": 75130 }, { "epoch": 13.4, "learning_rate": 4.330340584878745e-05, "loss": 0.0034, "step": 75140 }, { "epoch": 13.4, "learning_rate": 4.330251426533524e-05, "loss": 0.0033, "step": 75150 }, { "epoch": 13.4, "learning_rate": 4.330162268188302e-05, "loss": 0.0037, "step": 75160 }, { "epoch": 13.4, "learning_rate": 4.3300731098430814e-05, "loss": 0.0045, "step": 75170 }, { "epoch": 13.41, "learning_rate": 4.3299839514978605e-05, "loss": 0.0017, "step": 75180 }, { "epoch": 13.41, "learning_rate": 4.329894793152639e-05, "loss": 0.002, "step": 75190 }, { "epoch": 13.41, "learning_rate": 4.329805634807418e-05, "loss": 0.0028, "step": 75200 }, { "epoch": 13.41, "learning_rate": 4.329716476462197e-05, "loss": 0.0052, "step": 75210 }, { "epoch": 13.41, "learning_rate": 4.329627318116976e-05, "loss": 0.003, "step": 75220 }, { "epoch": 13.41, "learning_rate": 4.329538159771755e-05, "loss": 0.002, "step": 75230 }, { "epoch": 13.42, "learning_rate": 4.329449001426534e-05, "loss": 0.0037, "step": 75240 }, { "epoch": 13.42, "learning_rate": 4.329359843081312e-05, "loss": 0.0025, "step": 75250 }, { "epoch": 13.42, "learning_rate": 4.3292706847360914e-05, "loss": 0.0032, "step": 75260 }, { "epoch": 13.42, "learning_rate": 4.3291815263908706e-05, "loss": 0.003, "step": 75270 }, { "epoch": 13.42, "learning_rate": 4.329092368045649e-05, "loss": 0.0026, "step": 75280 }, { "epoch": 13.43, "learning_rate": 4.329003209700428e-05, "loss": 0.0023, "step": 75290 }, { "epoch": 13.43, "learning_rate": 4.3289140513552066e-05, "loss": 0.0071, "step": 75300 }, { "epoch": 13.43, "learning_rate": 4.3288248930099864e-05, "loss": 0.0021, "step": 75310 }, { "epoch": 13.43, "learning_rate": 4.328735734664765e-05, "loss": 0.0022, "step": 75320 }, { "epoch": 13.43, "learning_rate": 4.328646576319544e-05, "loss": 0.0041, "step": 75330 }, { "epoch": 13.43, "learning_rate": 4.3285574179743224e-05, "loss": 0.0064, "step": 75340 }, { "epoch": 13.44, "learning_rate": 4.3284682596291015e-05, "loss": 0.0033, "step": 75350 }, { "epoch": 13.44, "learning_rate": 4.3283791012838806e-05, "loss": 0.0026, "step": 75360 }, { "epoch": 13.44, "learning_rate": 4.328289942938659e-05, "loss": 0.0031, "step": 75370 }, { "epoch": 13.44, "learning_rate": 4.328200784593438e-05, "loss": 0.003, "step": 75380 }, { "epoch": 13.44, "learning_rate": 4.3281116262482166e-05, "loss": 0.0038, "step": 75390 }, { "epoch": 13.45, "learning_rate": 4.328022467902996e-05, "loss": 0.0016, "step": 75400 }, { "epoch": 13.45, "learning_rate": 4.327933309557775e-05, "loss": 0.0018, "step": 75410 }, { "epoch": 13.45, "learning_rate": 4.327844151212554e-05, "loss": 0.0026, "step": 75420 }, { "epoch": 13.45, "learning_rate": 4.3277549928673324e-05, "loss": 0.0038, "step": 75430 }, { "epoch": 13.45, "learning_rate": 4.3276658345221116e-05, "loss": 0.0044, "step": 75440 }, { "epoch": 13.45, "learning_rate": 4.327576676176891e-05, "loss": 0.0041, "step": 75450 }, { "epoch": 13.46, "learning_rate": 4.327487517831669e-05, "loss": 0.0015, "step": 75460 }, { "epoch": 13.46, "learning_rate": 4.327398359486448e-05, "loss": 0.0029, "step": 75470 }, { "epoch": 13.46, "learning_rate": 4.327309201141227e-05, "loss": 0.0036, "step": 75480 }, { "epoch": 13.46, "learning_rate": 4.327220042796006e-05, "loss": 0.0034, "step": 75490 }, { "epoch": 13.46, "learning_rate": 4.327130884450785e-05, "loss": 0.0045, "step": 75500 }, { "epoch": 13.46, "learning_rate": 4.3270417261055634e-05, "loss": 0.0047, "step": 75510 }, { "epoch": 13.47, "learning_rate": 4.3269525677603425e-05, "loss": 0.0024, "step": 75520 }, { "epoch": 13.47, "learning_rate": 4.3268634094151216e-05, "loss": 0.002, "step": 75530 }, { "epoch": 13.47, "learning_rate": 4.326774251069901e-05, "loss": 0.0036, "step": 75540 }, { "epoch": 13.47, "learning_rate": 4.326685092724679e-05, "loss": 0.0042, "step": 75550 }, { "epoch": 13.47, "learning_rate": 4.326595934379458e-05, "loss": 0.0034, "step": 75560 }, { "epoch": 13.48, "learning_rate": 4.326506776034237e-05, "loss": 0.005, "step": 75570 }, { "epoch": 13.48, "learning_rate": 4.326417617689016e-05, "loss": 0.0048, "step": 75580 }, { "epoch": 13.48, "learning_rate": 4.326328459343795e-05, "loss": 0.0033, "step": 75590 }, { "epoch": 13.48, "learning_rate": 4.3262393009985734e-05, "loss": 0.0028, "step": 75600 }, { "epoch": 13.48, "learning_rate": 4.3261501426533526e-05, "loss": 0.0039, "step": 75610 }, { "epoch": 13.48, "learning_rate": 4.326060984308131e-05, "loss": 0.0039, "step": 75620 }, { "epoch": 13.49, "learning_rate": 4.32597182596291e-05, "loss": 0.0042, "step": 75630 }, { "epoch": 13.49, "learning_rate": 4.325882667617689e-05, "loss": 0.0044, "step": 75640 }, { "epoch": 13.49, "learning_rate": 4.3257935092724684e-05, "loss": 0.0017, "step": 75650 }, { "epoch": 13.49, "learning_rate": 4.325704350927247e-05, "loss": 0.0026, "step": 75660 }, { "epoch": 13.49, "learning_rate": 4.325615192582026e-05, "loss": 0.0046, "step": 75670 }, { "epoch": 13.5, "learning_rate": 4.325526034236805e-05, "loss": 0.0021, "step": 75680 }, { "epoch": 13.5, "learning_rate": 4.3254368758915835e-05, "loss": 0.0029, "step": 75690 }, { "epoch": 13.5, "learning_rate": 4.3253477175463626e-05, "loss": 0.003, "step": 75700 }, { "epoch": 13.5, "learning_rate": 4.325258559201141e-05, "loss": 0.0019, "step": 75710 }, { "epoch": 13.5, "learning_rate": 4.32516940085592e-05, "loss": 0.0028, "step": 75720 }, { "epoch": 13.5, "learning_rate": 4.325080242510699e-05, "loss": 0.0039, "step": 75730 }, { "epoch": 13.51, "learning_rate": 4.324991084165478e-05, "loss": 0.0039, "step": 75740 }, { "epoch": 13.51, "learning_rate": 4.324910841654779e-05, "loss": 0.0041, "step": 75750 }, { "epoch": 13.51, "learning_rate": 4.3248216833095577e-05, "loss": 0.003, "step": 75760 }, { "epoch": 13.51, "learning_rate": 4.324732524964337e-05, "loss": 0.0037, "step": 75770 }, { "epoch": 13.51, "learning_rate": 4.324643366619115e-05, "loss": 0.0024, "step": 75780 }, { "epoch": 13.51, "learning_rate": 4.324554208273895e-05, "loss": 0.0014, "step": 75790 }, { "epoch": 13.52, "learning_rate": 4.3244650499286735e-05, "loss": 0.0032, "step": 75800 }, { "epoch": 13.52, "learning_rate": 4.3243758915834526e-05, "loss": 0.0039, "step": 75810 }, { "epoch": 13.52, "learning_rate": 4.324286733238232e-05, "loss": 0.0058, "step": 75820 }, { "epoch": 13.52, "learning_rate": 4.32419757489301e-05, "loss": 0.0036, "step": 75830 }, { "epoch": 13.52, "learning_rate": 4.324108416547789e-05, "loss": 0.0044, "step": 75840 }, { "epoch": 13.53, "learning_rate": 4.324019258202568e-05, "loss": 0.0022, "step": 75850 }, { "epoch": 13.53, "learning_rate": 4.323930099857347e-05, "loss": 0.0029, "step": 75860 }, { "epoch": 13.53, "learning_rate": 4.323840941512125e-05, "loss": 0.0035, "step": 75870 }, { "epoch": 13.53, "learning_rate": 4.3237517831669044e-05, "loss": 0.0034, "step": 75880 }, { "epoch": 13.53, "learning_rate": 4.3236626248216835e-05, "loss": 0.004, "step": 75890 }, { "epoch": 13.53, "learning_rate": 4.3235734664764626e-05, "loss": 0.003, "step": 75900 }, { "epoch": 13.54, "learning_rate": 4.323484308131242e-05, "loss": 0.0017, "step": 75910 }, { "epoch": 13.54, "learning_rate": 4.32339514978602e-05, "loss": 0.0019, "step": 75920 }, { "epoch": 13.54, "learning_rate": 4.323305991440799e-05, "loss": 0.0037, "step": 75930 }, { "epoch": 13.54, "learning_rate": 4.323216833095578e-05, "loss": 0.0033, "step": 75940 }, { "epoch": 13.54, "learning_rate": 4.323127674750357e-05, "loss": 0.003, "step": 75950 }, { "epoch": 13.54, "learning_rate": 4.323038516405136e-05, "loss": 0.0064, "step": 75960 }, { "epoch": 13.55, "learning_rate": 4.3229493580599145e-05, "loss": 0.0037, "step": 75970 }, { "epoch": 13.55, "learning_rate": 4.3228601997146936e-05, "loss": 0.002, "step": 75980 }, { "epoch": 13.55, "learning_rate": 4.322771041369472e-05, "loss": 0.0025, "step": 75990 }, { "epoch": 13.55, "learning_rate": 4.322681883024251e-05, "loss": 0.0034, "step": 76000 }, { "epoch": 13.55, "learning_rate": 4.32259272467903e-05, "loss": 0.0049, "step": 76010 }, { "epoch": 13.56, "learning_rate": 4.3225035663338094e-05, "loss": 0.0035, "step": 76020 }, { "epoch": 13.56, "learning_rate": 4.322414407988588e-05, "loss": 0.0034, "step": 76030 }, { "epoch": 13.56, "learning_rate": 4.322325249643367e-05, "loss": 0.004, "step": 76040 }, { "epoch": 13.56, "learning_rate": 4.322236091298146e-05, "loss": 0.003, "step": 76050 }, { "epoch": 13.56, "learning_rate": 4.3221469329529245e-05, "loss": 0.0017, "step": 76060 }, { "epoch": 13.56, "learning_rate": 4.3220577746077036e-05, "loss": 0.003, "step": 76070 }, { "epoch": 13.57, "learning_rate": 4.321968616262482e-05, "loss": 0.0023, "step": 76080 }, { "epoch": 13.57, "learning_rate": 4.321879457917261e-05, "loss": 0.002, "step": 76090 }, { "epoch": 13.57, "learning_rate": 4.3217902995720396e-05, "loss": 0.0041, "step": 76100 }, { "epoch": 13.57, "learning_rate": 4.321701141226819e-05, "loss": 0.003, "step": 76110 }, { "epoch": 13.57, "learning_rate": 4.321611982881598e-05, "loss": 0.0036, "step": 76120 }, { "epoch": 13.58, "learning_rate": 4.321522824536377e-05, "loss": 0.0033, "step": 76130 }, { "epoch": 13.58, "learning_rate": 4.321433666191156e-05, "loss": 0.0026, "step": 76140 }, { "epoch": 13.58, "learning_rate": 4.3213445078459346e-05, "loss": 0.0024, "step": 76150 }, { "epoch": 13.58, "learning_rate": 4.321255349500714e-05, "loss": 0.0044, "step": 76160 }, { "epoch": 13.58, "learning_rate": 4.321166191155492e-05, "loss": 0.0024, "step": 76170 }, { "epoch": 13.58, "learning_rate": 4.321077032810271e-05, "loss": 0.0027, "step": 76180 }, { "epoch": 13.59, "learning_rate": 4.3209878744650504e-05, "loss": 0.0031, "step": 76190 }, { "epoch": 13.59, "learning_rate": 4.320898716119829e-05, "loss": 0.0037, "step": 76200 }, { "epoch": 13.59, "learning_rate": 4.320809557774608e-05, "loss": 0.0029, "step": 76210 }, { "epoch": 13.59, "learning_rate": 4.3207203994293864e-05, "loss": 0.005, "step": 76220 }, { "epoch": 13.59, "learning_rate": 4.320631241084166e-05, "loss": 0.0029, "step": 76230 }, { "epoch": 13.59, "learning_rate": 4.3205420827389446e-05, "loss": 0.0051, "step": 76240 }, { "epoch": 13.6, "learning_rate": 4.320452924393724e-05, "loss": 0.0041, "step": 76250 }, { "epoch": 13.6, "learning_rate": 4.320363766048502e-05, "loss": 0.0021, "step": 76260 }, { "epoch": 13.6, "learning_rate": 4.320274607703281e-05, "loss": 0.0036, "step": 76270 }, { "epoch": 13.6, "learning_rate": 4.3201854493580604e-05, "loss": 0.004, "step": 76280 }, { "epoch": 13.6, "learning_rate": 4.320096291012839e-05, "loss": 0.0035, "step": 76290 }, { "epoch": 13.61, "learning_rate": 4.320007132667618e-05, "loss": 0.0024, "step": 76300 }, { "epoch": 13.61, "learning_rate": 4.3199179743223964e-05, "loss": 0.0028, "step": 76310 }, { "epoch": 13.61, "learning_rate": 4.3198288159771756e-05, "loss": 0.0032, "step": 76320 }, { "epoch": 13.61, "learning_rate": 4.319739657631954e-05, "loss": 0.0026, "step": 76330 }, { "epoch": 13.61, "learning_rate": 4.319650499286734e-05, "loss": 0.0025, "step": 76340 }, { "epoch": 13.61, "learning_rate": 4.319561340941512e-05, "loss": 0.0024, "step": 76350 }, { "epoch": 13.62, "learning_rate": 4.3194721825962914e-05, "loss": 0.0021, "step": 76360 }, { "epoch": 13.62, "learning_rate": 4.3193830242510705e-05, "loss": 0.0034, "step": 76370 }, { "epoch": 13.62, "learning_rate": 4.319293865905849e-05, "loss": 0.0027, "step": 76380 }, { "epoch": 13.62, "learning_rate": 4.319204707560628e-05, "loss": 0.0061, "step": 76390 }, { "epoch": 13.62, "learning_rate": 4.3191155492154065e-05, "loss": 0.005, "step": 76400 }, { "epoch": 13.63, "learning_rate": 4.3190263908701856e-05, "loss": 0.0031, "step": 76410 }, { "epoch": 13.63, "learning_rate": 4.318937232524965e-05, "loss": 0.0038, "step": 76420 }, { "epoch": 13.63, "learning_rate": 4.318848074179743e-05, "loss": 0.0015, "step": 76430 }, { "epoch": 13.63, "learning_rate": 4.318758915834522e-05, "loss": 0.0026, "step": 76440 }, { "epoch": 13.63, "learning_rate": 4.3186697574893014e-05, "loss": 0.0045, "step": 76450 }, { "epoch": 13.63, "learning_rate": 4.3185805991440805e-05, "loss": 0.0047, "step": 76460 }, { "epoch": 13.64, "learning_rate": 4.318491440798859e-05, "loss": 0.0029, "step": 76470 }, { "epoch": 13.64, "learning_rate": 4.318402282453638e-05, "loss": 0.0033, "step": 76480 }, { "epoch": 13.64, "learning_rate": 4.3183131241084166e-05, "loss": 0.0018, "step": 76490 }, { "epoch": 13.64, "learning_rate": 4.318223965763196e-05, "loss": 0.0025, "step": 76500 }, { "epoch": 13.64, "learning_rate": 4.318134807417975e-05, "loss": 0.0016, "step": 76510 }, { "epoch": 13.64, "learning_rate": 4.318045649072753e-05, "loss": 0.0051, "step": 76520 }, { "epoch": 13.65, "learning_rate": 4.3179564907275324e-05, "loss": 0.0075, "step": 76530 }, { "epoch": 13.65, "learning_rate": 4.317867332382311e-05, "loss": 0.0046, "step": 76540 }, { "epoch": 13.65, "learning_rate": 4.31777817403709e-05, "loss": 0.0049, "step": 76550 }, { "epoch": 13.65, "learning_rate": 4.317689015691869e-05, "loss": 0.0028, "step": 76560 }, { "epoch": 13.65, "learning_rate": 4.317599857346648e-05, "loss": 0.0051, "step": 76570 }, { "epoch": 13.66, "learning_rate": 4.3175106990014266e-05, "loss": 0.0035, "step": 76580 }, { "epoch": 13.66, "learning_rate": 4.317421540656206e-05, "loss": 0.0024, "step": 76590 }, { "epoch": 13.66, "learning_rate": 4.317332382310985e-05, "loss": 0.0036, "step": 76600 }, { "epoch": 13.66, "learning_rate": 4.317243223965763e-05, "loss": 0.0032, "step": 76610 }, { "epoch": 13.66, "learning_rate": 4.3171540656205424e-05, "loss": 0.0018, "step": 76620 }, { "epoch": 13.66, "learning_rate": 4.317064907275321e-05, "loss": 0.0024, "step": 76630 }, { "epoch": 13.67, "learning_rate": 4.3169757489301e-05, "loss": 0.0045, "step": 76640 }, { "epoch": 13.67, "learning_rate": 4.316886590584879e-05, "loss": 0.0007, "step": 76650 }, { "epoch": 13.67, "learning_rate": 4.3167974322396575e-05, "loss": 0.0031, "step": 76660 }, { "epoch": 13.67, "learning_rate": 4.316708273894437e-05, "loss": 0.0035, "step": 76670 }, { "epoch": 13.67, "learning_rate": 4.316619115549216e-05, "loss": 0.0051, "step": 76680 }, { "epoch": 13.68, "learning_rate": 4.316529957203995e-05, "loss": 0.0037, "step": 76690 }, { "epoch": 13.68, "learning_rate": 4.3164407988587733e-05, "loss": 0.0031, "step": 76700 }, { "epoch": 13.68, "learning_rate": 4.3163516405135525e-05, "loss": 0.005, "step": 76710 }, { "epoch": 13.68, "learning_rate": 4.316262482168331e-05, "loss": 0.0015, "step": 76720 }, { "epoch": 13.68, "learning_rate": 4.3161822396576324e-05, "loss": 0.0038, "step": 76730 }, { "epoch": 13.68, "learning_rate": 4.3160930813124115e-05, "loss": 0.0022, "step": 76740 }, { "epoch": 13.69, "learning_rate": 4.31600392296719e-05, "loss": 0.0029, "step": 76750 }, { "epoch": 13.69, "learning_rate": 4.315914764621969e-05, "loss": 0.0021, "step": 76760 }, { "epoch": 13.69, "learning_rate": 4.3158256062767475e-05, "loss": 0.0021, "step": 76770 }, { "epoch": 13.69, "learning_rate": 4.3157364479315266e-05, "loss": 0.0053, "step": 76780 }, { "epoch": 13.69, "learning_rate": 4.315647289586305e-05, "loss": 0.0033, "step": 76790 }, { "epoch": 13.69, "learning_rate": 4.315558131241084e-05, "loss": 0.0031, "step": 76800 }, { "epoch": 13.7, "learning_rate": 4.315468972895863e-05, "loss": 0.0051, "step": 76810 }, { "epoch": 13.7, "learning_rate": 4.315379814550642e-05, "loss": 0.0024, "step": 76820 }, { "epoch": 13.7, "learning_rate": 4.3152906562054216e-05, "loss": 0.0025, "step": 76830 }, { "epoch": 13.7, "learning_rate": 4.3152014978602e-05, "loss": 0.0027, "step": 76840 }, { "epoch": 13.7, "learning_rate": 4.315112339514979e-05, "loss": 0.003, "step": 76850 }, { "epoch": 13.71, "learning_rate": 4.3150231811697576e-05, "loss": 0.0046, "step": 76860 }, { "epoch": 13.71, "learning_rate": 4.314934022824537e-05, "loss": 0.0043, "step": 76870 }, { "epoch": 13.71, "learning_rate": 4.314844864479315e-05, "loss": 0.0032, "step": 76880 }, { "epoch": 13.71, "learning_rate": 4.314755706134094e-05, "loss": 0.0032, "step": 76890 }, { "epoch": 13.71, "learning_rate": 4.3146665477888734e-05, "loss": 0.0044, "step": 76900 }, { "epoch": 13.71, "learning_rate": 4.314577389443652e-05, "loss": 0.0022, "step": 76910 }, { "epoch": 13.72, "learning_rate": 4.314488231098431e-05, "loss": 0.0037, "step": 76920 }, { "epoch": 13.72, "learning_rate": 4.3143990727532094e-05, "loss": 0.0018, "step": 76930 }, { "epoch": 13.72, "learning_rate": 4.314309914407989e-05, "loss": 0.004, "step": 76940 }, { "epoch": 13.72, "learning_rate": 4.3142207560627676e-05, "loss": 0.0035, "step": 76950 }, { "epoch": 13.72, "learning_rate": 4.314131597717547e-05, "loss": 0.0049, "step": 76960 }, { "epoch": 13.73, "learning_rate": 4.314042439372326e-05, "loss": 0.0027, "step": 76970 }, { "epoch": 13.73, "learning_rate": 4.313953281027104e-05, "loss": 0.0031, "step": 76980 }, { "epoch": 13.73, "learning_rate": 4.3138641226818834e-05, "loss": 0.0065, "step": 76990 }, { "epoch": 13.73, "learning_rate": 4.313774964336662e-05, "loss": 0.0038, "step": 77000 }, { "epoch": 13.73, "learning_rate": 4.313685805991441e-05, "loss": 0.0024, "step": 77010 }, { "epoch": 13.73, "learning_rate": 4.3135966476462194e-05, "loss": 0.0036, "step": 77020 }, { "epoch": 13.74, "learning_rate": 4.3135074893009986e-05, "loss": 0.0048, "step": 77030 }, { "epoch": 13.74, "learning_rate": 4.313418330955778e-05, "loss": 0.0051, "step": 77040 }, { "epoch": 13.74, "learning_rate": 4.313329172610557e-05, "loss": 0.0025, "step": 77050 }, { "epoch": 13.74, "learning_rate": 4.313240014265336e-05, "loss": 0.0022, "step": 77060 }, { "epoch": 13.74, "learning_rate": 4.3131508559201144e-05, "loss": 0.0036, "step": 77070 }, { "epoch": 13.74, "learning_rate": 4.3130616975748935e-05, "loss": 0.0028, "step": 77080 }, { "epoch": 13.75, "learning_rate": 4.312972539229672e-05, "loss": 0.0018, "step": 77090 }, { "epoch": 13.75, "learning_rate": 4.312883380884451e-05, "loss": 0.0039, "step": 77100 }, { "epoch": 13.75, "learning_rate": 4.3127942225392295e-05, "loss": 0.0028, "step": 77110 }, { "epoch": 13.75, "learning_rate": 4.3127050641940086e-05, "loss": 0.0051, "step": 77120 }, { "epoch": 13.75, "learning_rate": 4.312615905848788e-05, "loss": 0.0049, "step": 77130 }, { "epoch": 13.76, "learning_rate": 4.312526747503566e-05, "loss": 0.0045, "step": 77140 }, { "epoch": 13.76, "learning_rate": 4.312437589158345e-05, "loss": 0.0037, "step": 77150 }, { "epoch": 13.76, "learning_rate": 4.3123484308131244e-05, "loss": 0.0029, "step": 77160 }, { "epoch": 13.76, "learning_rate": 4.3122592724679036e-05, "loss": 0.0033, "step": 77170 }, { "epoch": 13.76, "learning_rate": 4.312170114122682e-05, "loss": 0.0039, "step": 77180 }, { "epoch": 13.76, "learning_rate": 4.312080955777461e-05, "loss": 0.0016, "step": 77190 }, { "epoch": 13.77, "learning_rate": 4.31199179743224e-05, "loss": 0.004, "step": 77200 }, { "epoch": 13.77, "learning_rate": 4.311902639087019e-05, "loss": 0.0029, "step": 77210 }, { "epoch": 13.77, "learning_rate": 4.311813480741798e-05, "loss": 0.0032, "step": 77220 }, { "epoch": 13.77, "learning_rate": 4.311724322396576e-05, "loss": 0.0035, "step": 77230 }, { "epoch": 13.77, "learning_rate": 4.3116351640513554e-05, "loss": 0.0055, "step": 77240 }, { "epoch": 13.77, "learning_rate": 4.311546005706134e-05, "loss": 0.0033, "step": 77250 }, { "epoch": 13.78, "learning_rate": 4.311456847360913e-05, "loss": 0.0074, "step": 77260 }, { "epoch": 13.78, "learning_rate": 4.311367689015692e-05, "loss": 0.0013, "step": 77270 }, { "epoch": 13.78, "learning_rate": 4.311278530670471e-05, "loss": 0.0035, "step": 77280 }, { "epoch": 13.78, "learning_rate": 4.31118937232525e-05, "loss": 0.0014, "step": 77290 }, { "epoch": 13.78, "learning_rate": 4.311100213980029e-05, "loss": 0.0026, "step": 77300 }, { "epoch": 13.79, "learning_rate": 4.311011055634808e-05, "loss": 0.0033, "step": 77310 }, { "epoch": 13.79, "learning_rate": 4.310921897289586e-05, "loss": 0.0049, "step": 77320 }, { "epoch": 13.79, "learning_rate": 4.3108327389443654e-05, "loss": 0.003, "step": 77330 }, { "epoch": 13.79, "learning_rate": 4.310743580599144e-05, "loss": 0.0032, "step": 77340 }, { "epoch": 13.79, "learning_rate": 4.310654422253923e-05, "loss": 0.0032, "step": 77350 }, { "epoch": 13.79, "learning_rate": 4.310565263908702e-05, "loss": 0.004, "step": 77360 }, { "epoch": 13.8, "learning_rate": 4.3104761055634806e-05, "loss": 0.0058, "step": 77370 }, { "epoch": 13.8, "learning_rate": 4.3103869472182604e-05, "loss": 0.0022, "step": 77380 }, { "epoch": 13.8, "learning_rate": 4.310297788873039e-05, "loss": 0.0037, "step": 77390 }, { "epoch": 13.8, "learning_rate": 4.310208630527818e-05, "loss": 0.0033, "step": 77400 }, { "epoch": 13.8, "learning_rate": 4.3101194721825964e-05, "loss": 0.0029, "step": 77410 }, { "epoch": 13.81, "learning_rate": 4.3100303138373755e-05, "loss": 0.0037, "step": 77420 }, { "epoch": 13.81, "learning_rate": 4.3099411554921546e-05, "loss": 0.0024, "step": 77430 }, { "epoch": 13.81, "learning_rate": 4.309851997146933e-05, "loss": 0.0025, "step": 77440 }, { "epoch": 13.81, "learning_rate": 4.309762838801712e-05, "loss": 0.0033, "step": 77450 }, { "epoch": 13.81, "learning_rate": 4.3096736804564906e-05, "loss": 0.0035, "step": 77460 }, { "epoch": 13.81, "learning_rate": 4.30958452211127e-05, "loss": 0.0025, "step": 77470 }, { "epoch": 13.82, "learning_rate": 4.309495363766048e-05, "loss": 0.0026, "step": 77480 }, { "epoch": 13.82, "learning_rate": 4.309406205420828e-05, "loss": 0.005, "step": 77490 }, { "epoch": 13.82, "learning_rate": 4.3093170470756064e-05, "loss": 0.0023, "step": 77500 }, { "epoch": 13.82, "learning_rate": 4.3092278887303855e-05, "loss": 0.0073, "step": 77510 }, { "epoch": 13.82, "learning_rate": 4.3091387303851647e-05, "loss": 0.0019, "step": 77520 }, { "epoch": 13.82, "learning_rate": 4.309049572039943e-05, "loss": 0.0012, "step": 77530 }, { "epoch": 13.83, "learning_rate": 4.308960413694722e-05, "loss": 0.003, "step": 77540 }, { "epoch": 13.83, "learning_rate": 4.308871255349501e-05, "loss": 0.003, "step": 77550 }, { "epoch": 13.83, "learning_rate": 4.30878209700428e-05, "loss": 0.0033, "step": 77560 }, { "epoch": 13.83, "learning_rate": 4.308692938659058e-05, "loss": 0.0034, "step": 77570 }, { "epoch": 13.83, "learning_rate": 4.3086037803138374e-05, "loss": 0.0036, "step": 77580 }, { "epoch": 13.84, "learning_rate": 4.3085146219686165e-05, "loss": 0.0047, "step": 77590 }, { "epoch": 13.84, "learning_rate": 4.3084254636233956e-05, "loss": 0.0018, "step": 77600 }, { "epoch": 13.84, "learning_rate": 4.308336305278175e-05, "loss": 0.0029, "step": 77610 }, { "epoch": 13.84, "learning_rate": 4.308247146932953e-05, "loss": 0.0031, "step": 77620 }, { "epoch": 13.84, "learning_rate": 4.308157988587732e-05, "loss": 0.004, "step": 77630 }, { "epoch": 13.84, "learning_rate": 4.308068830242511e-05, "loss": 0.0045, "step": 77640 }, { "epoch": 13.85, "learning_rate": 4.30797967189729e-05, "loss": 0.0041, "step": 77650 }, { "epoch": 13.85, "learning_rate": 4.307890513552069e-05, "loss": 0.0042, "step": 77660 }, { "epoch": 13.85, "learning_rate": 4.3078013552068474e-05, "loss": 0.0036, "step": 77670 }, { "epoch": 13.85, "learning_rate": 4.3077121968616265e-05, "loss": 0.0035, "step": 77680 }, { "epoch": 13.85, "learning_rate": 4.307623038516405e-05, "loss": 0.0031, "step": 77690 }, { "epoch": 13.86, "learning_rate": 4.307533880171184e-05, "loss": 0.0054, "step": 77700 }, { "epoch": 13.86, "learning_rate": 4.307444721825963e-05, "loss": 0.0022, "step": 77710 }, { "epoch": 13.86, "learning_rate": 4.307355563480742e-05, "loss": 0.0032, "step": 77720 }, { "epoch": 13.86, "learning_rate": 4.307266405135521e-05, "loss": 0.0041, "step": 77730 }, { "epoch": 13.86, "learning_rate": 4.3071772467903e-05, "loss": 0.0034, "step": 77740 }, { "epoch": 13.86, "learning_rate": 4.307088088445079e-05, "loss": 0.0038, "step": 77750 }, { "epoch": 13.87, "learning_rate": 4.3069989300998575e-05, "loss": 0.0027, "step": 77760 }, { "epoch": 13.87, "learning_rate": 4.3069097717546366e-05, "loss": 0.0031, "step": 77770 }, { "epoch": 13.87, "learning_rate": 4.306820613409415e-05, "loss": 0.0029, "step": 77780 }, { "epoch": 13.87, "learning_rate": 4.306731455064194e-05, "loss": 0.0028, "step": 77790 }, { "epoch": 13.87, "learning_rate": 4.3066422967189726e-05, "loss": 0.0024, "step": 77800 }, { "epoch": 13.87, "learning_rate": 4.306553138373752e-05, "loss": 0.0029, "step": 77810 }, { "epoch": 13.88, "learning_rate": 4.306463980028531e-05, "loss": 0.0033, "step": 77820 }, { "epoch": 13.88, "learning_rate": 4.30637482168331e-05, "loss": 0.0031, "step": 77830 }, { "epoch": 13.88, "learning_rate": 4.306285663338089e-05, "loss": 0.0032, "step": 77840 }, { "epoch": 13.88, "learning_rate": 4.3061965049928675e-05, "loss": 0.004, "step": 77850 }, { "epoch": 13.88, "learning_rate": 4.3061073466476466e-05, "loss": 0.0043, "step": 77860 }, { "epoch": 13.89, "learning_rate": 4.306018188302425e-05, "loss": 0.0033, "step": 77870 }, { "epoch": 13.89, "learning_rate": 4.305929029957204e-05, "loss": 0.0036, "step": 77880 }, { "epoch": 13.89, "learning_rate": 4.305839871611983e-05, "loss": 0.002, "step": 77890 }, { "epoch": 13.89, "learning_rate": 4.305750713266762e-05, "loss": 0.0047, "step": 77900 }, { "epoch": 13.89, "learning_rate": 4.305661554921541e-05, "loss": 0.0043, "step": 77910 }, { "epoch": 13.89, "learning_rate": 4.305572396576319e-05, "loss": 0.003, "step": 77920 }, { "epoch": 13.9, "learning_rate": 4.305483238231099e-05, "loss": 0.0055, "step": 77930 }, { "epoch": 13.9, "learning_rate": 4.3053940798858776e-05, "loss": 0.0035, "step": 77940 }, { "epoch": 13.9, "learning_rate": 4.305304921540657e-05, "loss": 0.003, "step": 77950 }, { "epoch": 13.9, "learning_rate": 4.305215763195435e-05, "loss": 0.006, "step": 77960 }, { "epoch": 13.9, "learning_rate": 4.305126604850214e-05, "loss": 0.0059, "step": 77970 }, { "epoch": 13.91, "learning_rate": 4.3050374465049934e-05, "loss": 0.0043, "step": 77980 }, { "epoch": 13.91, "learning_rate": 4.304948288159772e-05, "loss": 0.004, "step": 77990 }, { "epoch": 13.91, "learning_rate": 4.304859129814551e-05, "loss": 0.0056, "step": 78000 }, { "epoch": 13.91, "learning_rate": 4.3047699714693294e-05, "loss": 0.0043, "step": 78010 }, { "epoch": 13.91, "learning_rate": 4.3046808131241085e-05, "loss": 0.0048, "step": 78020 }, { "epoch": 13.91, "learning_rate": 4.304591654778887e-05, "loss": 0.0027, "step": 78030 }, { "epoch": 13.92, "learning_rate": 4.304502496433667e-05, "loss": 0.0039, "step": 78040 }, { "epoch": 13.92, "learning_rate": 4.304413338088445e-05, "loss": 0.0025, "step": 78050 }, { "epoch": 13.92, "learning_rate": 4.304324179743224e-05, "loss": 0.0022, "step": 78060 }, { "epoch": 13.92, "learning_rate": 4.3042350213980034e-05, "loss": 0.0033, "step": 78070 }, { "epoch": 13.92, "learning_rate": 4.304145863052782e-05, "loss": 0.0037, "step": 78080 }, { "epoch": 13.92, "learning_rate": 4.304056704707561e-05, "loss": 0.0029, "step": 78090 }, { "epoch": 13.93, "learning_rate": 4.3039675463623394e-05, "loss": 0.0028, "step": 78100 }, { "epoch": 13.93, "learning_rate": 4.3038783880171186e-05, "loss": 0.0026, "step": 78110 }, { "epoch": 13.93, "learning_rate": 4.303789229671898e-05, "loss": 0.0021, "step": 78120 }, { "epoch": 13.93, "learning_rate": 4.303700071326676e-05, "loss": 0.0065, "step": 78130 }, { "epoch": 13.93, "learning_rate": 4.303610912981455e-05, "loss": 0.0033, "step": 78140 }, { "epoch": 13.94, "learning_rate": 4.3035217546362344e-05, "loss": 0.0022, "step": 78150 }, { "epoch": 13.94, "learning_rate": 4.3034325962910135e-05, "loss": 0.0038, "step": 78160 }, { "epoch": 13.94, "learning_rate": 4.303343437945792e-05, "loss": 0.0043, "step": 78170 }, { "epoch": 13.94, "learning_rate": 4.303254279600571e-05, "loss": 0.0037, "step": 78180 }, { "epoch": 13.94, "learning_rate": 4.3031651212553495e-05, "loss": 0.0037, "step": 78190 }, { "epoch": 13.94, "learning_rate": 4.3030759629101286e-05, "loss": 0.0037, "step": 78200 }, { "epoch": 13.95, "learning_rate": 4.302986804564908e-05, "loss": 0.0035, "step": 78210 }, { "epoch": 13.95, "learning_rate": 4.302897646219686e-05, "loss": 0.0044, "step": 78220 }, { "epoch": 13.95, "learning_rate": 4.302808487874465e-05, "loss": 0.0035, "step": 78230 }, { "epoch": 13.95, "learning_rate": 4.302719329529244e-05, "loss": 0.0026, "step": 78240 }, { "epoch": 13.95, "learning_rate": 4.302630171184023e-05, "loss": 0.0029, "step": 78250 }, { "epoch": 13.96, "learning_rate": 4.302541012838802e-05, "loss": 0.0023, "step": 78260 }, { "epoch": 13.96, "learning_rate": 4.302451854493581e-05, "loss": 0.0022, "step": 78270 }, { "epoch": 13.96, "learning_rate": 4.3023626961483596e-05, "loss": 0.002, "step": 78280 }, { "epoch": 13.96, "learning_rate": 4.302273537803139e-05, "loss": 0.0041, "step": 78290 }, { "epoch": 13.96, "learning_rate": 4.302184379457918e-05, "loss": 0.0041, "step": 78300 }, { "epoch": 13.96, "learning_rate": 4.302095221112696e-05, "loss": 0.002, "step": 78310 }, { "epoch": 13.97, "learning_rate": 4.3020060627674754e-05, "loss": 0.006, "step": 78320 }, { "epoch": 13.97, "learning_rate": 4.301916904422254e-05, "loss": 0.0021, "step": 78330 }, { "epoch": 13.97, "learning_rate": 4.301827746077033e-05, "loss": 0.0025, "step": 78340 }, { "epoch": 13.97, "learning_rate": 4.3017385877318114e-05, "loss": 0.0029, "step": 78350 }, { "epoch": 13.97, "learning_rate": 4.3016494293865905e-05, "loss": 0.0019, "step": 78360 }, { "epoch": 13.97, "learning_rate": 4.3015602710413696e-05, "loss": 0.0029, "step": 78370 }, { "epoch": 13.98, "learning_rate": 4.301471112696149e-05, "loss": 0.0025, "step": 78380 }, { "epoch": 13.98, "learning_rate": 4.301381954350928e-05, "loss": 0.0017, "step": 78390 }, { "epoch": 13.98, "learning_rate": 4.301292796005706e-05, "loss": 0.0035, "step": 78400 }, { "epoch": 13.98, "learning_rate": 4.3012036376604854e-05, "loss": 0.003, "step": 78410 }, { "epoch": 13.98, "learning_rate": 4.301114479315264e-05, "loss": 0.0027, "step": 78420 }, { "epoch": 13.99, "learning_rate": 4.301025320970043e-05, "loss": 0.0028, "step": 78430 }, { "epoch": 13.99, "learning_rate": 4.300936162624822e-05, "loss": 0.0028, "step": 78440 }, { "epoch": 13.99, "learning_rate": 4.3008470042796005e-05, "loss": 0.0024, "step": 78450 }, { "epoch": 13.99, "learning_rate": 4.30075784593438e-05, "loss": 0.002, "step": 78460 }, { "epoch": 13.99, "learning_rate": 4.300668687589158e-05, "loss": 0.0025, "step": 78470 }, { "epoch": 13.99, "learning_rate": 4.300579529243938e-05, "loss": 0.003, "step": 78480 }, { "epoch": 14.0, "learning_rate": 4.3004903708987164e-05, "loss": 0.0058, "step": 78490 }, { "epoch": 14.0, "learning_rate": 4.3004012125534955e-05, "loss": 0.0039, "step": 78500 }, { "epoch": 14.0, "learning_rate": 4.300312054208274e-05, "loss": 0.0038, "step": 78510 }, { "epoch": 14.0, "eval_loss": 0.019745901226997375, "eval_runtime": 196.3873, "eval_samples_per_second": 23.622, "eval_steps_per_second": 2.953, "step": 78512 }, { "epoch": 14.0, "learning_rate": 4.300222895863053e-05, "loss": 0.0024, "step": 78520 }, { "epoch": 14.0, "learning_rate": 4.300133737517832e-05, "loss": 0.0034, "step": 78530 }, { "epoch": 14.0, "learning_rate": 4.3000445791726106e-05, "loss": 0.0031, "step": 78540 }, { "epoch": 14.01, "learning_rate": 4.29995542082739e-05, "loss": 0.0048, "step": 78550 }, { "epoch": 14.01, "learning_rate": 4.299866262482168e-05, "loss": 0.0019, "step": 78560 }, { "epoch": 14.01, "learning_rate": 4.299777104136947e-05, "loss": 0.0021, "step": 78570 }, { "epoch": 14.01, "learning_rate": 4.299687945791726e-05, "loss": 0.0039, "step": 78580 }, { "epoch": 14.01, "learning_rate": 4.2995987874465055e-05, "loss": 0.0021, "step": 78590 }, { "epoch": 14.02, "learning_rate": 4.299509629101284e-05, "loss": 0.0026, "step": 78600 }, { "epoch": 14.02, "learning_rate": 4.299420470756063e-05, "loss": 0.0024, "step": 78610 }, { "epoch": 14.02, "learning_rate": 4.299331312410842e-05, "loss": 0.0034, "step": 78620 }, { "epoch": 14.02, "learning_rate": 4.2992421540656207e-05, "loss": 0.0022, "step": 78630 }, { "epoch": 14.02, "learning_rate": 4.2991529957204e-05, "loss": 0.0031, "step": 78640 }, { "epoch": 14.02, "learning_rate": 4.299063837375178e-05, "loss": 0.0023, "step": 78650 }, { "epoch": 14.03, "learning_rate": 4.2989746790299573e-05, "loss": 0.0011, "step": 78660 }, { "epoch": 14.03, "learning_rate": 4.2988855206847365e-05, "loss": 0.004, "step": 78670 }, { "epoch": 14.03, "learning_rate": 4.298796362339515e-05, "loss": 0.0029, "step": 78680 }, { "epoch": 14.03, "learning_rate": 4.298707203994294e-05, "loss": 0.0021, "step": 78690 }, { "epoch": 14.03, "learning_rate": 4.298618045649073e-05, "loss": 0.0019, "step": 78700 }, { "epoch": 14.04, "learning_rate": 4.298528887303852e-05, "loss": 0.0041, "step": 78710 }, { "epoch": 14.04, "learning_rate": 4.298439728958631e-05, "loss": 0.0025, "step": 78720 }, { "epoch": 14.04, "learning_rate": 4.29835057061341e-05, "loss": 0.0043, "step": 78730 }, { "epoch": 14.04, "learning_rate": 4.298261412268188e-05, "loss": 0.0027, "step": 78740 }, { "epoch": 14.04, "learning_rate": 4.2981722539229674e-05, "loss": 0.004, "step": 78750 }, { "epoch": 14.04, "learning_rate": 4.2980830955777465e-05, "loss": 0.002, "step": 78760 }, { "epoch": 14.05, "learning_rate": 4.297993937232525e-05, "loss": 0.0046, "step": 78770 }, { "epoch": 14.05, "learning_rate": 4.297904778887304e-05, "loss": 0.0037, "step": 78780 }, { "epoch": 14.05, "learning_rate": 4.2978156205420825e-05, "loss": 0.0028, "step": 78790 }, { "epoch": 14.05, "learning_rate": 4.2977264621968617e-05, "loss": 0.0029, "step": 78800 }, { "epoch": 14.05, "learning_rate": 4.297637303851641e-05, "loss": 0.002, "step": 78810 }, { "epoch": 14.05, "learning_rate": 4.29754814550642e-05, "loss": 0.003, "step": 78820 }, { "epoch": 14.06, "learning_rate": 4.297458987161198e-05, "loss": 0.0022, "step": 78830 }, { "epoch": 14.06, "learning_rate": 4.2973698288159775e-05, "loss": 0.0018, "step": 78840 }, { "epoch": 14.06, "learning_rate": 4.2972806704707566e-05, "loss": 0.0034, "step": 78850 }, { "epoch": 14.06, "learning_rate": 4.297191512125535e-05, "loss": 0.0017, "step": 78860 }, { "epoch": 14.06, "learning_rate": 4.297102353780314e-05, "loss": 0.0022, "step": 78870 }, { "epoch": 14.07, "learning_rate": 4.2970131954350926e-05, "loss": 0.0027, "step": 78880 }, { "epoch": 14.07, "learning_rate": 4.296924037089872e-05, "loss": 0.0044, "step": 78890 }, { "epoch": 14.07, "learning_rate": 4.296834878744651e-05, "loss": 0.0025, "step": 78900 }, { "epoch": 14.07, "learning_rate": 4.296745720399429e-05, "loss": 0.0029, "step": 78910 }, { "epoch": 14.07, "learning_rate": 4.296656562054209e-05, "loss": 0.0018, "step": 78920 }, { "epoch": 14.07, "learning_rate": 4.2965674037089875e-05, "loss": 0.0025, "step": 78930 }, { "epoch": 14.08, "learning_rate": 4.2964782453637666e-05, "loss": 0.0032, "step": 78940 }, { "epoch": 14.08, "learning_rate": 4.296389087018545e-05, "loss": 0.003, "step": 78950 }, { "epoch": 14.08, "learning_rate": 4.296299928673324e-05, "loss": 0.0015, "step": 78960 }, { "epoch": 14.08, "learning_rate": 4.2962107703281026e-05, "loss": 0.0024, "step": 78970 }, { "epoch": 14.08, "learning_rate": 4.296121611982882e-05, "loss": 0.0026, "step": 78980 }, { "epoch": 14.09, "learning_rate": 4.296032453637661e-05, "loss": 0.0017, "step": 78990 }, { "epoch": 14.09, "learning_rate": 4.295943295292439e-05, "loss": 0.0055, "step": 79000 }, { "epoch": 14.09, "learning_rate": 4.2958541369472184e-05, "loss": 0.0058, "step": 79010 }, { "epoch": 14.09, "learning_rate": 4.295764978601997e-05, "loss": 0.0015, "step": 79020 }, { "epoch": 14.09, "learning_rate": 4.295675820256777e-05, "loss": 0.0026, "step": 79030 }, { "epoch": 14.09, "learning_rate": 4.295586661911555e-05, "loss": 0.0033, "step": 79040 }, { "epoch": 14.1, "learning_rate": 4.295497503566334e-05, "loss": 0.0031, "step": 79050 }, { "epoch": 14.1, "learning_rate": 4.295408345221113e-05, "loss": 0.0014, "step": 79060 }, { "epoch": 14.1, "learning_rate": 4.295319186875892e-05, "loss": 0.0019, "step": 79070 }, { "epoch": 14.1, "learning_rate": 4.295230028530671e-05, "loss": 0.0032, "step": 79080 }, { "epoch": 14.1, "learning_rate": 4.2951408701854494e-05, "loss": 0.0042, "step": 79090 }, { "epoch": 14.1, "learning_rate": 4.2950517118402285e-05, "loss": 0.002, "step": 79100 }, { "epoch": 14.11, "learning_rate": 4.294962553495007e-05, "loss": 0.0023, "step": 79110 }, { "epoch": 14.11, "learning_rate": 4.294873395149786e-05, "loss": 0.003, "step": 79120 }, { "epoch": 14.11, "learning_rate": 4.294784236804565e-05, "loss": 0.0055, "step": 79130 }, { "epoch": 14.11, "learning_rate": 4.294695078459344e-05, "loss": 0.0059, "step": 79140 }, { "epoch": 14.11, "learning_rate": 4.2946059201141234e-05, "loss": 0.0023, "step": 79150 }, { "epoch": 14.12, "learning_rate": 4.294516761768902e-05, "loss": 0.0017, "step": 79160 }, { "epoch": 14.12, "learning_rate": 4.294427603423681e-05, "loss": 0.002, "step": 79170 }, { "epoch": 14.12, "learning_rate": 4.2943384450784594e-05, "loss": 0.0041, "step": 79180 }, { "epoch": 14.12, "learning_rate": 4.2942492867332386e-05, "loss": 0.0036, "step": 79190 }, { "epoch": 14.12, "learning_rate": 4.294160128388017e-05, "loss": 0.0043, "step": 79200 }, { "epoch": 14.12, "learning_rate": 4.294070970042796e-05, "loss": 0.0031, "step": 79210 }, { "epoch": 14.13, "learning_rate": 4.293981811697575e-05, "loss": 0.0046, "step": 79220 }, { "epoch": 14.13, "learning_rate": 4.293892653352354e-05, "loss": 0.002, "step": 79230 }, { "epoch": 14.13, "learning_rate": 4.293803495007133e-05, "loss": 0.0035, "step": 79240 }, { "epoch": 14.13, "learning_rate": 4.293714336661912e-05, "loss": 0.0033, "step": 79250 }, { "epoch": 14.13, "learning_rate": 4.293625178316691e-05, "loss": 0.0027, "step": 79260 }, { "epoch": 14.14, "learning_rate": 4.2935360199714695e-05, "loss": 0.0021, "step": 79270 }, { "epoch": 14.14, "learning_rate": 4.2934468616262486e-05, "loss": 0.0016, "step": 79280 }, { "epoch": 14.14, "learning_rate": 4.293357703281027e-05, "loss": 0.0039, "step": 79290 }, { "epoch": 14.14, "learning_rate": 4.293268544935806e-05, "loss": 0.0076, "step": 79300 }, { "epoch": 14.14, "learning_rate": 4.293179386590585e-05, "loss": 0.0021, "step": 79310 }, { "epoch": 14.14, "learning_rate": 4.293090228245364e-05, "loss": 0.0042, "step": 79320 }, { "epoch": 14.15, "learning_rate": 4.293001069900143e-05, "loss": 0.0064, "step": 79330 }, { "epoch": 14.15, "learning_rate": 4.292911911554921e-05, "loss": 0.0038, "step": 79340 }, { "epoch": 14.15, "learning_rate": 4.2928227532097004e-05, "loss": 0.0057, "step": 79350 }, { "epoch": 14.15, "learning_rate": 4.2927335948644796e-05, "loss": 0.0024, "step": 79360 }, { "epoch": 14.15, "learning_rate": 4.292644436519259e-05, "loss": 0.0027, "step": 79370 }, { "epoch": 14.15, "learning_rate": 4.292555278174038e-05, "loss": 0.003, "step": 79380 }, { "epoch": 14.16, "learning_rate": 4.292466119828816e-05, "loss": 0.0015, "step": 79390 }, { "epoch": 14.16, "learning_rate": 4.2923769614835954e-05, "loss": 0.0035, "step": 79400 }, { "epoch": 14.16, "learning_rate": 4.292287803138374e-05, "loss": 0.0024, "step": 79410 }, { "epoch": 14.16, "learning_rate": 4.292198644793153e-05, "loss": 0.0018, "step": 79420 }, { "epoch": 14.16, "learning_rate": 4.2921094864479314e-05, "loss": 0.0033, "step": 79430 }, { "epoch": 14.17, "learning_rate": 4.2920203281027105e-05, "loss": 0.0039, "step": 79440 }, { "epoch": 14.17, "learning_rate": 4.2919311697574896e-05, "loss": 0.0021, "step": 79450 }, { "epoch": 14.17, "learning_rate": 4.291842011412268e-05, "loss": 0.0039, "step": 79460 }, { "epoch": 14.17, "learning_rate": 4.291752853067048e-05, "loss": 0.0033, "step": 79470 }, { "epoch": 14.17, "learning_rate": 4.291663694721826e-05, "loss": 0.0025, "step": 79480 }, { "epoch": 14.17, "learning_rate": 4.2915745363766054e-05, "loss": 0.0039, "step": 79490 }, { "epoch": 14.18, "learning_rate": 4.291485378031384e-05, "loss": 0.0022, "step": 79500 }, { "epoch": 14.18, "learning_rate": 4.291396219686163e-05, "loss": 0.0069, "step": 79510 }, { "epoch": 14.18, "learning_rate": 4.2913070613409414e-05, "loss": 0.0036, "step": 79520 }, { "epoch": 14.18, "learning_rate": 4.2912179029957205e-05, "loss": 0.0043, "step": 79530 }, { "epoch": 14.18, "learning_rate": 4.2911287446505e-05, "loss": 0.0021, "step": 79540 }, { "epoch": 14.19, "learning_rate": 4.291039586305278e-05, "loss": 0.0033, "step": 79550 }, { "epoch": 14.19, "learning_rate": 4.290950427960057e-05, "loss": 0.003, "step": 79560 }, { "epoch": 14.19, "learning_rate": 4.290861269614836e-05, "loss": 0.0015, "step": 79570 }, { "epoch": 14.19, "learning_rate": 4.2907721112696155e-05, "loss": 0.0031, "step": 79580 }, { "epoch": 14.19, "learning_rate": 4.290682952924394e-05, "loss": 0.0029, "step": 79590 }, { "epoch": 14.19, "learning_rate": 4.290593794579173e-05, "loss": 0.0027, "step": 79600 }, { "epoch": 14.2, "learning_rate": 4.290504636233952e-05, "loss": 0.0032, "step": 79610 }, { "epoch": 14.2, "learning_rate": 4.2904154778887306e-05, "loss": 0.004, "step": 79620 }, { "epoch": 14.2, "learning_rate": 4.29032631954351e-05, "loss": 0.0025, "step": 79630 }, { "epoch": 14.2, "learning_rate": 4.290237161198288e-05, "loss": 0.0025, "step": 79640 }, { "epoch": 14.2, "learning_rate": 4.290148002853067e-05, "loss": 0.0023, "step": 79650 }, { "epoch": 14.2, "learning_rate": 4.290058844507846e-05, "loss": 0.0025, "step": 79660 }, { "epoch": 14.21, "learning_rate": 4.289969686162625e-05, "loss": 0.0028, "step": 79670 }, { "epoch": 14.21, "learning_rate": 4.289880527817404e-05, "loss": 0.0043, "step": 79680 }, { "epoch": 14.21, "learning_rate": 4.289791369472183e-05, "loss": 0.0012, "step": 79690 }, { "epoch": 14.21, "learning_rate": 4.289702211126962e-05, "loss": 0.0034, "step": 79700 }, { "epoch": 14.21, "learning_rate": 4.2896130527817407e-05, "loss": 0.0025, "step": 79710 }, { "epoch": 14.22, "learning_rate": 4.28952389443652e-05, "loss": 0.0034, "step": 79720 }, { "epoch": 14.22, "learning_rate": 4.289434736091298e-05, "loss": 0.0028, "step": 79730 }, { "epoch": 14.22, "learning_rate": 4.2893455777460773e-05, "loss": 0.0041, "step": 79740 }, { "epoch": 14.22, "learning_rate": 4.289256419400856e-05, "loss": 0.0029, "step": 79750 }, { "epoch": 14.22, "learning_rate": 4.289167261055635e-05, "loss": 0.0022, "step": 79760 }, { "epoch": 14.22, "learning_rate": 4.289078102710414e-05, "loss": 0.0033, "step": 79770 }, { "epoch": 14.23, "learning_rate": 4.2889889443651925e-05, "loss": 0.0058, "step": 79780 }, { "epoch": 14.23, "learning_rate": 4.2888997860199716e-05, "loss": 0.0028, "step": 79790 }, { "epoch": 14.23, "learning_rate": 4.288810627674751e-05, "loss": 0.0035, "step": 79800 }, { "epoch": 14.23, "learning_rate": 4.28872146932953e-05, "loss": 0.0031, "step": 79810 }, { "epoch": 14.23, "learning_rate": 4.288632310984308e-05, "loss": 0.0037, "step": 79820 }, { "epoch": 14.24, "learning_rate": 4.2885431526390874e-05, "loss": 0.003, "step": 79830 }, { "epoch": 14.24, "learning_rate": 4.2884539942938665e-05, "loss": 0.0038, "step": 79840 }, { "epoch": 14.24, "learning_rate": 4.288364835948645e-05, "loss": 0.0061, "step": 79850 }, { "epoch": 14.24, "learning_rate": 4.288275677603424e-05, "loss": 0.0025, "step": 79860 }, { "epoch": 14.24, "learning_rate": 4.2881865192582025e-05, "loss": 0.0027, "step": 79870 }, { "epoch": 14.24, "learning_rate": 4.2880973609129816e-05, "loss": 0.0074, "step": 79880 }, { "epoch": 14.25, "learning_rate": 4.28800820256776e-05, "loss": 0.0021, "step": 79890 }, { "epoch": 14.25, "learning_rate": 4.287919044222539e-05, "loss": 0.003, "step": 79900 }, { "epoch": 14.25, "learning_rate": 4.287829885877318e-05, "loss": 0.0014, "step": 79910 }, { "epoch": 14.25, "learning_rate": 4.2877407275320975e-05, "loss": 0.0059, "step": 79920 }, { "epoch": 14.25, "learning_rate": 4.2876515691868766e-05, "loss": 0.0039, "step": 79930 }, { "epoch": 14.25, "learning_rate": 4.287562410841655e-05, "loss": 0.0031, "step": 79940 }, { "epoch": 14.26, "learning_rate": 4.287473252496434e-05, "loss": 0.0031, "step": 79950 }, { "epoch": 14.26, "learning_rate": 4.2873840941512126e-05, "loss": 0.0039, "step": 79960 }, { "epoch": 14.26, "learning_rate": 4.287294935805992e-05, "loss": 0.0037, "step": 79970 }, { "epoch": 14.26, "learning_rate": 4.28720577746077e-05, "loss": 0.0025, "step": 79980 }, { "epoch": 14.26, "learning_rate": 4.287116619115549e-05, "loss": 0.0038, "step": 79990 }, { "epoch": 14.27, "learning_rate": 4.2870274607703284e-05, "loss": 0.0046, "step": 80000 }, { "epoch": 14.27, "learning_rate": 4.286938302425107e-05, "loss": 0.0029, "step": 80010 }, { "epoch": 14.27, "learning_rate": 4.2868491440798866e-05, "loss": 0.0016, "step": 80020 }, { "epoch": 14.27, "learning_rate": 4.286759985734665e-05, "loss": 0.0061, "step": 80030 }, { "epoch": 14.27, "learning_rate": 4.286670827389444e-05, "loss": 0.0026, "step": 80040 }, { "epoch": 14.27, "learning_rate": 4.2865816690442226e-05, "loss": 0.004, "step": 80050 }, { "epoch": 14.28, "learning_rate": 4.286492510699002e-05, "loss": 0.002, "step": 80060 }, { "epoch": 14.28, "learning_rate": 4.286403352353781e-05, "loss": 0.003, "step": 80070 }, { "epoch": 14.28, "learning_rate": 4.286314194008559e-05, "loss": 0.0036, "step": 80080 }, { "epoch": 14.28, "learning_rate": 4.2862250356633384e-05, "loss": 0.0024, "step": 80090 }, { "epoch": 14.28, "learning_rate": 4.286135877318117e-05, "loss": 0.0039, "step": 80100 }, { "epoch": 14.28, "learning_rate": 4.286046718972896e-05, "loss": 0.003, "step": 80110 }, { "epoch": 14.29, "learning_rate": 4.2859575606276745e-05, "loss": 0.0053, "step": 80120 }, { "epoch": 14.29, "learning_rate": 4.285868402282454e-05, "loss": 0.0029, "step": 80130 }, { "epoch": 14.29, "learning_rate": 4.285779243937233e-05, "loss": 0.0026, "step": 80140 }, { "epoch": 14.29, "learning_rate": 4.285690085592012e-05, "loss": 0.0038, "step": 80150 }, { "epoch": 14.29, "learning_rate": 4.285600927246791e-05, "loss": 0.0045, "step": 80160 }, { "epoch": 14.3, "learning_rate": 4.2855117689015694e-05, "loss": 0.0024, "step": 80170 }, { "epoch": 14.3, "learning_rate": 4.2854226105563485e-05, "loss": 0.003, "step": 80180 }, { "epoch": 14.3, "learning_rate": 4.285333452211127e-05, "loss": 0.0045, "step": 80190 }, { "epoch": 14.3, "learning_rate": 4.285244293865906e-05, "loss": 0.0022, "step": 80200 }, { "epoch": 14.3, "learning_rate": 4.2851551355206845e-05, "loss": 0.0046, "step": 80210 }, { "epoch": 14.3, "learning_rate": 4.2850659771754636e-05, "loss": 0.0019, "step": 80220 }, { "epoch": 14.31, "learning_rate": 4.284976818830243e-05, "loss": 0.0028, "step": 80230 }, { "epoch": 14.31, "learning_rate": 4.284887660485021e-05, "loss": 0.003, "step": 80240 }, { "epoch": 14.31, "learning_rate": 4.284798502139801e-05, "loss": 0.0027, "step": 80250 }, { "epoch": 14.31, "learning_rate": 4.2847093437945794e-05, "loss": 0.0027, "step": 80260 }, { "epoch": 14.31, "learning_rate": 4.2846201854493586e-05, "loss": 0.0018, "step": 80270 }, { "epoch": 14.32, "learning_rate": 4.284531027104137e-05, "loss": 0.0032, "step": 80280 }, { "epoch": 14.32, "learning_rate": 4.284441868758916e-05, "loss": 0.0026, "step": 80290 }, { "epoch": 14.32, "learning_rate": 4.2843527104136946e-05, "loss": 0.0048, "step": 80300 }, { "epoch": 14.32, "learning_rate": 4.284263552068474e-05, "loss": 0.0034, "step": 80310 }, { "epoch": 14.32, "learning_rate": 4.284174393723253e-05, "loss": 0.0021, "step": 80320 }, { "epoch": 14.32, "learning_rate": 4.284085235378031e-05, "loss": 0.0034, "step": 80330 }, { "epoch": 14.33, "learning_rate": 4.2839960770328104e-05, "loss": 0.0027, "step": 80340 }, { "epoch": 14.33, "learning_rate": 4.283906918687589e-05, "loss": 0.0042, "step": 80350 }, { "epoch": 14.33, "learning_rate": 4.2838177603423686e-05, "loss": 0.0026, "step": 80360 }, { "epoch": 14.33, "learning_rate": 4.283728601997147e-05, "loss": 0.0044, "step": 80370 }, { "epoch": 14.33, "learning_rate": 4.283639443651926e-05, "loss": 0.0015, "step": 80380 }, { "epoch": 14.33, "learning_rate": 4.283550285306705e-05, "loss": 0.0067, "step": 80390 }, { "epoch": 14.34, "learning_rate": 4.283461126961484e-05, "loss": 0.0025, "step": 80400 }, { "epoch": 14.34, "learning_rate": 4.283371968616263e-05, "loss": 0.003, "step": 80410 }, { "epoch": 14.34, "learning_rate": 4.283282810271041e-05, "loss": 0.0025, "step": 80420 }, { "epoch": 14.34, "learning_rate": 4.2831936519258204e-05, "loss": 0.0017, "step": 80430 }, { "epoch": 14.34, "learning_rate": 4.283104493580599e-05, "loss": 0.0023, "step": 80440 }, { "epoch": 14.35, "learning_rate": 4.283015335235378e-05, "loss": 0.0043, "step": 80450 }, { "epoch": 14.35, "learning_rate": 4.282926176890157e-05, "loss": 0.0019, "step": 80460 }, { "epoch": 14.35, "learning_rate": 4.282837018544936e-05, "loss": 0.0035, "step": 80470 }, { "epoch": 14.35, "learning_rate": 4.2827478601997154e-05, "loss": 0.002, "step": 80480 }, { "epoch": 14.35, "learning_rate": 4.282658701854494e-05, "loss": 0.0037, "step": 80490 }, { "epoch": 14.35, "learning_rate": 4.282569543509273e-05, "loss": 0.0036, "step": 80500 }, { "epoch": 14.36, "learning_rate": 4.2824803851640514e-05, "loss": 0.0041, "step": 80510 }, { "epoch": 14.36, "learning_rate": 4.2823912268188305e-05, "loss": 0.0033, "step": 80520 }, { "epoch": 14.36, "learning_rate": 4.282302068473609e-05, "loss": 0.0038, "step": 80530 }, { "epoch": 14.36, "learning_rate": 4.282212910128388e-05, "loss": 0.0029, "step": 80540 }, { "epoch": 14.36, "learning_rate": 4.282123751783167e-05, "loss": 0.0028, "step": 80550 }, { "epoch": 14.37, "learning_rate": 4.2820345934379456e-05, "loss": 0.002, "step": 80560 }, { "epoch": 14.37, "learning_rate": 4.281945435092725e-05, "loss": 0.0029, "step": 80570 }, { "epoch": 14.37, "learning_rate": 4.281856276747504e-05, "loss": 0.0022, "step": 80580 }, { "epoch": 14.37, "learning_rate": 4.281767118402283e-05, "loss": 0.0021, "step": 80590 }, { "epoch": 14.37, "learning_rate": 4.2816779600570614e-05, "loss": 0.004, "step": 80600 }, { "epoch": 14.37, "learning_rate": 4.2815888017118405e-05, "loss": 0.0022, "step": 80610 }, { "epoch": 14.38, "learning_rate": 4.28149964336662e-05, "loss": 0.0035, "step": 80620 }, { "epoch": 14.38, "learning_rate": 4.281410485021398e-05, "loss": 0.0021, "step": 80630 }, { "epoch": 14.38, "learning_rate": 4.281321326676177e-05, "loss": 0.0028, "step": 80640 }, { "epoch": 14.38, "learning_rate": 4.281232168330956e-05, "loss": 0.0026, "step": 80650 }, { "epoch": 14.38, "learning_rate": 4.281143009985735e-05, "loss": 0.0024, "step": 80660 }, { "epoch": 14.38, "learning_rate": 4.281053851640513e-05, "loss": 0.005, "step": 80670 }, { "epoch": 14.39, "learning_rate": 4.2809646932952924e-05, "loss": 0.0021, "step": 80680 }, { "epoch": 14.39, "learning_rate": 4.2808755349500715e-05, "loss": 0.0019, "step": 80690 }, { "epoch": 14.39, "learning_rate": 4.2807863766048506e-05, "loss": 0.0015, "step": 80700 }, { "epoch": 14.39, "learning_rate": 4.28069721825963e-05, "loss": 0.0018, "step": 80710 }, { "epoch": 14.39, "learning_rate": 4.280608059914408e-05, "loss": 0.0023, "step": 80720 }, { "epoch": 14.4, "learning_rate": 4.280518901569187e-05, "loss": 0.0033, "step": 80730 }, { "epoch": 14.4, "learning_rate": 4.280429743223966e-05, "loss": 0.0012, "step": 80740 }, { "epoch": 14.4, "learning_rate": 4.280340584878745e-05, "loss": 0.0034, "step": 80750 }, { "epoch": 14.4, "learning_rate": 4.280251426533523e-05, "loss": 0.0057, "step": 80760 }, { "epoch": 14.4, "learning_rate": 4.2801622681883024e-05, "loss": 0.002, "step": 80770 }, { "epoch": 14.4, "learning_rate": 4.2800731098430815e-05, "loss": 0.0026, "step": 80780 }, { "epoch": 14.41, "learning_rate": 4.27998395149786e-05, "loss": 0.0038, "step": 80790 }, { "epoch": 14.41, "learning_rate": 4.27989479315264e-05, "loss": 0.0034, "step": 80800 }, { "epoch": 14.41, "learning_rate": 4.279805634807418e-05, "loss": 0.0028, "step": 80810 }, { "epoch": 14.41, "learning_rate": 4.279716476462197e-05, "loss": 0.0024, "step": 80820 }, { "epoch": 14.41, "learning_rate": 4.279627318116976e-05, "loss": 0.0015, "step": 80830 }, { "epoch": 14.42, "learning_rate": 4.279538159771755e-05, "loss": 0.0029, "step": 80840 }, { "epoch": 14.42, "learning_rate": 4.279449001426534e-05, "loss": 0.0022, "step": 80850 }, { "epoch": 14.42, "learning_rate": 4.2793598430813125e-05, "loss": 0.0013, "step": 80860 }, { "epoch": 14.42, "learning_rate": 4.2792706847360916e-05, "loss": 0.004, "step": 80870 }, { "epoch": 14.42, "learning_rate": 4.27918152639087e-05, "loss": 0.0054, "step": 80880 }, { "epoch": 14.42, "learning_rate": 4.279092368045649e-05, "loss": 0.0035, "step": 80890 }, { "epoch": 14.43, "learning_rate": 4.2790032097004276e-05, "loss": 0.0016, "step": 80900 }, { "epoch": 14.43, "learning_rate": 4.2789140513552074e-05, "loss": 0.0018, "step": 80910 }, { "epoch": 14.43, "learning_rate": 4.278824893009986e-05, "loss": 0.0012, "step": 80920 }, { "epoch": 14.43, "learning_rate": 4.278735734664765e-05, "loss": 0.0033, "step": 80930 }, { "epoch": 14.43, "learning_rate": 4.278646576319544e-05, "loss": 0.003, "step": 80940 }, { "epoch": 14.43, "learning_rate": 4.2785574179743225e-05, "loss": 0.0039, "step": 80950 }, { "epoch": 14.44, "learning_rate": 4.2784682596291016e-05, "loss": 0.0058, "step": 80960 }, { "epoch": 14.44, "learning_rate": 4.27837910128388e-05, "loss": 0.0022, "step": 80970 }, { "epoch": 14.44, "learning_rate": 4.278289942938659e-05, "loss": 0.0022, "step": 80980 }, { "epoch": 14.44, "learning_rate": 4.2782007845934377e-05, "loss": 0.002, "step": 80990 }, { "epoch": 14.44, "learning_rate": 4.278111626248217e-05, "loss": 0.0066, "step": 81000 }, { "epoch": 14.45, "learning_rate": 4.278022467902996e-05, "loss": 0.0016, "step": 81010 }, { "epoch": 14.45, "learning_rate": 4.277933309557775e-05, "loss": 0.004, "step": 81020 }, { "epoch": 14.45, "learning_rate": 4.277844151212554e-05, "loss": 0.0043, "step": 81030 }, { "epoch": 14.45, "learning_rate": 4.2777549928673326e-05, "loss": 0.0043, "step": 81040 }, { "epoch": 14.45, "learning_rate": 4.277665834522112e-05, "loss": 0.0036, "step": 81050 }, { "epoch": 14.45, "learning_rate": 4.27757667617689e-05, "loss": 0.0035, "step": 81060 }, { "epoch": 14.46, "learning_rate": 4.277487517831669e-05, "loss": 0.0041, "step": 81070 }, { "epoch": 14.46, "learning_rate": 4.2773983594864484e-05, "loss": 0.0023, "step": 81080 }, { "epoch": 14.46, "learning_rate": 4.277309201141227e-05, "loss": 0.0024, "step": 81090 }, { "epoch": 14.46, "learning_rate": 4.277220042796006e-05, "loss": 0.0052, "step": 81100 }, { "epoch": 14.46, "learning_rate": 4.2771308844507844e-05, "loss": 0.0033, "step": 81110 }, { "epoch": 14.47, "learning_rate": 4.2770417261055635e-05, "loss": 0.0036, "step": 81120 }, { "epoch": 14.47, "learning_rate": 4.2769525677603426e-05, "loss": 0.004, "step": 81130 }, { "epoch": 14.47, "learning_rate": 4.276863409415122e-05, "loss": 0.0032, "step": 81140 }, { "epoch": 14.47, "learning_rate": 4.2767742510699e-05, "loss": 0.002, "step": 81150 }, { "epoch": 14.47, "learning_rate": 4.276685092724679e-05, "loss": 0.0044, "step": 81160 }, { "epoch": 14.47, "learning_rate": 4.2765959343794584e-05, "loss": 0.0017, "step": 81170 }, { "epoch": 14.48, "learning_rate": 4.276506776034237e-05, "loss": 0.0029, "step": 81180 }, { "epoch": 14.48, "learning_rate": 4.276417617689016e-05, "loss": 0.0045, "step": 81190 }, { "epoch": 14.48, "learning_rate": 4.2763284593437944e-05, "loss": 0.0031, "step": 81200 }, { "epoch": 14.48, "learning_rate": 4.2762393009985736e-05, "loss": 0.0017, "step": 81210 }, { "epoch": 14.48, "learning_rate": 4.276150142653352e-05, "loss": 0.003, "step": 81220 }, { "epoch": 14.48, "learning_rate": 4.276060984308131e-05, "loss": 0.0038, "step": 81230 }, { "epoch": 14.49, "learning_rate": 4.27597182596291e-05, "loss": 0.003, "step": 81240 }, { "epoch": 14.49, "learning_rate": 4.2758826676176894e-05, "loss": 0.0031, "step": 81250 }, { "epoch": 14.49, "learning_rate": 4.2757935092724685e-05, "loss": 0.003, "step": 81260 }, { "epoch": 14.49, "learning_rate": 4.275704350927247e-05, "loss": 0.0038, "step": 81270 }, { "epoch": 14.49, "learning_rate": 4.275615192582026e-05, "loss": 0.0053, "step": 81280 }, { "epoch": 14.5, "learning_rate": 4.2755260342368045e-05, "loss": 0.0035, "step": 81290 }, { "epoch": 14.5, "learning_rate": 4.2754368758915836e-05, "loss": 0.0036, "step": 81300 }, { "epoch": 14.5, "learning_rate": 4.275347717546363e-05, "loss": 0.0038, "step": 81310 }, { "epoch": 14.5, "learning_rate": 4.275258559201141e-05, "loss": 0.0021, "step": 81320 }, { "epoch": 14.5, "learning_rate": 4.27516940085592e-05, "loss": 0.0035, "step": 81330 }, { "epoch": 14.5, "learning_rate": 4.275080242510699e-05, "loss": 0.0022, "step": 81340 }, { "epoch": 14.51, "learning_rate": 4.2749910841654786e-05, "loss": 0.0027, "step": 81350 }, { "epoch": 14.51, "learning_rate": 4.274901925820257e-05, "loss": 0.0031, "step": 81360 }, { "epoch": 14.51, "learning_rate": 4.274812767475036e-05, "loss": 0.0031, "step": 81370 }, { "epoch": 14.51, "learning_rate": 4.2747236091298146e-05, "loss": 0.0026, "step": 81380 }, { "epoch": 14.51, "learning_rate": 4.274634450784594e-05, "loss": 0.0031, "step": 81390 }, { "epoch": 14.51, "learning_rate": 4.274545292439373e-05, "loss": 0.0041, "step": 81400 }, { "epoch": 14.52, "learning_rate": 4.274456134094151e-05, "loss": 0.0033, "step": 81410 }, { "epoch": 14.52, "learning_rate": 4.2743669757489304e-05, "loss": 0.0025, "step": 81420 }, { "epoch": 14.52, "learning_rate": 4.274277817403709e-05, "loss": 0.0056, "step": 81430 }, { "epoch": 14.52, "learning_rate": 4.274188659058488e-05, "loss": 0.0016, "step": 81440 }, { "epoch": 14.52, "learning_rate": 4.2740995007132664e-05, "loss": 0.0032, "step": 81450 }, { "epoch": 14.53, "learning_rate": 4.274010342368046e-05, "loss": 0.0015, "step": 81460 }, { "epoch": 14.53, "learning_rate": 4.2739211840228246e-05, "loss": 0.0027, "step": 81470 }, { "epoch": 14.53, "learning_rate": 4.273832025677604e-05, "loss": 0.0013, "step": 81480 }, { "epoch": 14.53, "learning_rate": 4.273742867332383e-05, "loss": 0.0017, "step": 81490 }, { "epoch": 14.53, "learning_rate": 4.273653708987161e-05, "loss": 0.0023, "step": 81500 }, { "epoch": 14.53, "learning_rate": 4.2735645506419404e-05, "loss": 0.0027, "step": 81510 }, { "epoch": 14.54, "learning_rate": 4.273475392296719e-05, "loss": 0.0065, "step": 81520 }, { "epoch": 14.54, "learning_rate": 4.273386233951498e-05, "loss": 0.0014, "step": 81530 }, { "epoch": 14.54, "learning_rate": 4.273297075606277e-05, "loss": 0.003, "step": 81540 }, { "epoch": 14.54, "learning_rate": 4.2732079172610556e-05, "loss": 0.0028, "step": 81550 }, { "epoch": 14.54, "learning_rate": 4.273118758915835e-05, "loss": 0.0027, "step": 81560 }, { "epoch": 14.55, "learning_rate": 4.273029600570614e-05, "loss": 0.0027, "step": 81570 }, { "epoch": 14.55, "learning_rate": 4.272940442225393e-05, "loss": 0.0029, "step": 81580 }, { "epoch": 14.55, "learning_rate": 4.2728512838801714e-05, "loss": 0.0029, "step": 81590 }, { "epoch": 14.55, "learning_rate": 4.2727621255349505e-05, "loss": 0.0031, "step": 81600 }, { "epoch": 14.55, "learning_rate": 4.272672967189729e-05, "loss": 0.0022, "step": 81610 }, { "epoch": 14.55, "learning_rate": 4.272583808844508e-05, "loss": 0.0029, "step": 81620 }, { "epoch": 14.56, "learning_rate": 4.272494650499287e-05, "loss": 0.0022, "step": 81630 }, { "epoch": 14.56, "learning_rate": 4.2724054921540656e-05, "loss": 0.0012, "step": 81640 }, { "epoch": 14.56, "learning_rate": 4.272316333808845e-05, "loss": 0.007, "step": 81650 }, { "epoch": 14.56, "learning_rate": 4.272227175463623e-05, "loss": 0.0022, "step": 81660 }, { "epoch": 14.56, "learning_rate": 4.272138017118402e-05, "loss": 0.0055, "step": 81670 }, { "epoch": 14.56, "learning_rate": 4.2720488587731814e-05, "loss": 0.0014, "step": 81680 }, { "epoch": 14.57, "learning_rate": 4.2719597004279605e-05, "loss": 0.0026, "step": 81690 }, { "epoch": 14.57, "learning_rate": 4.271870542082739e-05, "loss": 0.0024, "step": 81700 }, { "epoch": 14.57, "learning_rate": 4.271781383737518e-05, "loss": 0.0035, "step": 81710 }, { "epoch": 14.57, "learning_rate": 4.271692225392297e-05, "loss": 0.003, "step": 81720 }, { "epoch": 14.57, "learning_rate": 4.271603067047076e-05, "loss": 0.0028, "step": 81730 }, { "epoch": 14.58, "learning_rate": 4.271513908701855e-05, "loss": 0.0023, "step": 81740 }, { "epoch": 14.58, "learning_rate": 4.271424750356633e-05, "loss": 0.0037, "step": 81750 }, { "epoch": 14.58, "learning_rate": 4.2713355920114124e-05, "loss": 0.0041, "step": 81760 }, { "epoch": 14.58, "learning_rate": 4.2712464336661915e-05, "loss": 0.0024, "step": 81770 }, { "epoch": 14.58, "learning_rate": 4.27115727532097e-05, "loss": 0.0071, "step": 81780 }, { "epoch": 14.58, "learning_rate": 4.27106811697575e-05, "loss": 0.0037, "step": 81790 }, { "epoch": 14.59, "learning_rate": 4.270978958630528e-05, "loss": 0.0046, "step": 81800 }, { "epoch": 14.59, "learning_rate": 4.270889800285307e-05, "loss": 0.0034, "step": 81810 }, { "epoch": 14.59, "learning_rate": 4.270800641940086e-05, "loss": 0.0028, "step": 81820 }, { "epoch": 14.59, "learning_rate": 4.270711483594865e-05, "loss": 0.0045, "step": 81830 }, { "epoch": 14.59, "learning_rate": 4.270622325249643e-05, "loss": 0.0057, "step": 81840 }, { "epoch": 14.6, "learning_rate": 4.2705331669044224e-05, "loss": 0.0028, "step": 81850 }, { "epoch": 14.6, "learning_rate": 4.2704440085592015e-05, "loss": 0.0025, "step": 81860 }, { "epoch": 14.6, "learning_rate": 4.27035485021398e-05, "loss": 0.0048, "step": 81870 }, { "epoch": 14.6, "learning_rate": 4.270265691868759e-05, "loss": 0.0013, "step": 81880 }, { "epoch": 14.6, "learning_rate": 4.2701765335235375e-05, "loss": 0.0023, "step": 81890 }, { "epoch": 14.6, "learning_rate": 4.270087375178317e-05, "loss": 0.0021, "step": 81900 }, { "epoch": 14.61, "learning_rate": 4.269998216833096e-05, "loss": 0.0045, "step": 81910 }, { "epoch": 14.61, "learning_rate": 4.269909058487875e-05, "loss": 0.0023, "step": 81920 }, { "epoch": 14.61, "learning_rate": 4.2698199001426533e-05, "loss": 0.0033, "step": 81930 }, { "epoch": 14.61, "learning_rate": 4.2697307417974325e-05, "loss": 0.0026, "step": 81940 }, { "epoch": 14.61, "learning_rate": 4.2696415834522116e-05, "loss": 0.0028, "step": 81950 }, { "epoch": 14.61, "learning_rate": 4.26955242510699e-05, "loss": 0.0031, "step": 81960 }, { "epoch": 14.62, "learning_rate": 4.269463266761769e-05, "loss": 0.0043, "step": 81970 }, { "epoch": 14.62, "learning_rate": 4.2693741084165476e-05, "loss": 0.0015, "step": 81980 }, { "epoch": 14.62, "learning_rate": 4.269284950071327e-05, "loss": 0.0027, "step": 81990 }, { "epoch": 14.62, "learning_rate": 4.269195791726106e-05, "loss": 0.0033, "step": 82000 }, { "epoch": 14.62, "learning_rate": 4.269106633380885e-05, "loss": 0.0032, "step": 82010 }, { "epoch": 14.63, "learning_rate": 4.269017475035664e-05, "loss": 0.002, "step": 82020 }, { "epoch": 14.63, "learning_rate": 4.2689283166904425e-05, "loss": 0.0033, "step": 82030 }, { "epoch": 14.63, "learning_rate": 4.2688391583452216e-05, "loss": 0.0017, "step": 82040 }, { "epoch": 14.63, "learning_rate": 4.26875e-05, "loss": 0.0038, "step": 82050 }, { "epoch": 14.63, "learning_rate": 4.268660841654779e-05, "loss": 0.0061, "step": 82060 }, { "epoch": 14.63, "learning_rate": 4.2685716833095576e-05, "loss": 0.0037, "step": 82070 }, { "epoch": 14.64, "learning_rate": 4.268482524964337e-05, "loss": 0.0034, "step": 82080 }, { "epoch": 14.64, "learning_rate": 4.268393366619116e-05, "loss": 0.0029, "step": 82090 }, { "epoch": 14.64, "learning_rate": 4.268304208273894e-05, "loss": 0.0026, "step": 82100 }, { "epoch": 14.64, "learning_rate": 4.2682150499286735e-05, "loss": 0.0024, "step": 82110 }, { "epoch": 14.64, "learning_rate": 4.2681258915834526e-05, "loss": 0.0035, "step": 82120 }, { "epoch": 14.65, "learning_rate": 4.268036733238232e-05, "loss": 0.0027, "step": 82130 }, { "epoch": 14.65, "learning_rate": 4.26794757489301e-05, "loss": 0.005, "step": 82140 }, { "epoch": 14.65, "learning_rate": 4.267858416547789e-05, "loss": 0.0025, "step": 82150 }, { "epoch": 14.65, "learning_rate": 4.267769258202568e-05, "loss": 0.0013, "step": 82160 }, { "epoch": 14.65, "learning_rate": 4.267680099857347e-05, "loss": 0.0026, "step": 82170 }, { "epoch": 14.65, "learning_rate": 4.267590941512126e-05, "loss": 0.0036, "step": 82180 }, { "epoch": 14.66, "learning_rate": 4.2675017831669044e-05, "loss": 0.0014, "step": 82190 }, { "epoch": 14.66, "learning_rate": 4.2674126248216835e-05, "loss": 0.0024, "step": 82200 }, { "epoch": 14.66, "learning_rate": 4.267323466476462e-05, "loss": 0.0022, "step": 82210 }, { "epoch": 14.66, "learning_rate": 4.267234308131241e-05, "loss": 0.0026, "step": 82220 }, { "epoch": 14.66, "learning_rate": 4.26714514978602e-05, "loss": 0.0033, "step": 82230 }, { "epoch": 14.66, "learning_rate": 4.267055991440799e-05, "loss": 0.0025, "step": 82240 }, { "epoch": 14.67, "learning_rate": 4.266966833095578e-05, "loss": 0.0028, "step": 82250 }, { "epoch": 14.67, "learning_rate": 4.266877674750357e-05, "loss": 0.0028, "step": 82260 }, { "epoch": 14.67, "learning_rate": 4.266788516405136e-05, "loss": 0.0041, "step": 82270 }, { "epoch": 14.67, "learning_rate": 4.2666993580599144e-05, "loss": 0.0048, "step": 82280 }, { "epoch": 14.67, "learning_rate": 4.2666101997146936e-05, "loss": 0.0036, "step": 82290 }, { "epoch": 14.68, "learning_rate": 4.266521041369472e-05, "loss": 0.0035, "step": 82300 }, { "epoch": 14.68, "learning_rate": 4.266431883024251e-05, "loss": 0.0053, "step": 82310 }, { "epoch": 14.68, "learning_rate": 4.26634272467903e-05, "loss": 0.0057, "step": 82320 }, { "epoch": 14.68, "learning_rate": 4.266253566333809e-05, "loss": 0.0032, "step": 82330 }, { "epoch": 14.68, "learning_rate": 4.2661644079885885e-05, "loss": 0.0042, "step": 82340 }, { "epoch": 14.68, "learning_rate": 4.266075249643367e-05, "loss": 0.0037, "step": 82350 }, { "epoch": 14.69, "learning_rate": 4.265986091298146e-05, "loss": 0.0012, "step": 82360 }, { "epoch": 14.69, "learning_rate": 4.2658969329529245e-05, "loss": 0.0027, "step": 82370 }, { "epoch": 14.69, "learning_rate": 4.2658077746077036e-05, "loss": 0.0033, "step": 82380 }, { "epoch": 14.69, "learning_rate": 4.265718616262482e-05, "loss": 0.0021, "step": 82390 }, { "epoch": 14.69, "learning_rate": 4.265629457917261e-05, "loss": 0.0032, "step": 82400 }, { "epoch": 14.7, "learning_rate": 4.26554029957204e-05, "loss": 0.0054, "step": 82410 }, { "epoch": 14.7, "learning_rate": 4.265451141226819e-05, "loss": 0.0024, "step": 82420 }, { "epoch": 14.7, "learning_rate": 4.265361982881598e-05, "loss": 0.003, "step": 82430 }, { "epoch": 14.7, "learning_rate": 4.265272824536376e-05, "loss": 0.0024, "step": 82440 }, { "epoch": 14.7, "learning_rate": 4.265183666191156e-05, "loss": 0.003, "step": 82450 }, { "epoch": 14.7, "learning_rate": 4.2650945078459346e-05, "loss": 0.0037, "step": 82460 }, { "epoch": 14.71, "learning_rate": 4.265005349500714e-05, "loss": 0.0026, "step": 82470 }, { "epoch": 14.71, "learning_rate": 4.264916191155492e-05, "loss": 0.0014, "step": 82480 }, { "epoch": 14.71, "learning_rate": 4.264827032810271e-05, "loss": 0.0045, "step": 82490 }, { "epoch": 14.71, "learning_rate": 4.2647378744650504e-05, "loss": 0.0037, "step": 82500 }, { "epoch": 14.71, "learning_rate": 4.264648716119829e-05, "loss": 0.0034, "step": 82510 }, { "epoch": 14.71, "learning_rate": 4.264559557774608e-05, "loss": 0.0028, "step": 82520 }, { "epoch": 14.72, "learning_rate": 4.2644703994293864e-05, "loss": 0.0034, "step": 82530 }, { "epoch": 14.72, "learning_rate": 4.2643812410841655e-05, "loss": 0.0022, "step": 82540 }, { "epoch": 14.72, "learning_rate": 4.2642920827389446e-05, "loss": 0.002, "step": 82550 }, { "epoch": 14.72, "learning_rate": 4.264202924393724e-05, "loss": 0.0029, "step": 82560 }, { "epoch": 14.72, "learning_rate": 4.264113766048503e-05, "loss": 0.0018, "step": 82570 }, { "epoch": 14.73, "learning_rate": 4.264024607703281e-05, "loss": 0.0017, "step": 82580 }, { "epoch": 14.73, "learning_rate": 4.2639354493580604e-05, "loss": 0.0045, "step": 82590 }, { "epoch": 14.73, "learning_rate": 4.263846291012839e-05, "loss": 0.0048, "step": 82600 }, { "epoch": 14.73, "learning_rate": 4.263757132667618e-05, "loss": 0.0037, "step": 82610 }, { "epoch": 14.73, "learning_rate": 4.2636679743223964e-05, "loss": 0.0027, "step": 82620 }, { "epoch": 14.73, "learning_rate": 4.2635788159771755e-05, "loss": 0.007, "step": 82630 }, { "epoch": 14.74, "learning_rate": 4.263489657631955e-05, "loss": 0.0025, "step": 82640 }, { "epoch": 14.74, "learning_rate": 4.263400499286733e-05, "loss": 0.0024, "step": 82650 }, { "epoch": 14.74, "learning_rate": 4.263311340941512e-05, "loss": 0.0046, "step": 82660 }, { "epoch": 14.74, "learning_rate": 4.2632221825962914e-05, "loss": 0.0015, "step": 82670 }, { "epoch": 14.74, "learning_rate": 4.263141940085592e-05, "loss": 0.0052, "step": 82680 }, { "epoch": 14.75, "learning_rate": 4.2630527817403706e-05, "loss": 0.0042, "step": 82690 }, { "epoch": 14.75, "learning_rate": 4.26296362339515e-05, "loss": 0.0032, "step": 82700 }, { "epoch": 14.75, "learning_rate": 4.262874465049929e-05, "loss": 0.0025, "step": 82710 }, { "epoch": 14.75, "learning_rate": 4.262785306704708e-05, "loss": 0.0047, "step": 82720 }, { "epoch": 14.75, "learning_rate": 4.262696148359487e-05, "loss": 0.0047, "step": 82730 }, { "epoch": 14.75, "learning_rate": 4.2626069900142655e-05, "loss": 0.0027, "step": 82740 }, { "epoch": 14.76, "learning_rate": 4.2625178316690446e-05, "loss": 0.0016, "step": 82750 }, { "epoch": 14.76, "learning_rate": 4.262428673323823e-05, "loss": 0.0038, "step": 82760 }, { "epoch": 14.76, "learning_rate": 4.262339514978602e-05, "loss": 0.0027, "step": 82770 }, { "epoch": 14.76, "learning_rate": 4.262250356633381e-05, "loss": 0.002, "step": 82780 }, { "epoch": 14.76, "learning_rate": 4.26216119828816e-05, "loss": 0.0071, "step": 82790 }, { "epoch": 14.76, "learning_rate": 4.262072039942939e-05, "loss": 0.0049, "step": 82800 }, { "epoch": 14.77, "learning_rate": 4.2619828815977173e-05, "loss": 0.0039, "step": 82810 }, { "epoch": 14.77, "learning_rate": 4.2618937232524965e-05, "loss": 0.0019, "step": 82820 }, { "epoch": 14.77, "learning_rate": 4.2618045649072756e-05, "loss": 0.0035, "step": 82830 }, { "epoch": 14.77, "learning_rate": 4.261715406562055e-05, "loss": 0.0035, "step": 82840 }, { "epoch": 14.77, "learning_rate": 4.261626248216833e-05, "loss": 0.0033, "step": 82850 }, { "epoch": 14.78, "learning_rate": 4.261537089871612e-05, "loss": 0.0042, "step": 82860 }, { "epoch": 14.78, "learning_rate": 4.2614479315263914e-05, "loss": 0.0051, "step": 82870 }, { "epoch": 14.78, "learning_rate": 4.26135877318117e-05, "loss": 0.0048, "step": 82880 }, { "epoch": 14.78, "learning_rate": 4.261269614835949e-05, "loss": 0.0037, "step": 82890 }, { "epoch": 14.78, "learning_rate": 4.2611804564907274e-05, "loss": 0.0033, "step": 82900 }, { "epoch": 14.78, "learning_rate": 4.2610912981455065e-05, "loss": 0.0029, "step": 82910 }, { "epoch": 14.79, "learning_rate": 4.261002139800285e-05, "loss": 0.0025, "step": 82920 }, { "epoch": 14.79, "learning_rate": 4.260912981455064e-05, "loss": 0.0018, "step": 82930 }, { "epoch": 14.79, "learning_rate": 4.260823823109843e-05, "loss": 0.003, "step": 82940 }, { "epoch": 14.79, "learning_rate": 4.260734664764622e-05, "loss": 0.0028, "step": 82950 }, { "epoch": 14.79, "learning_rate": 4.2606455064194014e-05, "loss": 0.0027, "step": 82960 }, { "epoch": 14.79, "learning_rate": 4.26055634807418e-05, "loss": 0.0061, "step": 82970 }, { "epoch": 14.8, "learning_rate": 4.260467189728959e-05, "loss": 0.0042, "step": 82980 }, { "epoch": 14.8, "learning_rate": 4.2603780313837375e-05, "loss": 0.0039, "step": 82990 }, { "epoch": 14.8, "learning_rate": 4.2602888730385166e-05, "loss": 0.0035, "step": 83000 }, { "epoch": 14.8, "learning_rate": 4.260199714693296e-05, "loss": 0.0041, "step": 83010 }, { "epoch": 14.8, "learning_rate": 4.260110556348074e-05, "loss": 0.0015, "step": 83020 }, { "epoch": 14.81, "learning_rate": 4.260021398002853e-05, "loss": 0.0011, "step": 83030 }, { "epoch": 14.81, "learning_rate": 4.259932239657632e-05, "loss": 0.0023, "step": 83040 }, { "epoch": 14.81, "learning_rate": 4.2598430813124115e-05, "loss": 0.0047, "step": 83050 }, { "epoch": 14.81, "learning_rate": 4.25975392296719e-05, "loss": 0.0032, "step": 83060 }, { "epoch": 14.81, "learning_rate": 4.259664764621969e-05, "loss": 0.0027, "step": 83070 }, { "epoch": 14.81, "learning_rate": 4.2595756062767475e-05, "loss": 0.0045, "step": 83080 }, { "epoch": 14.82, "learning_rate": 4.2594864479315266e-05, "loss": 0.003, "step": 83090 }, { "epoch": 14.82, "learning_rate": 4.259397289586306e-05, "loss": 0.0029, "step": 83100 }, { "epoch": 14.82, "learning_rate": 4.259308131241084e-05, "loss": 0.0026, "step": 83110 }, { "epoch": 14.82, "learning_rate": 4.259218972895863e-05, "loss": 0.0027, "step": 83120 }, { "epoch": 14.82, "learning_rate": 4.259129814550642e-05, "loss": 0.0036, "step": 83130 }, { "epoch": 14.83, "learning_rate": 4.259040656205421e-05, "loss": 0.0051, "step": 83140 }, { "epoch": 14.83, "learning_rate": 4.258951497860199e-05, "loss": 0.0026, "step": 83150 }, { "epoch": 14.83, "learning_rate": 4.258862339514979e-05, "loss": 0.0074, "step": 83160 }, { "epoch": 14.83, "learning_rate": 4.2587731811697576e-05, "loss": 0.0066, "step": 83170 }, { "epoch": 14.83, "learning_rate": 4.258684022824537e-05, "loss": 0.0016, "step": 83180 }, { "epoch": 14.83, "learning_rate": 4.258594864479316e-05, "loss": 0.0047, "step": 83190 }, { "epoch": 14.84, "learning_rate": 4.258505706134094e-05, "loss": 0.0041, "step": 83200 }, { "epoch": 14.84, "learning_rate": 4.2584165477888734e-05, "loss": 0.0018, "step": 83210 }, { "epoch": 14.84, "learning_rate": 4.258327389443652e-05, "loss": 0.0028, "step": 83220 }, { "epoch": 14.84, "learning_rate": 4.258238231098431e-05, "loss": 0.0031, "step": 83230 }, { "epoch": 14.84, "learning_rate": 4.25814907275321e-05, "loss": 0.0027, "step": 83240 }, { "epoch": 14.84, "learning_rate": 4.2580599144079885e-05, "loss": 0.0036, "step": 83250 }, { "epoch": 14.85, "learning_rate": 4.2579707560627676e-05, "loss": 0.0038, "step": 83260 }, { "epoch": 14.85, "learning_rate": 4.257881597717547e-05, "loss": 0.0034, "step": 83270 }, { "epoch": 14.85, "learning_rate": 4.257792439372326e-05, "loss": 0.0016, "step": 83280 }, { "epoch": 14.85, "learning_rate": 4.257703281027104e-05, "loss": 0.0041, "step": 83290 }, { "epoch": 14.85, "learning_rate": 4.2576141226818834e-05, "loss": 0.0033, "step": 83300 }, { "epoch": 14.86, "learning_rate": 4.257524964336662e-05, "loss": 0.0039, "step": 83310 }, { "epoch": 14.86, "learning_rate": 4.257435805991441e-05, "loss": 0.0027, "step": 83320 }, { "epoch": 14.86, "learning_rate": 4.25734664764622e-05, "loss": 0.0067, "step": 83330 }, { "epoch": 14.86, "learning_rate": 4.2572574893009986e-05, "loss": 0.0048, "step": 83340 }, { "epoch": 14.86, "learning_rate": 4.257168330955778e-05, "loss": 0.004, "step": 83350 }, { "epoch": 14.86, "learning_rate": 4.257079172610556e-05, "loss": 0.0036, "step": 83360 }, { "epoch": 14.87, "learning_rate": 4.256990014265335e-05, "loss": 0.0051, "step": 83370 }, { "epoch": 14.87, "learning_rate": 4.2569008559201144e-05, "loss": 0.0037, "step": 83380 }, { "epoch": 14.87, "learning_rate": 4.2568116975748935e-05, "loss": 0.0043, "step": 83390 }, { "epoch": 14.87, "learning_rate": 4.256722539229672e-05, "loss": 0.0017, "step": 83400 }, { "epoch": 14.87, "learning_rate": 4.256633380884451e-05, "loss": 0.0029, "step": 83410 }, { "epoch": 14.88, "learning_rate": 4.25654422253923e-05, "loss": 0.004, "step": 83420 }, { "epoch": 14.88, "learning_rate": 4.2564550641940086e-05, "loss": 0.0022, "step": 83430 }, { "epoch": 14.88, "learning_rate": 4.256365905848788e-05, "loss": 0.002, "step": 83440 }, { "epoch": 14.88, "learning_rate": 4.256276747503566e-05, "loss": 0.0036, "step": 83450 }, { "epoch": 14.88, "learning_rate": 4.256187589158345e-05, "loss": 0.0032, "step": 83460 }, { "epoch": 14.88, "learning_rate": 4.2560984308131244e-05, "loss": 0.0061, "step": 83470 }, { "epoch": 14.89, "learning_rate": 4.256009272467903e-05, "loss": 0.002, "step": 83480 }, { "epoch": 14.89, "learning_rate": 4.255920114122682e-05, "loss": 0.0084, "step": 83490 }, { "epoch": 14.89, "learning_rate": 4.255830955777461e-05, "loss": 0.002, "step": 83500 }, { "epoch": 14.89, "learning_rate": 4.25574179743224e-05, "loss": 0.0029, "step": 83510 }, { "epoch": 14.89, "learning_rate": 4.255652639087019e-05, "loss": 0.0039, "step": 83520 }, { "epoch": 14.89, "learning_rate": 4.255563480741798e-05, "loss": 0.0042, "step": 83530 }, { "epoch": 14.9, "learning_rate": 4.255474322396576e-05, "loss": 0.0033, "step": 83540 }, { "epoch": 14.9, "learning_rate": 4.2553851640513554e-05, "loss": 0.0055, "step": 83550 }, { "epoch": 14.9, "learning_rate": 4.2552960057061345e-05, "loss": 0.0031, "step": 83560 }, { "epoch": 14.9, "learning_rate": 4.255206847360913e-05, "loss": 0.0019, "step": 83570 }, { "epoch": 14.9, "learning_rate": 4.255117689015692e-05, "loss": 0.0024, "step": 83580 }, { "epoch": 14.91, "learning_rate": 4.2550285306704705e-05, "loss": 0.004, "step": 83590 }, { "epoch": 14.91, "learning_rate": 4.25493937232525e-05, "loss": 0.0037, "step": 83600 }, { "epoch": 14.91, "learning_rate": 4.254850213980029e-05, "loss": 0.0023, "step": 83610 }, { "epoch": 14.91, "learning_rate": 4.254761055634808e-05, "loss": 0.0039, "step": 83620 }, { "epoch": 14.91, "learning_rate": 4.254671897289586e-05, "loss": 0.0041, "step": 83630 }, { "epoch": 14.91, "learning_rate": 4.2545827389443654e-05, "loss": 0.0042, "step": 83640 }, { "epoch": 14.92, "learning_rate": 4.2544935805991445e-05, "loss": 0.0032, "step": 83650 }, { "epoch": 14.92, "learning_rate": 4.254404422253923e-05, "loss": 0.003, "step": 83660 }, { "epoch": 14.92, "learning_rate": 4.254315263908702e-05, "loss": 0.0015, "step": 83670 }, { "epoch": 14.92, "learning_rate": 4.2542261055634805e-05, "loss": 0.0029, "step": 83680 }, { "epoch": 14.92, "learning_rate": 4.25413694721826e-05, "loss": 0.0026, "step": 83690 }, { "epoch": 14.93, "learning_rate": 4.254047788873039e-05, "loss": 0.0026, "step": 83700 }, { "epoch": 14.93, "learning_rate": 4.253958630527818e-05, "loss": 0.0046, "step": 83710 }, { "epoch": 14.93, "learning_rate": 4.2538694721825963e-05, "loss": 0.0029, "step": 83720 }, { "epoch": 14.93, "learning_rate": 4.2537803138373755e-05, "loss": 0.0037, "step": 83730 }, { "epoch": 14.93, "learning_rate": 4.2536911554921546e-05, "loss": 0.0047, "step": 83740 }, { "epoch": 14.93, "learning_rate": 4.253601997146933e-05, "loss": 0.0044, "step": 83750 }, { "epoch": 14.94, "learning_rate": 4.253512838801712e-05, "loss": 0.003, "step": 83760 }, { "epoch": 14.94, "learning_rate": 4.2534236804564906e-05, "loss": 0.0038, "step": 83770 }, { "epoch": 14.94, "learning_rate": 4.25333452211127e-05, "loss": 0.0038, "step": 83780 }, { "epoch": 14.94, "learning_rate": 4.253245363766049e-05, "loss": 0.0026, "step": 83790 }, { "epoch": 14.94, "learning_rate": 4.253156205420827e-05, "loss": 0.0021, "step": 83800 }, { "epoch": 14.94, "learning_rate": 4.2530670470756064e-05, "loss": 0.0032, "step": 83810 }, { "epoch": 14.95, "learning_rate": 4.2529778887303855e-05, "loss": 0.003, "step": 83820 }, { "epoch": 14.95, "learning_rate": 4.2528887303851646e-05, "loss": 0.0038, "step": 83830 }, { "epoch": 14.95, "learning_rate": 4.252799572039943e-05, "loss": 0.0038, "step": 83840 }, { "epoch": 14.95, "learning_rate": 4.252710413694722e-05, "loss": 0.0014, "step": 83850 }, { "epoch": 14.95, "learning_rate": 4.2526212553495007e-05, "loss": 0.0027, "step": 83860 }, { "epoch": 14.96, "learning_rate": 4.25253209700428e-05, "loss": 0.0028, "step": 83870 }, { "epoch": 14.96, "learning_rate": 4.252442938659059e-05, "loss": 0.003, "step": 83880 }, { "epoch": 14.96, "learning_rate": 4.252353780313837e-05, "loss": 0.0029, "step": 83890 }, { "epoch": 14.96, "learning_rate": 4.2522646219686165e-05, "loss": 0.0024, "step": 83900 }, { "epoch": 14.96, "learning_rate": 4.252175463623395e-05, "loss": 0.0024, "step": 83910 }, { "epoch": 14.96, "learning_rate": 4.252086305278174e-05, "loss": 0.0032, "step": 83920 }, { "epoch": 14.97, "learning_rate": 4.251997146932953e-05, "loss": 0.0035, "step": 83930 }, { "epoch": 14.97, "learning_rate": 4.251907988587732e-05, "loss": 0.0018, "step": 83940 }, { "epoch": 14.97, "learning_rate": 4.251818830242511e-05, "loss": 0.0019, "step": 83950 }, { "epoch": 14.97, "learning_rate": 4.25172967189729e-05, "loss": 0.0038, "step": 83960 }, { "epoch": 14.97, "learning_rate": 4.251640513552069e-05, "loss": 0.0033, "step": 83970 }, { "epoch": 14.98, "learning_rate": 4.2515513552068474e-05, "loss": 0.0019, "step": 83980 }, { "epoch": 14.98, "learning_rate": 4.2514621968616265e-05, "loss": 0.0031, "step": 83990 }, { "epoch": 14.98, "learning_rate": 4.251373038516405e-05, "loss": 0.0033, "step": 84000 }, { "epoch": 14.98, "learning_rate": 4.251283880171184e-05, "loss": 0.0038, "step": 84010 }, { "epoch": 14.98, "learning_rate": 4.251194721825963e-05, "loss": 0.0016, "step": 84020 }, { "epoch": 14.98, "learning_rate": 4.2511055634807416e-05, "loss": 0.0019, "step": 84030 }, { "epoch": 14.99, "learning_rate": 4.2510164051355214e-05, "loss": 0.0012, "step": 84040 }, { "epoch": 14.99, "learning_rate": 4.2509272467903e-05, "loss": 0.004, "step": 84050 }, { "epoch": 14.99, "learning_rate": 4.250838088445079e-05, "loss": 0.0031, "step": 84060 }, { "epoch": 14.99, "learning_rate": 4.2507489300998575e-05, "loss": 0.0016, "step": 84070 }, { "epoch": 14.99, "learning_rate": 4.2506597717546366e-05, "loss": 0.0041, "step": 84080 }, { "epoch": 14.99, "learning_rate": 4.250570613409415e-05, "loss": 0.0038, "step": 84090 }, { "epoch": 15.0, "learning_rate": 4.250481455064194e-05, "loss": 0.0051, "step": 84100 }, { "epoch": 15.0, "learning_rate": 4.250392296718973e-05, "loss": 0.0018, "step": 84110 }, { "epoch": 15.0, "learning_rate": 4.250303138373752e-05, "loss": 0.0056, "step": 84120 }, { "epoch": 15.0, "eval_loss": 0.020025817677378654, "eval_runtime": 195.701, "eval_samples_per_second": 23.705, "eval_steps_per_second": 2.964, "step": 84120 }, { "epoch": 15.0, "learning_rate": 4.250213980028531e-05, "loss": 0.0035, "step": 84130 }, { "epoch": 15.0, "learning_rate": 4.250124821683309e-05, "loss": 0.0013, "step": 84140 }, { "epoch": 15.01, "learning_rate": 4.250035663338089e-05, "loss": 0.003, "step": 84150 }, { "epoch": 15.01, "learning_rate": 4.2499465049928675e-05, "loss": 0.0035, "step": 84160 }, { "epoch": 15.01, "learning_rate": 4.2498573466476466e-05, "loss": 0.002, "step": 84170 }, { "epoch": 15.01, "learning_rate": 4.249768188302425e-05, "loss": 0.0025, "step": 84180 }, { "epoch": 15.01, "learning_rate": 4.249679029957204e-05, "loss": 0.0027, "step": 84190 }, { "epoch": 15.01, "learning_rate": 4.249589871611983e-05, "loss": 0.0013, "step": 84200 }, { "epoch": 15.02, "learning_rate": 4.249500713266762e-05, "loss": 0.0041, "step": 84210 }, { "epoch": 15.02, "learning_rate": 4.249411554921541e-05, "loss": 0.0025, "step": 84220 }, { "epoch": 15.02, "learning_rate": 4.249322396576319e-05, "loss": 0.0018, "step": 84230 }, { "epoch": 15.02, "learning_rate": 4.2492332382310984e-05, "loss": 0.0007, "step": 84240 }, { "epoch": 15.02, "learning_rate": 4.2491440798858776e-05, "loss": 0.0019, "step": 84250 }, { "epoch": 15.02, "learning_rate": 4.249054921540657e-05, "loss": 0.0019, "step": 84260 }, { "epoch": 15.03, "learning_rate": 4.248965763195436e-05, "loss": 0.0021, "step": 84270 }, { "epoch": 15.03, "learning_rate": 4.248876604850214e-05, "loss": 0.0065, "step": 84280 }, { "epoch": 15.03, "learning_rate": 4.2487874465049934e-05, "loss": 0.0015, "step": 84290 }, { "epoch": 15.03, "learning_rate": 4.248698288159772e-05, "loss": 0.0028, "step": 84300 }, { "epoch": 15.03, "learning_rate": 4.248609129814551e-05, "loss": 0.0026, "step": 84310 }, { "epoch": 15.04, "learning_rate": 4.2485199714693294e-05, "loss": 0.0018, "step": 84320 }, { "epoch": 15.04, "learning_rate": 4.2484308131241085e-05, "loss": 0.0039, "step": 84330 }, { "epoch": 15.04, "learning_rate": 4.2483416547788876e-05, "loss": 0.0019, "step": 84340 }, { "epoch": 15.04, "learning_rate": 4.248252496433666e-05, "loss": 0.004, "step": 84350 }, { "epoch": 15.04, "learning_rate": 4.248163338088445e-05, "loss": 0.002, "step": 84360 }, { "epoch": 15.04, "learning_rate": 4.248074179743224e-05, "loss": 0.0034, "step": 84370 }, { "epoch": 15.05, "learning_rate": 4.2479850213980034e-05, "loss": 0.0022, "step": 84380 }, { "epoch": 15.05, "learning_rate": 4.247895863052782e-05, "loss": 0.001, "step": 84390 }, { "epoch": 15.05, "learning_rate": 4.247806704707561e-05, "loss": 0.0029, "step": 84400 }, { "epoch": 15.05, "learning_rate": 4.2477175463623394e-05, "loss": 0.0037, "step": 84410 }, { "epoch": 15.05, "learning_rate": 4.2476283880171186e-05, "loss": 0.0013, "step": 84420 }, { "epoch": 15.06, "learning_rate": 4.247539229671898e-05, "loss": 0.003, "step": 84430 }, { "epoch": 15.06, "learning_rate": 4.247450071326676e-05, "loss": 0.0017, "step": 84440 }, { "epoch": 15.06, "learning_rate": 4.247360912981455e-05, "loss": 0.004, "step": 84450 }, { "epoch": 15.06, "learning_rate": 4.247271754636234e-05, "loss": 0.0018, "step": 84460 }, { "epoch": 15.06, "learning_rate": 4.247182596291013e-05, "loss": 0.0023, "step": 84470 }, { "epoch": 15.06, "learning_rate": 4.247093437945792e-05, "loss": 0.0028, "step": 84480 }, { "epoch": 15.07, "learning_rate": 4.247004279600571e-05, "loss": 0.0017, "step": 84490 }, { "epoch": 15.07, "learning_rate": 4.24691512125535e-05, "loss": 0.0011, "step": 84500 }, { "epoch": 15.07, "learning_rate": 4.2468259629101286e-05, "loss": 0.0023, "step": 84510 }, { "epoch": 15.07, "learning_rate": 4.246736804564908e-05, "loss": 0.0038, "step": 84520 }, { "epoch": 15.07, "learning_rate": 4.246647646219686e-05, "loss": 0.002, "step": 84530 }, { "epoch": 15.07, "learning_rate": 4.246558487874465e-05, "loss": 0.0067, "step": 84540 }, { "epoch": 15.08, "learning_rate": 4.246469329529244e-05, "loss": 0.0035, "step": 84550 }, { "epoch": 15.08, "learning_rate": 4.246380171184023e-05, "loss": 0.0036, "step": 84560 }, { "epoch": 15.08, "learning_rate": 4.246291012838802e-05, "loss": 0.0051, "step": 84570 }, { "epoch": 15.08, "learning_rate": 4.2462018544935804e-05, "loss": 0.0036, "step": 84580 }, { "epoch": 15.08, "learning_rate": 4.24611269614836e-05, "loss": 0.0044, "step": 84590 }, { "epoch": 15.09, "learning_rate": 4.246023537803139e-05, "loss": 0.0034, "step": 84600 }, { "epoch": 15.09, "learning_rate": 4.245934379457918e-05, "loss": 0.0036, "step": 84610 }, { "epoch": 15.09, "learning_rate": 4.245845221112696e-05, "loss": 0.003, "step": 84620 }, { "epoch": 15.09, "learning_rate": 4.2457560627674754e-05, "loss": 0.0023, "step": 84630 }, { "epoch": 15.09, "learning_rate": 4.245666904422254e-05, "loss": 0.0028, "step": 84640 }, { "epoch": 15.09, "learning_rate": 4.245577746077033e-05, "loss": 0.0028, "step": 84650 }, { "epoch": 15.1, "learning_rate": 4.245488587731812e-05, "loss": 0.0037, "step": 84660 }, { "epoch": 15.1, "learning_rate": 4.2453994293865905e-05, "loss": 0.0034, "step": 84670 }, { "epoch": 15.1, "learning_rate": 4.2453102710413696e-05, "loss": 0.0023, "step": 84680 }, { "epoch": 15.1, "learning_rate": 4.245221112696148e-05, "loss": 0.0023, "step": 84690 }, { "epoch": 15.1, "learning_rate": 4.245131954350928e-05, "loss": 0.0025, "step": 84700 }, { "epoch": 15.11, "learning_rate": 4.245042796005706e-05, "loss": 0.0047, "step": 84710 }, { "epoch": 15.11, "learning_rate": 4.2449536376604854e-05, "loss": 0.0019, "step": 84720 }, { "epoch": 15.11, "learning_rate": 4.2448644793152645e-05, "loss": 0.0028, "step": 84730 }, { "epoch": 15.11, "learning_rate": 4.244775320970043e-05, "loss": 0.0047, "step": 84740 }, { "epoch": 15.11, "learning_rate": 4.244686162624822e-05, "loss": 0.002, "step": 84750 }, { "epoch": 15.11, "learning_rate": 4.2445970042796005e-05, "loss": 0.0018, "step": 84760 }, { "epoch": 15.12, "learning_rate": 4.2445078459343797e-05, "loss": 0.0023, "step": 84770 }, { "epoch": 15.12, "learning_rate": 4.244418687589158e-05, "loss": 0.0021, "step": 84780 }, { "epoch": 15.12, "learning_rate": 4.244329529243937e-05, "loss": 0.0025, "step": 84790 }, { "epoch": 15.12, "learning_rate": 4.2442403708987163e-05, "loss": 0.0029, "step": 84800 }, { "epoch": 15.12, "learning_rate": 4.2441512125534955e-05, "loss": 0.0022, "step": 84810 }, { "epoch": 15.12, "learning_rate": 4.2440620542082746e-05, "loss": 0.0021, "step": 84820 }, { "epoch": 15.13, "learning_rate": 4.243972895863053e-05, "loss": 0.0027, "step": 84830 }, { "epoch": 15.13, "learning_rate": 4.243883737517832e-05, "loss": 0.0029, "step": 84840 }, { "epoch": 15.13, "learning_rate": 4.2437945791726106e-05, "loss": 0.007, "step": 84850 }, { "epoch": 15.13, "learning_rate": 4.24370542082739e-05, "loss": 0.0012, "step": 84860 }, { "epoch": 15.13, "learning_rate": 4.243616262482168e-05, "loss": 0.0031, "step": 84870 }, { "epoch": 15.14, "learning_rate": 4.243527104136947e-05, "loss": 0.0027, "step": 84880 }, { "epoch": 15.14, "learning_rate": 4.2434379457917264e-05, "loss": 0.0022, "step": 84890 }, { "epoch": 15.14, "learning_rate": 4.243348787446505e-05, "loss": 0.0016, "step": 84900 }, { "epoch": 15.14, "learning_rate": 4.243259629101284e-05, "loss": 0.0029, "step": 84910 }, { "epoch": 15.14, "learning_rate": 4.243170470756063e-05, "loss": 0.0017, "step": 84920 }, { "epoch": 15.14, "learning_rate": 4.243081312410842e-05, "loss": 0.0035, "step": 84930 }, { "epoch": 15.15, "learning_rate": 4.2429921540656206e-05, "loss": 0.0044, "step": 84940 }, { "epoch": 15.15, "learning_rate": 4.2429029957204e-05, "loss": 0.0024, "step": 84950 }, { "epoch": 15.15, "learning_rate": 4.242813837375179e-05, "loss": 0.0023, "step": 84960 }, { "epoch": 15.15, "learning_rate": 4.242724679029957e-05, "loss": 0.0032, "step": 84970 }, { "epoch": 15.15, "learning_rate": 4.2426355206847365e-05, "loss": 0.0045, "step": 84980 }, { "epoch": 15.16, "learning_rate": 4.242546362339515e-05, "loss": 0.0057, "step": 84990 }, { "epoch": 15.16, "learning_rate": 4.2424661198288164e-05, "loss": 0.0066, "step": 85000 }, { "epoch": 15.16, "learning_rate": 4.242376961483595e-05, "loss": 0.003, "step": 85010 }, { "epoch": 15.16, "learning_rate": 4.242287803138374e-05, "loss": 0.003, "step": 85020 }, { "epoch": 15.16, "learning_rate": 4.242198644793153e-05, "loss": 0.0013, "step": 85030 }, { "epoch": 15.16, "learning_rate": 4.2421094864479315e-05, "loss": 0.0047, "step": 85040 }, { "epoch": 15.17, "learning_rate": 4.2420203281027106e-05, "loss": 0.0071, "step": 85050 }, { "epoch": 15.17, "learning_rate": 4.241931169757489e-05, "loss": 0.0017, "step": 85060 }, { "epoch": 15.17, "learning_rate": 4.241842011412269e-05, "loss": 0.0025, "step": 85070 }, { "epoch": 15.17, "learning_rate": 4.241752853067047e-05, "loss": 0.0037, "step": 85080 }, { "epoch": 15.17, "learning_rate": 4.2416636947218264e-05, "loss": 0.0059, "step": 85090 }, { "epoch": 15.17, "learning_rate": 4.241574536376605e-05, "loss": 0.0032, "step": 85100 }, { "epoch": 15.18, "learning_rate": 4.241485378031384e-05, "loss": 0.0027, "step": 85110 }, { "epoch": 15.18, "learning_rate": 4.241396219686163e-05, "loss": 0.0045, "step": 85120 }, { "epoch": 15.18, "learning_rate": 4.2413070613409416e-05, "loss": 0.0015, "step": 85130 }, { "epoch": 15.18, "learning_rate": 4.241217902995721e-05, "loss": 0.0011, "step": 85140 }, { "epoch": 15.18, "learning_rate": 4.241128744650499e-05, "loss": 0.0025, "step": 85150 }, { "epoch": 15.19, "learning_rate": 4.241039586305278e-05, "loss": 0.0033, "step": 85160 }, { "epoch": 15.19, "learning_rate": 4.2409504279600574e-05, "loss": 0.0023, "step": 85170 }, { "epoch": 15.19, "learning_rate": 4.240861269614836e-05, "loss": 0.0026, "step": 85180 }, { "epoch": 15.19, "learning_rate": 4.240772111269615e-05, "loss": 0.007, "step": 85190 }, { "epoch": 15.19, "learning_rate": 4.240682952924394e-05, "loss": 0.0014, "step": 85200 }, { "epoch": 15.19, "learning_rate": 4.240593794579173e-05, "loss": 0.0026, "step": 85210 }, { "epoch": 15.2, "learning_rate": 4.2405046362339516e-05, "loss": 0.0024, "step": 85220 }, { "epoch": 15.2, "learning_rate": 4.240415477888731e-05, "loss": 0.0011, "step": 85230 }, { "epoch": 15.2, "learning_rate": 4.240326319543509e-05, "loss": 0.0033, "step": 85240 }, { "epoch": 15.2, "learning_rate": 4.240237161198288e-05, "loss": 0.0017, "step": 85250 }, { "epoch": 15.2, "learning_rate": 4.2401480028530674e-05, "loss": 0.0018, "step": 85260 }, { "epoch": 15.21, "learning_rate": 4.240058844507846e-05, "loss": 0.0045, "step": 85270 }, { "epoch": 15.21, "learning_rate": 4.239969686162625e-05, "loss": 0.0025, "step": 85280 }, { "epoch": 15.21, "learning_rate": 4.2398805278174034e-05, "loss": 0.0038, "step": 85290 }, { "epoch": 15.21, "learning_rate": 4.239791369472183e-05, "loss": 0.0032, "step": 85300 }, { "epoch": 15.21, "learning_rate": 4.239702211126962e-05, "loss": 0.0019, "step": 85310 }, { "epoch": 15.21, "learning_rate": 4.239613052781741e-05, "loss": 0.0042, "step": 85320 }, { "epoch": 15.22, "learning_rate": 4.239523894436519e-05, "loss": 0.0042, "step": 85330 }, { "epoch": 15.22, "learning_rate": 4.2394347360912984e-05, "loss": 0.0015, "step": 85340 }, { "epoch": 15.22, "learning_rate": 4.2393455777460775e-05, "loss": 0.003, "step": 85350 }, { "epoch": 15.22, "learning_rate": 4.239256419400856e-05, "loss": 0.0029, "step": 85360 }, { "epoch": 15.22, "learning_rate": 4.239167261055635e-05, "loss": 0.0038, "step": 85370 }, { "epoch": 15.22, "learning_rate": 4.2390781027104135e-05, "loss": 0.0034, "step": 85380 }, { "epoch": 15.23, "learning_rate": 4.2389889443651926e-05, "loss": 0.0024, "step": 85390 }, { "epoch": 15.23, "learning_rate": 4.238899786019972e-05, "loss": 0.0021, "step": 85400 }, { "epoch": 15.23, "learning_rate": 4.238810627674751e-05, "loss": 0.0065, "step": 85410 }, { "epoch": 15.23, "learning_rate": 4.238721469329529e-05, "loss": 0.0034, "step": 85420 }, { "epoch": 15.23, "learning_rate": 4.2386323109843084e-05, "loss": 0.0025, "step": 85430 }, { "epoch": 15.24, "learning_rate": 4.2385431526390875e-05, "loss": 0.0028, "step": 85440 }, { "epoch": 15.24, "learning_rate": 4.238453994293866e-05, "loss": 0.0014, "step": 85450 }, { "epoch": 15.24, "learning_rate": 4.238364835948645e-05, "loss": 0.0068, "step": 85460 }, { "epoch": 15.24, "learning_rate": 4.2382756776034235e-05, "loss": 0.0027, "step": 85470 }, { "epoch": 15.24, "learning_rate": 4.238186519258203e-05, "loss": 0.0017, "step": 85480 }, { "epoch": 15.24, "learning_rate": 4.238097360912982e-05, "loss": 0.0041, "step": 85490 }, { "epoch": 15.25, "learning_rate": 4.23800820256776e-05, "loss": 0.0019, "step": 85500 }, { "epoch": 15.25, "learning_rate": 4.2379190442225394e-05, "loss": 0.0022, "step": 85510 }, { "epoch": 15.25, "learning_rate": 4.2378298858773185e-05, "loss": 0.0025, "step": 85520 }, { "epoch": 15.25, "learning_rate": 4.2377407275320976e-05, "loss": 0.001, "step": 85530 }, { "epoch": 15.25, "learning_rate": 4.237651569186876e-05, "loss": 0.0017, "step": 85540 }, { "epoch": 15.25, "learning_rate": 4.237562410841655e-05, "loss": 0.0021, "step": 85550 }, { "epoch": 15.26, "learning_rate": 4.2374732524964336e-05, "loss": 0.0027, "step": 85560 }, { "epoch": 15.26, "learning_rate": 4.237384094151213e-05, "loss": 0.0059, "step": 85570 }, { "epoch": 15.26, "learning_rate": 4.237294935805992e-05, "loss": 0.009, "step": 85580 }, { "epoch": 15.26, "learning_rate": 4.23720577746077e-05, "loss": 0.0017, "step": 85590 }, { "epoch": 15.26, "learning_rate": 4.2371166191155494e-05, "loss": 0.0041, "step": 85600 }, { "epoch": 15.27, "learning_rate": 4.237027460770328e-05, "loss": 0.0029, "step": 85610 }, { "epoch": 15.27, "learning_rate": 4.236938302425107e-05, "loss": 0.0017, "step": 85620 }, { "epoch": 15.27, "learning_rate": 4.236849144079886e-05, "loss": 0.003, "step": 85630 }, { "epoch": 15.27, "learning_rate": 4.236759985734665e-05, "loss": 0.0028, "step": 85640 }, { "epoch": 15.27, "learning_rate": 4.2366708273894437e-05, "loss": 0.0039, "step": 85650 }, { "epoch": 15.27, "learning_rate": 4.236581669044223e-05, "loss": 0.0024, "step": 85660 }, { "epoch": 15.28, "learning_rate": 4.236492510699002e-05, "loss": 0.0025, "step": 85670 }, { "epoch": 15.28, "learning_rate": 4.2364033523537803e-05, "loss": 0.0026, "step": 85680 }, { "epoch": 15.28, "learning_rate": 4.2363141940085595e-05, "loss": 0.0025, "step": 85690 }, { "epoch": 15.28, "learning_rate": 4.236225035663338e-05, "loss": 0.0037, "step": 85700 }, { "epoch": 15.28, "learning_rate": 4.236135877318117e-05, "loss": 0.0038, "step": 85710 }, { "epoch": 15.29, "learning_rate": 4.236046718972896e-05, "loss": 0.0018, "step": 85720 }, { "epoch": 15.29, "learning_rate": 4.2359575606276746e-05, "loss": 0.0051, "step": 85730 }, { "epoch": 15.29, "learning_rate": 4.2358684022824544e-05, "loss": 0.0037, "step": 85740 }, { "epoch": 15.29, "learning_rate": 4.235779243937233e-05, "loss": 0.0016, "step": 85750 }, { "epoch": 15.29, "learning_rate": 4.235690085592012e-05, "loss": 0.0014, "step": 85760 }, { "epoch": 15.29, "learning_rate": 4.2356009272467904e-05, "loss": 0.0045, "step": 85770 }, { "epoch": 15.3, "learning_rate": 4.2355117689015695e-05, "loss": 0.0028, "step": 85780 }, { "epoch": 15.3, "learning_rate": 4.235422610556348e-05, "loss": 0.0031, "step": 85790 }, { "epoch": 15.3, "learning_rate": 4.235333452211127e-05, "loss": 0.0023, "step": 85800 }, { "epoch": 15.3, "learning_rate": 4.235244293865906e-05, "loss": 0.0011, "step": 85810 }, { "epoch": 15.3, "learning_rate": 4.2351551355206846e-05, "loss": 0.0022, "step": 85820 }, { "epoch": 15.3, "learning_rate": 4.235065977175464e-05, "loss": 0.0015, "step": 85830 }, { "epoch": 15.31, "learning_rate": 4.234976818830242e-05, "loss": 0.0027, "step": 85840 }, { "epoch": 15.31, "learning_rate": 4.234887660485022e-05, "loss": 0.0037, "step": 85850 }, { "epoch": 15.31, "learning_rate": 4.2347985021398005e-05, "loss": 0.0025, "step": 85860 }, { "epoch": 15.31, "learning_rate": 4.2347093437945796e-05, "loss": 0.0028, "step": 85870 }, { "epoch": 15.31, "learning_rate": 4.234620185449358e-05, "loss": 0.0025, "step": 85880 }, { "epoch": 15.32, "learning_rate": 4.234531027104137e-05, "loss": 0.0024, "step": 85890 }, { "epoch": 15.32, "learning_rate": 4.234441868758916e-05, "loss": 0.0025, "step": 85900 }, { "epoch": 15.32, "learning_rate": 4.234352710413695e-05, "loss": 0.0033, "step": 85910 }, { "epoch": 15.32, "learning_rate": 4.234263552068474e-05, "loss": 0.0031, "step": 85920 }, { "epoch": 15.32, "learning_rate": 4.234174393723252e-05, "loss": 0.0018, "step": 85930 }, { "epoch": 15.32, "learning_rate": 4.2340852353780314e-05, "loss": 0.0015, "step": 85940 }, { "epoch": 15.33, "learning_rate": 4.2339960770328105e-05, "loss": 0.0019, "step": 85950 }, { "epoch": 15.33, "learning_rate": 4.2339069186875896e-05, "loss": 0.0015, "step": 85960 }, { "epoch": 15.33, "learning_rate": 4.233817760342369e-05, "loss": 0.005, "step": 85970 }, { "epoch": 15.33, "learning_rate": 4.233728601997147e-05, "loss": 0.0035, "step": 85980 }, { "epoch": 15.33, "learning_rate": 4.233639443651926e-05, "loss": 0.0026, "step": 85990 }, { "epoch": 15.34, "learning_rate": 4.233550285306705e-05, "loss": 0.0036, "step": 86000 }, { "epoch": 15.34, "learning_rate": 4.233461126961484e-05, "loss": 0.0025, "step": 86010 }, { "epoch": 15.34, "learning_rate": 4.233371968616262e-05, "loss": 0.0018, "step": 86020 }, { "epoch": 15.34, "learning_rate": 4.2332828102710414e-05, "loss": 0.0032, "step": 86030 }, { "epoch": 15.34, "learning_rate": 4.2331936519258206e-05, "loss": 0.0044, "step": 86040 }, { "epoch": 15.34, "learning_rate": 4.233104493580599e-05, "loss": 0.0024, "step": 86050 }, { "epoch": 15.35, "learning_rate": 4.233015335235378e-05, "loss": 0.0046, "step": 86060 }, { "epoch": 15.35, "learning_rate": 4.232926176890157e-05, "loss": 0.0023, "step": 86070 }, { "epoch": 15.35, "learning_rate": 4.2328370185449364e-05, "loss": 0.0023, "step": 86080 }, { "epoch": 15.35, "learning_rate": 4.232747860199715e-05, "loss": 0.0031, "step": 86090 }, { "epoch": 15.35, "learning_rate": 4.232658701854494e-05, "loss": 0.0029, "step": 86100 }, { "epoch": 15.35, "learning_rate": 4.2325695435092724e-05, "loss": 0.0022, "step": 86110 }, { "epoch": 15.36, "learning_rate": 4.2324803851640515e-05, "loss": 0.0028, "step": 86120 }, { "epoch": 15.36, "learning_rate": 4.2323912268188306e-05, "loss": 0.0028, "step": 86130 }, { "epoch": 15.36, "learning_rate": 4.232302068473609e-05, "loss": 0.0026, "step": 86140 }, { "epoch": 15.36, "learning_rate": 4.232212910128388e-05, "loss": 0.0068, "step": 86150 }, { "epoch": 15.36, "learning_rate": 4.2321237517831666e-05, "loss": 0.0036, "step": 86160 }, { "epoch": 15.37, "learning_rate": 4.232034593437946e-05, "loss": 0.0065, "step": 86170 }, { "epoch": 15.37, "learning_rate": 4.231945435092725e-05, "loss": 0.002, "step": 86180 }, { "epoch": 15.37, "learning_rate": 4.231856276747504e-05, "loss": 0.0015, "step": 86190 }, { "epoch": 15.37, "learning_rate": 4.231767118402283e-05, "loss": 0.0031, "step": 86200 }, { "epoch": 15.37, "learning_rate": 4.2316779600570616e-05, "loss": 0.0027, "step": 86210 }, { "epoch": 15.37, "learning_rate": 4.231588801711841e-05, "loss": 0.0025, "step": 86220 }, { "epoch": 15.38, "learning_rate": 4.231499643366619e-05, "loss": 0.0027, "step": 86230 }, { "epoch": 15.38, "learning_rate": 4.231410485021398e-05, "loss": 0.0028, "step": 86240 }, { "epoch": 15.38, "learning_rate": 4.231321326676177e-05, "loss": 0.0036, "step": 86250 }, { "epoch": 15.38, "learning_rate": 4.231232168330956e-05, "loss": 0.0021, "step": 86260 }, { "epoch": 15.38, "learning_rate": 4.231143009985735e-05, "loss": 0.0022, "step": 86270 }, { "epoch": 15.39, "learning_rate": 4.2310538516405134e-05, "loss": 0.0051, "step": 86280 }, { "epoch": 15.39, "learning_rate": 4.230964693295293e-05, "loss": 0.002, "step": 86290 }, { "epoch": 15.39, "learning_rate": 4.2308755349500716e-05, "loss": 0.0018, "step": 86300 }, { "epoch": 15.39, "learning_rate": 4.230786376604851e-05, "loss": 0.003, "step": 86310 }, { "epoch": 15.39, "learning_rate": 4.230697218259629e-05, "loss": 0.0027, "step": 86320 }, { "epoch": 15.39, "learning_rate": 4.230608059914408e-05, "loss": 0.0054, "step": 86330 }, { "epoch": 15.4, "learning_rate": 4.230518901569187e-05, "loss": 0.0034, "step": 86340 }, { "epoch": 15.4, "learning_rate": 4.230429743223966e-05, "loss": 0.0025, "step": 86350 }, { "epoch": 15.4, "learning_rate": 4.230340584878745e-05, "loss": 0.0011, "step": 86360 }, { "epoch": 15.4, "learning_rate": 4.2302514265335234e-05, "loss": 0.0026, "step": 86370 }, { "epoch": 15.4, "learning_rate": 4.2301622681883026e-05, "loss": 0.0033, "step": 86380 }, { "epoch": 15.4, "learning_rate": 4.230073109843081e-05, "loss": 0.0028, "step": 86390 }, { "epoch": 15.41, "learning_rate": 4.229983951497861e-05, "loss": 0.0018, "step": 86400 }, { "epoch": 15.41, "learning_rate": 4.229894793152639e-05, "loss": 0.0043, "step": 86410 }, { "epoch": 15.41, "learning_rate": 4.2298056348074184e-05, "loss": 0.0035, "step": 86420 }, { "epoch": 15.41, "learning_rate": 4.2297164764621975e-05, "loss": 0.0021, "step": 86430 }, { "epoch": 15.41, "learning_rate": 4.229627318116976e-05, "loss": 0.0035, "step": 86440 }, { "epoch": 15.42, "learning_rate": 4.229538159771755e-05, "loss": 0.0037, "step": 86450 }, { "epoch": 15.42, "learning_rate": 4.2294490014265335e-05, "loss": 0.0019, "step": 86460 }, { "epoch": 15.42, "learning_rate": 4.2293598430813126e-05, "loss": 0.0047, "step": 86470 }, { "epoch": 15.42, "learning_rate": 4.229270684736091e-05, "loss": 0.005, "step": 86480 }, { "epoch": 15.42, "learning_rate": 4.22918152639087e-05, "loss": 0.0019, "step": 86490 }, { "epoch": 15.42, "learning_rate": 4.229092368045649e-05, "loss": 0.0037, "step": 86500 }, { "epoch": 15.43, "learning_rate": 4.2290032097004284e-05, "loss": 0.0017, "step": 86510 }, { "epoch": 15.43, "learning_rate": 4.2289140513552075e-05, "loss": 0.0025, "step": 86520 }, { "epoch": 15.43, "learning_rate": 4.228824893009986e-05, "loss": 0.003, "step": 86530 }, { "epoch": 15.43, "learning_rate": 4.228735734664765e-05, "loss": 0.0049, "step": 86540 }, { "epoch": 15.43, "learning_rate": 4.2286465763195435e-05, "loss": 0.0011, "step": 86550 }, { "epoch": 15.44, "learning_rate": 4.228557417974323e-05, "loss": 0.0023, "step": 86560 }, { "epoch": 15.44, "learning_rate": 4.228468259629101e-05, "loss": 0.0024, "step": 86570 }, { "epoch": 15.44, "learning_rate": 4.22837910128388e-05, "loss": 0.002, "step": 86580 }, { "epoch": 15.44, "learning_rate": 4.2282899429386593e-05, "loss": 0.0031, "step": 86590 }, { "epoch": 15.44, "learning_rate": 4.228200784593438e-05, "loss": 0.0021, "step": 86600 }, { "epoch": 15.44, "learning_rate": 4.228111626248217e-05, "loss": 0.0023, "step": 86610 }, { "epoch": 15.45, "learning_rate": 4.228022467902996e-05, "loss": 0.0023, "step": 86620 }, { "epoch": 15.45, "learning_rate": 4.227933309557775e-05, "loss": 0.0033, "step": 86630 }, { "epoch": 15.45, "learning_rate": 4.2278441512125536e-05, "loss": 0.0014, "step": 86640 }, { "epoch": 15.45, "learning_rate": 4.227754992867333e-05, "loss": 0.0016, "step": 86650 }, { "epoch": 15.45, "learning_rate": 4.227665834522112e-05, "loss": 0.0022, "step": 86660 }, { "epoch": 15.45, "learning_rate": 4.22757667617689e-05, "loss": 0.0031, "step": 86670 }, { "epoch": 15.46, "learning_rate": 4.2274875178316694e-05, "loss": 0.0044, "step": 86680 }, { "epoch": 15.46, "learning_rate": 4.227398359486448e-05, "loss": 0.0019, "step": 86690 }, { "epoch": 15.46, "learning_rate": 4.227309201141227e-05, "loss": 0.0042, "step": 86700 }, { "epoch": 15.46, "learning_rate": 4.2272200427960054e-05, "loss": 0.0029, "step": 86710 }, { "epoch": 15.46, "learning_rate": 4.2271308844507845e-05, "loss": 0.0041, "step": 86720 }, { "epoch": 15.47, "learning_rate": 4.2270417261055637e-05, "loss": 0.0059, "step": 86730 }, { "epoch": 15.47, "learning_rate": 4.226952567760343e-05, "loss": 0.0036, "step": 86740 }, { "epoch": 15.47, "learning_rate": 4.226863409415122e-05, "loss": 0.0012, "step": 86750 }, { "epoch": 15.47, "learning_rate": 4.2267742510699003e-05, "loss": 0.0034, "step": 86760 }, { "epoch": 15.47, "learning_rate": 4.2266850927246795e-05, "loss": 0.003, "step": 86770 }, { "epoch": 15.47, "learning_rate": 4.226595934379458e-05, "loss": 0.0031, "step": 86780 }, { "epoch": 15.48, "learning_rate": 4.226506776034237e-05, "loss": 0.0048, "step": 86790 }, { "epoch": 15.48, "learning_rate": 4.2264176176890155e-05, "loss": 0.004, "step": 86800 }, { "epoch": 15.48, "learning_rate": 4.2263284593437946e-05, "loss": 0.002, "step": 86810 }, { "epoch": 15.48, "learning_rate": 4.226239300998574e-05, "loss": 0.006, "step": 86820 }, { "epoch": 15.48, "learning_rate": 4.226150142653352e-05, "loss": 0.0033, "step": 86830 }, { "epoch": 15.49, "learning_rate": 4.226060984308132e-05, "loss": 0.0032, "step": 86840 }, { "epoch": 15.49, "learning_rate": 4.2259718259629104e-05, "loss": 0.0025, "step": 86850 }, { "epoch": 15.49, "learning_rate": 4.2258826676176895e-05, "loss": 0.0031, "step": 86860 }, { "epoch": 15.49, "learning_rate": 4.225793509272468e-05, "loss": 0.0023, "step": 86870 }, { "epoch": 15.49, "learning_rate": 4.225704350927247e-05, "loss": 0.0032, "step": 86880 }, { "epoch": 15.49, "learning_rate": 4.225615192582026e-05, "loss": 0.0028, "step": 86890 }, { "epoch": 15.5, "learning_rate": 4.2255260342368046e-05, "loss": 0.004, "step": 86900 }, { "epoch": 15.5, "learning_rate": 4.225436875891584e-05, "loss": 0.0027, "step": 86910 }, { "epoch": 15.5, "learning_rate": 4.225347717546362e-05, "loss": 0.0016, "step": 86920 }, { "epoch": 15.5, "learning_rate": 4.225258559201141e-05, "loss": 0.0019, "step": 86930 }, { "epoch": 15.5, "learning_rate": 4.22516940085592e-05, "loss": 0.0028, "step": 86940 }, { "epoch": 15.5, "learning_rate": 4.2250802425106996e-05, "loss": 0.0015, "step": 86950 }, { "epoch": 15.51, "learning_rate": 4.224991084165478e-05, "loss": 0.0024, "step": 86960 }, { "epoch": 15.51, "learning_rate": 4.224901925820257e-05, "loss": 0.0029, "step": 86970 }, { "epoch": 15.51, "learning_rate": 4.224812767475036e-05, "loss": 0.0024, "step": 86980 }, { "epoch": 15.51, "learning_rate": 4.224723609129815e-05, "loss": 0.0043, "step": 86990 }, { "epoch": 15.51, "learning_rate": 4.224634450784594e-05, "loss": 0.0017, "step": 87000 }, { "epoch": 15.52, "learning_rate": 4.224545292439372e-05, "loss": 0.0025, "step": 87010 }, { "epoch": 15.52, "learning_rate": 4.2244561340941514e-05, "loss": 0.0046, "step": 87020 }, { "epoch": 15.52, "learning_rate": 4.22436697574893e-05, "loss": 0.0041, "step": 87030 }, { "epoch": 15.52, "learning_rate": 4.224277817403709e-05, "loss": 0.0023, "step": 87040 }, { "epoch": 15.52, "learning_rate": 4.224188659058488e-05, "loss": 0.0034, "step": 87050 }, { "epoch": 15.52, "learning_rate": 4.224099500713267e-05, "loss": 0.0015, "step": 87060 }, { "epoch": 15.53, "learning_rate": 4.224010342368046e-05, "loss": 0.0025, "step": 87070 }, { "epoch": 15.53, "learning_rate": 4.223921184022825e-05, "loss": 0.0015, "step": 87080 }, { "epoch": 15.53, "learning_rate": 4.223832025677604e-05, "loss": 0.0023, "step": 87090 }, { "epoch": 15.53, "learning_rate": 4.223742867332382e-05, "loss": 0.0015, "step": 87100 }, { "epoch": 15.53, "learning_rate": 4.2236537089871614e-05, "loss": 0.0022, "step": 87110 }, { "epoch": 15.53, "learning_rate": 4.2235645506419406e-05, "loss": 0.0012, "step": 87120 }, { "epoch": 15.54, "learning_rate": 4.223475392296719e-05, "loss": 0.0023, "step": 87130 }, { "epoch": 15.54, "learning_rate": 4.223386233951498e-05, "loss": 0.001, "step": 87140 }, { "epoch": 15.54, "learning_rate": 4.2232970756062766e-05, "loss": 0.0029, "step": 87150 }, { "epoch": 15.54, "learning_rate": 4.223207917261056e-05, "loss": 0.0019, "step": 87160 }, { "epoch": 15.54, "learning_rate": 4.223118758915835e-05, "loss": 0.0032, "step": 87170 }, { "epoch": 15.55, "learning_rate": 4.223029600570614e-05, "loss": 0.0024, "step": 87180 }, { "epoch": 15.55, "learning_rate": 4.2229404422253924e-05, "loss": 0.0016, "step": 87190 }, { "epoch": 15.55, "learning_rate": 4.2228512838801715e-05, "loss": 0.0013, "step": 87200 }, { "epoch": 15.55, "learning_rate": 4.2227621255349506e-05, "loss": 0.0026, "step": 87210 }, { "epoch": 15.55, "learning_rate": 4.222672967189729e-05, "loss": 0.0021, "step": 87220 }, { "epoch": 15.55, "learning_rate": 4.222583808844508e-05, "loss": 0.0019, "step": 87230 }, { "epoch": 15.56, "learning_rate": 4.2224946504992866e-05, "loss": 0.0034, "step": 87240 }, { "epoch": 15.56, "learning_rate": 4.222405492154066e-05, "loss": 0.0014, "step": 87250 }, { "epoch": 15.56, "learning_rate": 4.222316333808844e-05, "loss": 0.0018, "step": 87260 }, { "epoch": 15.56, "learning_rate": 4.222227175463623e-05, "loss": 0.0037, "step": 87270 }, { "epoch": 15.56, "learning_rate": 4.2221380171184024e-05, "loss": 0.0027, "step": 87280 }, { "epoch": 15.57, "learning_rate": 4.2220488587731816e-05, "loss": 0.0047, "step": 87290 }, { "epoch": 15.57, "learning_rate": 4.221959700427961e-05, "loss": 0.004, "step": 87300 }, { "epoch": 15.57, "learning_rate": 4.221870542082739e-05, "loss": 0.0018, "step": 87310 }, { "epoch": 15.57, "learning_rate": 4.221781383737518e-05, "loss": 0.0051, "step": 87320 }, { "epoch": 15.57, "learning_rate": 4.221692225392297e-05, "loss": 0.0061, "step": 87330 }, { "epoch": 15.57, "learning_rate": 4.221603067047076e-05, "loss": 0.0023, "step": 87340 }, { "epoch": 15.58, "learning_rate": 4.221513908701855e-05, "loss": 0.0044, "step": 87350 }, { "epoch": 15.58, "learning_rate": 4.2214247503566334e-05, "loss": 0.0025, "step": 87360 }, { "epoch": 15.58, "learning_rate": 4.2213355920114125e-05, "loss": 0.0038, "step": 87370 }, { "epoch": 15.58, "learning_rate": 4.221246433666191e-05, "loss": 0.0046, "step": 87380 }, { "epoch": 15.58, "learning_rate": 4.221157275320971e-05, "loss": 0.0012, "step": 87390 }, { "epoch": 15.58, "learning_rate": 4.221068116975749e-05, "loss": 0.0042, "step": 87400 }, { "epoch": 15.59, "learning_rate": 4.220978958630528e-05, "loss": 0.003, "step": 87410 }, { "epoch": 15.59, "learning_rate": 4.220889800285307e-05, "loss": 0.003, "step": 87420 }, { "epoch": 15.59, "learning_rate": 4.220800641940086e-05, "loss": 0.0026, "step": 87430 }, { "epoch": 15.59, "learning_rate": 4.220711483594865e-05, "loss": 0.0039, "step": 87440 }, { "epoch": 15.59, "learning_rate": 4.2206223252496434e-05, "loss": 0.0041, "step": 87450 }, { "epoch": 15.6, "learning_rate": 4.2205331669044225e-05, "loss": 0.0026, "step": 87460 }, { "epoch": 15.6, "learning_rate": 4.220444008559201e-05, "loss": 0.0023, "step": 87470 }, { "epoch": 15.6, "learning_rate": 4.22035485021398e-05, "loss": 0.0035, "step": 87480 }, { "epoch": 15.6, "learning_rate": 4.2202656918687586e-05, "loss": 0.0029, "step": 87490 }, { "epoch": 15.6, "learning_rate": 4.2201765335235384e-05, "loss": 0.0027, "step": 87500 }, { "epoch": 15.6, "learning_rate": 4.220087375178317e-05, "loss": 0.0033, "step": 87510 }, { "epoch": 15.61, "learning_rate": 4.219998216833096e-05, "loss": 0.0052, "step": 87520 }, { "epoch": 15.61, "learning_rate": 4.219909058487875e-05, "loss": 0.0022, "step": 87530 }, { "epoch": 15.61, "learning_rate": 4.2198199001426535e-05, "loss": 0.0049, "step": 87540 }, { "epoch": 15.61, "learning_rate": 4.2197307417974326e-05, "loss": 0.0026, "step": 87550 }, { "epoch": 15.61, "learning_rate": 4.219641583452211e-05, "loss": 0.0023, "step": 87560 }, { "epoch": 15.62, "learning_rate": 4.21955242510699e-05, "loss": 0.0039, "step": 87570 }, { "epoch": 15.62, "learning_rate": 4.219463266761769e-05, "loss": 0.0035, "step": 87580 }, { "epoch": 15.62, "learning_rate": 4.219374108416548e-05, "loss": 0.0021, "step": 87590 }, { "epoch": 15.62, "learning_rate": 4.219284950071327e-05, "loss": 0.0024, "step": 87600 }, { "epoch": 15.62, "learning_rate": 4.219195791726106e-05, "loss": 0.0036, "step": 87610 }, { "epoch": 15.62, "learning_rate": 4.219106633380885e-05, "loss": 0.0021, "step": 87620 }, { "epoch": 15.63, "learning_rate": 4.2190174750356635e-05, "loss": 0.0036, "step": 87630 }, { "epoch": 15.63, "learning_rate": 4.2189283166904427e-05, "loss": 0.003, "step": 87640 }, { "epoch": 15.63, "learning_rate": 4.218839158345221e-05, "loss": 0.0016, "step": 87650 }, { "epoch": 15.63, "learning_rate": 4.21875e-05, "loss": 0.0031, "step": 87660 }, { "epoch": 15.63, "learning_rate": 4.2186608416547793e-05, "loss": 0.002, "step": 87670 }, { "epoch": 15.63, "learning_rate": 4.218571683309558e-05, "loss": 0.0019, "step": 87680 }, { "epoch": 15.64, "learning_rate": 4.218482524964337e-05, "loss": 0.0055, "step": 87690 }, { "epoch": 15.64, "learning_rate": 4.2183933666191154e-05, "loss": 0.0022, "step": 87700 }, { "epoch": 15.64, "learning_rate": 4.2183042082738945e-05, "loss": 0.0032, "step": 87710 }, { "epoch": 15.64, "learning_rate": 4.2182150499286736e-05, "loss": 0.0024, "step": 87720 }, { "epoch": 15.64, "learning_rate": 4.218125891583453e-05, "loss": 0.0026, "step": 87730 }, { "epoch": 15.65, "learning_rate": 4.218036733238231e-05, "loss": 0.0021, "step": 87740 }, { "epoch": 15.65, "learning_rate": 4.21794757489301e-05, "loss": 0.0026, "step": 87750 }, { "epoch": 15.65, "learning_rate": 4.2178584165477894e-05, "loss": 0.0023, "step": 87760 }, { "epoch": 15.65, "learning_rate": 4.217769258202568e-05, "loss": 0.0008, "step": 87770 }, { "epoch": 15.65, "learning_rate": 4.217680099857347e-05, "loss": 0.0019, "step": 87780 }, { "epoch": 15.65, "learning_rate": 4.2175909415121254e-05, "loss": 0.0026, "step": 87790 }, { "epoch": 15.66, "learning_rate": 4.2175017831669045e-05, "loss": 0.0035, "step": 87800 }, { "epoch": 15.66, "learning_rate": 4.217412624821683e-05, "loss": 0.003, "step": 87810 }, { "epoch": 15.66, "learning_rate": 4.217323466476462e-05, "loss": 0.0038, "step": 87820 }, { "epoch": 15.66, "learning_rate": 4.217234308131241e-05, "loss": 0.0023, "step": 87830 }, { "epoch": 15.66, "learning_rate": 4.21714514978602e-05, "loss": 0.0019, "step": 87840 }, { "epoch": 15.67, "learning_rate": 4.2170559914407995e-05, "loss": 0.0038, "step": 87850 }, { "epoch": 15.67, "learning_rate": 4.216966833095578e-05, "loss": 0.0024, "step": 87860 }, { "epoch": 15.67, "learning_rate": 4.216877674750357e-05, "loss": 0.0022, "step": 87870 }, { "epoch": 15.67, "learning_rate": 4.2167885164051355e-05, "loss": 0.006, "step": 87880 }, { "epoch": 15.67, "learning_rate": 4.2166993580599146e-05, "loss": 0.002, "step": 87890 }, { "epoch": 15.67, "learning_rate": 4.216610199714694e-05, "loss": 0.0021, "step": 87900 }, { "epoch": 15.68, "learning_rate": 4.216521041369472e-05, "loss": 0.0027, "step": 87910 }, { "epoch": 15.68, "learning_rate": 4.216431883024251e-05, "loss": 0.0022, "step": 87920 }, { "epoch": 15.68, "learning_rate": 4.21634272467903e-05, "loss": 0.0024, "step": 87930 }, { "epoch": 15.68, "learning_rate": 4.2162535663338095e-05, "loss": 0.0042, "step": 87940 }, { "epoch": 15.68, "learning_rate": 4.216164407988588e-05, "loss": 0.0025, "step": 87950 }, { "epoch": 15.68, "learning_rate": 4.216075249643367e-05, "loss": 0.0038, "step": 87960 }, { "epoch": 15.69, "learning_rate": 4.2159860912981455e-05, "loss": 0.0066, "step": 87970 }, { "epoch": 15.69, "learning_rate": 4.2158969329529246e-05, "loss": 0.0026, "step": 87980 }, { "epoch": 15.69, "learning_rate": 4.215807774607704e-05, "loss": 0.0013, "step": 87990 }, { "epoch": 15.69, "learning_rate": 4.215718616262482e-05, "loss": 0.0018, "step": 88000 }, { "epoch": 15.69, "learning_rate": 4.215629457917261e-05, "loss": 0.0021, "step": 88010 }, { "epoch": 15.7, "learning_rate": 4.21554029957204e-05, "loss": 0.0033, "step": 88020 }, { "epoch": 15.7, "learning_rate": 4.215451141226819e-05, "loss": 0.0029, "step": 88030 }, { "epoch": 15.7, "learning_rate": 4.215361982881597e-05, "loss": 0.003, "step": 88040 }, { "epoch": 15.7, "learning_rate": 4.215272824536377e-05, "loss": 0.0025, "step": 88050 }, { "epoch": 15.7, "learning_rate": 4.2151836661911556e-05, "loss": 0.0039, "step": 88060 }, { "epoch": 15.7, "learning_rate": 4.215094507845935e-05, "loss": 0.0039, "step": 88070 }, { "epoch": 15.71, "learning_rate": 4.215005349500714e-05, "loss": 0.0019, "step": 88080 }, { "epoch": 15.71, "learning_rate": 4.214916191155492e-05, "loss": 0.0018, "step": 88090 }, { "epoch": 15.71, "learning_rate": 4.2148270328102714e-05, "loss": 0.0027, "step": 88100 }, { "epoch": 15.71, "learning_rate": 4.21473787446505e-05, "loss": 0.0034, "step": 88110 }, { "epoch": 15.71, "learning_rate": 4.214648716119829e-05, "loss": 0.0013, "step": 88120 }, { "epoch": 15.72, "learning_rate": 4.214559557774608e-05, "loss": 0.0033, "step": 88130 }, { "epoch": 15.72, "learning_rate": 4.2144703994293865e-05, "loss": 0.0036, "step": 88140 }, { "epoch": 15.72, "learning_rate": 4.2143812410841656e-05, "loss": 0.0043, "step": 88150 }, { "epoch": 15.72, "learning_rate": 4.214292082738945e-05, "loss": 0.003, "step": 88160 }, { "epoch": 15.72, "learning_rate": 4.214202924393724e-05, "loss": 0.003, "step": 88170 }, { "epoch": 15.72, "learning_rate": 4.214113766048502e-05, "loss": 0.004, "step": 88180 }, { "epoch": 15.73, "learning_rate": 4.2140246077032814e-05, "loss": 0.0026, "step": 88190 }, { "epoch": 15.73, "learning_rate": 4.21393544935806e-05, "loss": 0.0019, "step": 88200 }, { "epoch": 15.73, "learning_rate": 4.213846291012839e-05, "loss": 0.0023, "step": 88210 }, { "epoch": 15.73, "learning_rate": 4.213757132667618e-05, "loss": 0.0027, "step": 88220 }, { "epoch": 15.73, "learning_rate": 4.2136679743223966e-05, "loss": 0.0033, "step": 88230 }, { "epoch": 15.73, "learning_rate": 4.213578815977176e-05, "loss": 0.0053, "step": 88240 }, { "epoch": 15.74, "learning_rate": 4.213489657631954e-05, "loss": 0.0024, "step": 88250 }, { "epoch": 15.74, "learning_rate": 4.213400499286733e-05, "loss": 0.0043, "step": 88260 }, { "epoch": 15.74, "learning_rate": 4.2133113409415124e-05, "loss": 0.0035, "step": 88270 }, { "epoch": 15.74, "learning_rate": 4.2132221825962915e-05, "loss": 0.0034, "step": 88280 }, { "epoch": 15.74, "learning_rate": 4.21313302425107e-05, "loss": 0.0022, "step": 88290 }, { "epoch": 15.75, "learning_rate": 4.213043865905849e-05, "loss": 0.0034, "step": 88300 }, { "epoch": 15.75, "learning_rate": 4.212954707560628e-05, "loss": 0.0043, "step": 88310 }, { "epoch": 15.75, "learning_rate": 4.2128655492154066e-05, "loss": 0.0032, "step": 88320 }, { "epoch": 15.75, "learning_rate": 4.212776390870186e-05, "loss": 0.0021, "step": 88330 }, { "epoch": 15.75, "learning_rate": 4.212687232524964e-05, "loss": 0.002, "step": 88340 }, { "epoch": 15.75, "learning_rate": 4.212598074179743e-05, "loss": 0.0055, "step": 88350 }, { "epoch": 15.76, "learning_rate": 4.2125089158345224e-05, "loss": 0.0021, "step": 88360 }, { "epoch": 15.76, "learning_rate": 4.212419757489301e-05, "loss": 0.0032, "step": 88370 }, { "epoch": 15.76, "learning_rate": 4.212330599144081e-05, "loss": 0.0028, "step": 88380 }, { "epoch": 15.76, "learning_rate": 4.212241440798859e-05, "loss": 0.0042, "step": 88390 }, { "epoch": 15.76, "learning_rate": 4.212152282453638e-05, "loss": 0.0026, "step": 88400 }, { "epoch": 15.76, "learning_rate": 4.212063124108417e-05, "loss": 0.0025, "step": 88410 }, { "epoch": 15.77, "learning_rate": 4.211973965763196e-05, "loss": 0.003, "step": 88420 }, { "epoch": 15.77, "learning_rate": 4.211884807417974e-05, "loss": 0.0046, "step": 88430 }, { "epoch": 15.77, "learning_rate": 4.2117956490727534e-05, "loss": 0.0015, "step": 88440 }, { "epoch": 15.77, "learning_rate": 4.2117064907275325e-05, "loss": 0.0016, "step": 88450 }, { "epoch": 15.77, "learning_rate": 4.211617332382311e-05, "loss": 0.0043, "step": 88460 }, { "epoch": 15.78, "learning_rate": 4.2115370898716124e-05, "loss": 0.0037, "step": 88470 }, { "epoch": 15.78, "learning_rate": 4.211447931526391e-05, "loss": 0.0024, "step": 88480 }, { "epoch": 15.78, "learning_rate": 4.21135877318117e-05, "loss": 0.0023, "step": 88490 }, { "epoch": 15.78, "learning_rate": 4.2112696148359484e-05, "loss": 0.003, "step": 88500 }, { "epoch": 15.78, "learning_rate": 4.2111804564907275e-05, "loss": 0.0026, "step": 88510 }, { "epoch": 15.78, "learning_rate": 4.2110912981455067e-05, "loss": 0.0022, "step": 88520 }, { "epoch": 15.79, "learning_rate": 4.211002139800285e-05, "loss": 0.0023, "step": 88530 }, { "epoch": 15.79, "learning_rate": 4.210912981455065e-05, "loss": 0.003, "step": 88540 }, { "epoch": 15.79, "learning_rate": 4.2108238231098433e-05, "loss": 0.0021, "step": 88550 }, { "epoch": 15.79, "learning_rate": 4.2107346647646225e-05, "loss": 0.002, "step": 88560 }, { "epoch": 15.79, "learning_rate": 4.210645506419401e-05, "loss": 0.0015, "step": 88570 }, { "epoch": 15.8, "learning_rate": 4.21055634807418e-05, "loss": 0.0015, "step": 88580 }, { "epoch": 15.8, "learning_rate": 4.210467189728959e-05, "loss": 0.0024, "step": 88590 }, { "epoch": 15.8, "learning_rate": 4.2103780313837376e-05, "loss": 0.0018, "step": 88600 }, { "epoch": 15.8, "learning_rate": 4.210288873038517e-05, "loss": 0.0022, "step": 88610 }, { "epoch": 15.8, "learning_rate": 4.210199714693295e-05, "loss": 0.0045, "step": 88620 }, { "epoch": 15.8, "learning_rate": 4.210110556348074e-05, "loss": 0.0022, "step": 88630 }, { "epoch": 15.81, "learning_rate": 4.210021398002853e-05, "loss": 0.004, "step": 88640 }, { "epoch": 15.81, "learning_rate": 4.2099322396576325e-05, "loss": 0.003, "step": 88650 }, { "epoch": 15.81, "learning_rate": 4.209843081312411e-05, "loss": 0.0034, "step": 88660 }, { "epoch": 15.81, "learning_rate": 4.20975392296719e-05, "loss": 0.0025, "step": 88670 }, { "epoch": 15.81, "learning_rate": 4.209664764621969e-05, "loss": 0.0048, "step": 88680 }, { "epoch": 15.81, "learning_rate": 4.2095756062767477e-05, "loss": 0.0023, "step": 88690 }, { "epoch": 15.82, "learning_rate": 4.209486447931527e-05, "loss": 0.0022, "step": 88700 }, { "epoch": 15.82, "learning_rate": 4.209397289586305e-05, "loss": 0.0031, "step": 88710 }, { "epoch": 15.82, "learning_rate": 4.209308131241084e-05, "loss": 0.0033, "step": 88720 }, { "epoch": 15.82, "learning_rate": 4.209218972895863e-05, "loss": 0.0032, "step": 88730 }, { "epoch": 15.82, "learning_rate": 4.209129814550642e-05, "loss": 0.0015, "step": 88740 }, { "epoch": 15.83, "learning_rate": 4.209040656205421e-05, "loss": 0.0036, "step": 88750 }, { "epoch": 15.83, "learning_rate": 4.2089514978602e-05, "loss": 0.0059, "step": 88760 }, { "epoch": 15.83, "learning_rate": 4.208862339514979e-05, "loss": 0.0028, "step": 88770 }, { "epoch": 15.83, "learning_rate": 4.208773181169758e-05, "loss": 0.004, "step": 88780 }, { "epoch": 15.83, "learning_rate": 4.208684022824537e-05, "loss": 0.0037, "step": 88790 }, { "epoch": 15.83, "learning_rate": 4.208594864479315e-05, "loss": 0.0026, "step": 88800 }, { "epoch": 15.84, "learning_rate": 4.2085057061340944e-05, "loss": 0.0023, "step": 88810 }, { "epoch": 15.84, "learning_rate": 4.2084165477888735e-05, "loss": 0.0051, "step": 88820 }, { "epoch": 15.84, "learning_rate": 4.208327389443652e-05, "loss": 0.0018, "step": 88830 }, { "epoch": 15.84, "learning_rate": 4.208238231098431e-05, "loss": 0.0044, "step": 88840 }, { "epoch": 15.84, "learning_rate": 4.2081490727532095e-05, "loss": 0.0032, "step": 88850 }, { "epoch": 15.85, "learning_rate": 4.2080599144079886e-05, "loss": 0.0025, "step": 88860 }, { "epoch": 15.85, "learning_rate": 4.207970756062768e-05, "loss": 0.0016, "step": 88870 }, { "epoch": 15.85, "learning_rate": 4.207881597717547e-05, "loss": 0.0031, "step": 88880 }, { "epoch": 15.85, "learning_rate": 4.207792439372325e-05, "loss": 0.0031, "step": 88890 }, { "epoch": 15.85, "learning_rate": 4.2077032810271044e-05, "loss": 0.0028, "step": 88900 }, { "epoch": 15.85, "learning_rate": 4.2076141226818836e-05, "loss": 0.0027, "step": 88910 }, { "epoch": 15.86, "learning_rate": 4.207524964336662e-05, "loss": 0.0031, "step": 88920 }, { "epoch": 15.86, "learning_rate": 4.207435805991441e-05, "loss": 0.0021, "step": 88930 }, { "epoch": 15.86, "learning_rate": 4.2073466476462196e-05, "loss": 0.0024, "step": 88940 }, { "epoch": 15.86, "learning_rate": 4.207257489300999e-05, "loss": 0.0037, "step": 88950 }, { "epoch": 15.86, "learning_rate": 4.207168330955777e-05, "loss": 0.0028, "step": 88960 }, { "epoch": 15.86, "learning_rate": 4.207079172610556e-05, "loss": 0.0039, "step": 88970 }, { "epoch": 15.87, "learning_rate": 4.2069900142653354e-05, "loss": 0.002, "step": 88980 }, { "epoch": 15.87, "learning_rate": 4.2069008559201145e-05, "loss": 0.0045, "step": 88990 }, { "epoch": 15.87, "learning_rate": 4.2068116975748936e-05, "loss": 0.0038, "step": 89000 }, { "epoch": 15.87, "learning_rate": 4.206722539229672e-05, "loss": 0.0045, "step": 89010 }, { "epoch": 15.87, "learning_rate": 4.206633380884451e-05, "loss": 0.0014, "step": 89020 }, { "epoch": 15.88, "learning_rate": 4.2065442225392296e-05, "loss": 0.0019, "step": 89030 }, { "epoch": 15.88, "learning_rate": 4.206455064194009e-05, "loss": 0.0036, "step": 89040 }, { "epoch": 15.88, "learning_rate": 4.206365905848787e-05, "loss": 0.0053, "step": 89050 }, { "epoch": 15.88, "learning_rate": 4.206276747503566e-05, "loss": 0.0025, "step": 89060 }, { "epoch": 15.88, "learning_rate": 4.2061875891583454e-05, "loss": 0.003, "step": 89070 }, { "epoch": 15.88, "learning_rate": 4.206098430813124e-05, "loss": 0.0017, "step": 89080 }, { "epoch": 15.89, "learning_rate": 4.206009272467904e-05, "loss": 0.0023, "step": 89090 }, { "epoch": 15.89, "learning_rate": 4.205920114122682e-05, "loss": 0.0027, "step": 89100 }, { "epoch": 15.89, "learning_rate": 4.205830955777461e-05, "loss": 0.0022, "step": 89110 }, { "epoch": 15.89, "learning_rate": 4.20574179743224e-05, "loss": 0.0033, "step": 89120 }, { "epoch": 15.89, "learning_rate": 4.205652639087019e-05, "loss": 0.0029, "step": 89130 }, { "epoch": 15.9, "learning_rate": 4.205563480741798e-05, "loss": 0.0036, "step": 89140 }, { "epoch": 15.9, "learning_rate": 4.2054743223965764e-05, "loss": 0.0024, "step": 89150 }, { "epoch": 15.9, "learning_rate": 4.2053851640513555e-05, "loss": 0.0019, "step": 89160 }, { "epoch": 15.9, "learning_rate": 4.205296005706134e-05, "loss": 0.0014, "step": 89170 }, { "epoch": 15.9, "learning_rate": 4.205206847360913e-05, "loss": 0.0015, "step": 89180 }, { "epoch": 15.9, "learning_rate": 4.2051176890156915e-05, "loss": 0.0026, "step": 89190 }, { "epoch": 15.91, "learning_rate": 4.205028530670471e-05, "loss": 0.0021, "step": 89200 }, { "epoch": 15.91, "learning_rate": 4.20493937232525e-05, "loss": 0.004, "step": 89210 }, { "epoch": 15.91, "learning_rate": 4.2048591298145505e-05, "loss": 0.0052, "step": 89220 }, { "epoch": 15.91, "learning_rate": 4.20476997146933e-05, "loss": 0.0025, "step": 89230 }, { "epoch": 15.91, "learning_rate": 4.204680813124109e-05, "loss": 0.0024, "step": 89240 }, { "epoch": 15.91, "learning_rate": 4.204591654778888e-05, "loss": 0.003, "step": 89250 }, { "epoch": 15.92, "learning_rate": 4.2045024964336664e-05, "loss": 0.0035, "step": 89260 }, { "epoch": 15.92, "learning_rate": 4.2044133380884455e-05, "loss": 0.0026, "step": 89270 }, { "epoch": 15.92, "learning_rate": 4.204324179743224e-05, "loss": 0.0021, "step": 89280 }, { "epoch": 15.92, "learning_rate": 4.204235021398003e-05, "loss": 0.0034, "step": 89290 }, { "epoch": 15.92, "learning_rate": 4.204145863052782e-05, "loss": 0.0062, "step": 89300 }, { "epoch": 15.93, "learning_rate": 4.2040567047075606e-05, "loss": 0.0016, "step": 89310 }, { "epoch": 15.93, "learning_rate": 4.20396754636234e-05, "loss": 0.0038, "step": 89320 }, { "epoch": 15.93, "learning_rate": 4.203878388017118e-05, "loss": 0.0026, "step": 89330 }, { "epoch": 15.93, "learning_rate": 4.203789229671897e-05, "loss": 0.0019, "step": 89340 }, { "epoch": 15.93, "learning_rate": 4.2037000713266764e-05, "loss": 0.0021, "step": 89350 }, { "epoch": 15.93, "learning_rate": 4.2036109129814555e-05, "loss": 0.0029, "step": 89360 }, { "epoch": 15.94, "learning_rate": 4.2035217546362347e-05, "loss": 0.0021, "step": 89370 }, { "epoch": 15.94, "learning_rate": 4.203432596291013e-05, "loss": 0.0021, "step": 89380 }, { "epoch": 15.94, "learning_rate": 4.203343437945792e-05, "loss": 0.0031, "step": 89390 }, { "epoch": 15.94, "learning_rate": 4.2032542796005707e-05, "loss": 0.002, "step": 89400 }, { "epoch": 15.94, "learning_rate": 4.20316512125535e-05, "loss": 0.0022, "step": 89410 }, { "epoch": 15.95, "learning_rate": 4.203075962910128e-05, "loss": 0.0041, "step": 89420 }, { "epoch": 15.95, "learning_rate": 4.2029868045649073e-05, "loss": 0.0045, "step": 89430 }, { "epoch": 15.95, "learning_rate": 4.2028976462196865e-05, "loss": 0.0032, "step": 89440 }, { "epoch": 15.95, "learning_rate": 4.202808487874465e-05, "loss": 0.0036, "step": 89450 }, { "epoch": 15.95, "learning_rate": 4.202719329529245e-05, "loss": 0.002, "step": 89460 }, { "epoch": 15.95, "learning_rate": 4.202630171184023e-05, "loss": 0.0028, "step": 89470 }, { "epoch": 15.96, "learning_rate": 4.202541012838802e-05, "loss": 0.003, "step": 89480 }, { "epoch": 15.96, "learning_rate": 4.202451854493581e-05, "loss": 0.0035, "step": 89490 }, { "epoch": 15.96, "learning_rate": 4.20236269614836e-05, "loss": 0.0014, "step": 89500 }, { "epoch": 15.96, "learning_rate": 4.202273537803138e-05, "loss": 0.0022, "step": 89510 }, { "epoch": 15.96, "learning_rate": 4.2021843794579174e-05, "loss": 0.0046, "step": 89520 }, { "epoch": 15.96, "learning_rate": 4.2020952211126965e-05, "loss": 0.0021, "step": 89530 }, { "epoch": 15.97, "learning_rate": 4.202006062767475e-05, "loss": 0.003, "step": 89540 }, { "epoch": 15.97, "learning_rate": 4.201916904422254e-05, "loss": 0.0008, "step": 89550 }, { "epoch": 15.97, "learning_rate": 4.2018277460770325e-05, "loss": 0.0014, "step": 89560 }, { "epoch": 15.97, "learning_rate": 4.201738587731812e-05, "loss": 0.0031, "step": 89570 }, { "epoch": 15.97, "learning_rate": 4.201649429386591e-05, "loss": 0.0028, "step": 89580 }, { "epoch": 15.98, "learning_rate": 4.20156027104137e-05, "loss": 0.0039, "step": 89590 }, { "epoch": 15.98, "learning_rate": 4.201471112696149e-05, "loss": 0.0061, "step": 89600 }, { "epoch": 15.98, "learning_rate": 4.2013819543509275e-05, "loss": 0.0016, "step": 89610 }, { "epoch": 15.98, "learning_rate": 4.2012927960057066e-05, "loss": 0.0028, "step": 89620 }, { "epoch": 15.98, "learning_rate": 4.201203637660485e-05, "loss": 0.0025, "step": 89630 }, { "epoch": 15.98, "learning_rate": 4.201114479315264e-05, "loss": 0.0018, "step": 89640 }, { "epoch": 15.99, "learning_rate": 4.2010253209700426e-05, "loss": 0.0018, "step": 89650 }, { "epoch": 15.99, "learning_rate": 4.200936162624822e-05, "loss": 0.0017, "step": 89660 }, { "epoch": 15.99, "learning_rate": 4.200847004279601e-05, "loss": 0.0019, "step": 89670 }, { "epoch": 15.99, "learning_rate": 4.20075784593438e-05, "loss": 0.0048, "step": 89680 }, { "epoch": 15.99, "learning_rate": 4.200668687589159e-05, "loss": 0.0029, "step": 89690 }, { "epoch": 16.0, "learning_rate": 4.2005795292439375e-05, "loss": 0.0022, "step": 89700 }, { "epoch": 16.0, "learning_rate": 4.2004903708987166e-05, "loss": 0.0031, "step": 89710 }, { "epoch": 16.0, "learning_rate": 4.200401212553495e-05, "loss": 0.001, "step": 89720 }, { "epoch": 16.0, "eval_loss": 0.020624889060854912, "eval_runtime": 195.8925, "eval_samples_per_second": 23.681, "eval_steps_per_second": 2.961, "step": 89728 }, { "epoch": 16.0, "step": 89728, "total_flos": 8.436933281646816e+19, "train_loss": 0.006225432415173189, "train_runtime": 115573.2859, "train_samples_per_second": 38.819, "train_steps_per_second": 4.852 } ], "max_steps": 560800, "num_train_epochs": 100, "total_flos": 8.436933281646816e+19, "trial_name": null, "trial_params": null }