{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "global_step": 1598244, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0002999061469963284, "loss": 9.0229, "step": 500 }, { "epoch": 0.0, "learning_rate": 0.00029981229399265693, "loss": 8.3193, "step": 1000 }, { "epoch": 0.0, "learning_rate": 0.0002997184409889854, "loss": 8.0421, "step": 1500 }, { "epoch": 0.0, "learning_rate": 0.00029962458798531383, "loss": 7.8282, "step": 2000 }, { "epoch": 0.0, "learning_rate": 0.00029953073498164233, "loss": 7.6413, "step": 2500 }, { "epoch": 0.01, "learning_rate": 0.0002994368819779708, "loss": 7.4768, "step": 3000 }, { "epoch": 0.01, "learning_rate": 0.00029934302897429923, "loss": 7.3366, "step": 3500 }, { "epoch": 0.01, "learning_rate": 0.00029924917597062774, "loss": 7.1968, "step": 4000 }, { "epoch": 0.01, "learning_rate": 0.0002991553229669562, "loss": 7.0463, "step": 4500 }, { "epoch": 0.01, "learning_rate": 0.0002990614699632847, "loss": 6.9269, "step": 5000 }, { "epoch": 0.01, "learning_rate": 0.00029896761695961314, "loss": 6.796, "step": 5500 }, { "epoch": 0.01, "learning_rate": 0.00029887376395594165, "loss": 6.7006, "step": 6000 }, { "epoch": 0.01, "learning_rate": 0.0002987799109522701, "loss": 6.5999, "step": 6500 }, { "epoch": 0.01, "learning_rate": 0.00029868605794859855, "loss": 6.516, "step": 7000 }, { "epoch": 0.01, "learning_rate": 0.00029859220494492705, "loss": 6.4314, "step": 7500 }, { "epoch": 0.02, "learning_rate": 0.0002984983519412555, "loss": 6.3386, "step": 8000 }, { "epoch": 0.02, "learning_rate": 0.00029840449893758395, "loss": 6.2853, "step": 8500 }, { "epoch": 0.02, "learning_rate": 0.00029831064593391246, "loss": 6.2242, "step": 9000 }, { "epoch": 0.02, "learning_rate": 0.0002982167929302409, "loss": 6.1438, "step": 9500 }, { "epoch": 0.02, "learning_rate": 0.00029812293992656936, "loss": 6.1042, "step": 10000 }, { "epoch": 0.02, "learning_rate": 0.00029802908692289786, "loss": 6.0451, "step": 10500 }, { "epoch": 0.02, "learning_rate": 0.0002979352339192263, "loss": 5.9868, "step": 11000 }, { "epoch": 0.02, "learning_rate": 0.00029784138091555476, "loss": 5.9375, "step": 11500 }, { "epoch": 0.02, "learning_rate": 0.00029774752791188327, "loss": 5.896, "step": 12000 }, { "epoch": 0.02, "learning_rate": 0.0002976536749082117, "loss": 5.8403, "step": 12500 }, { "epoch": 0.02, "learning_rate": 0.00029755982190454017, "loss": 5.8008, "step": 13000 }, { "epoch": 0.03, "learning_rate": 0.0002974659689008687, "loss": 5.7554, "step": 13500 }, { "epoch": 0.03, "learning_rate": 0.0002973721158971971, "loss": 5.726, "step": 14000 }, { "epoch": 0.03, "learning_rate": 0.00029727826289352563, "loss": 5.6831, "step": 14500 }, { "epoch": 0.03, "learning_rate": 0.0002971844098898541, "loss": 5.6498, "step": 15000 }, { "epoch": 0.03, "learning_rate": 0.0002970905568861826, "loss": 5.6124, "step": 15500 }, { "epoch": 0.03, "learning_rate": 0.00029699670388251103, "loss": 5.5981, "step": 16000 }, { "epoch": 0.03, "learning_rate": 0.00029690285087883954, "loss": 5.5783, "step": 16500 }, { "epoch": 0.03, "learning_rate": 0.000296808997875168, "loss": 5.543, "step": 17000 }, { "epoch": 0.03, "learning_rate": 0.00029671514487149644, "loss": 5.5149, "step": 17500 }, { "epoch": 0.03, "learning_rate": 0.00029662129186782494, "loss": 5.4884, "step": 18000 }, { "epoch": 0.03, "learning_rate": 0.0002965274388641534, "loss": 5.4705, "step": 18500 }, { "epoch": 0.04, "learning_rate": 0.00029643358586048184, "loss": 5.455, "step": 19000 }, { "epoch": 0.04, "learning_rate": 0.0002963397328568103, "loss": 5.4311, "step": 19500 }, { "epoch": 0.04, "learning_rate": 0.0002962458798531388, "loss": 5.4137, "step": 20000 }, { "epoch": 0.04, "learning_rate": 0.00029615202684946725, "loss": 5.3846, "step": 20500 }, { "epoch": 0.04, "learning_rate": 0.0002960581738457957, "loss": 5.3621, "step": 21000 }, { "epoch": 0.04, "learning_rate": 0.0002959643208421242, "loss": 5.3677, "step": 21500 }, { "epoch": 0.04, "learning_rate": 0.00029587046783845265, "loss": 5.3455, "step": 22000 }, { "epoch": 0.04, "learning_rate": 0.00029577661483478116, "loss": 5.3252, "step": 22500 }, { "epoch": 0.04, "learning_rate": 0.0002956827618311096, "loss": 5.3115, "step": 23000 }, { "epoch": 0.04, "learning_rate": 0.00029558890882743806, "loss": 5.3063, "step": 23500 }, { "epoch": 0.05, "learning_rate": 0.00029549505582376656, "loss": 5.2829, "step": 24000 }, { "epoch": 0.05, "learning_rate": 0.000295401202820095, "loss": 5.2593, "step": 24500 }, { "epoch": 0.05, "learning_rate": 0.0002953073498164235, "loss": 5.2641, "step": 25000 }, { "epoch": 0.05, "learning_rate": 0.00029521349681275197, "loss": 5.2502, "step": 25500 }, { "epoch": 0.05, "learning_rate": 0.0002951196438090805, "loss": 5.2255, "step": 26000 }, { "epoch": 0.05, "learning_rate": 0.0002950257908054089, "loss": 5.2052, "step": 26500 }, { "epoch": 0.05, "learning_rate": 0.0002949319378017374, "loss": 5.1995, "step": 27000 }, { "epoch": 0.05, "learning_rate": 0.0002948380847980659, "loss": 5.1872, "step": 27500 }, { "epoch": 0.05, "learning_rate": 0.00029474423179439433, "loss": 5.2049, "step": 28000 }, { "epoch": 0.05, "learning_rate": 0.0002946503787907228, "loss": 5.1725, "step": 28500 }, { "epoch": 0.05, "learning_rate": 0.0002945565257870513, "loss": 5.176, "step": 29000 }, { "epoch": 0.06, "learning_rate": 0.00029446267278337973, "loss": 5.1581, "step": 29500 }, { "epoch": 0.06, "learning_rate": 0.0002943688197797082, "loss": 5.1469, "step": 30000 }, { "epoch": 0.06, "learning_rate": 0.00029427496677603664, "loss": 5.1353, "step": 30500 }, { "epoch": 0.06, "learning_rate": 0.00029418111377236514, "loss": 5.1279, "step": 31000 }, { "epoch": 0.06, "learning_rate": 0.0002940872607686936, "loss": 5.1085, "step": 31500 }, { "epoch": 0.06, "learning_rate": 0.0002939934077650221, "loss": 5.1187, "step": 32000 }, { "epoch": 0.06, "learning_rate": 0.00029389955476135055, "loss": 5.0909, "step": 32500 }, { "epoch": 0.06, "learning_rate": 0.00029380570175767905, "loss": 5.0936, "step": 33000 }, { "epoch": 0.06, "learning_rate": 0.0002937118487540075, "loss": 5.0821, "step": 33500 }, { "epoch": 0.06, "learning_rate": 0.00029361799575033595, "loss": 5.0849, "step": 34000 }, { "epoch": 0.06, "learning_rate": 0.00029352414274666445, "loss": 5.0655, "step": 34500 }, { "epoch": 0.07, "learning_rate": 0.0002934302897429929, "loss": 5.0643, "step": 35000 }, { "epoch": 0.07, "learning_rate": 0.0002933364367393214, "loss": 5.0693, "step": 35500 }, { "epoch": 0.07, "learning_rate": 0.00029324258373564986, "loss": 5.0493, "step": 36000 }, { "epoch": 0.07, "learning_rate": 0.0002931487307319783, "loss": 5.0408, "step": 36500 }, { "epoch": 0.07, "learning_rate": 0.0002930548777283068, "loss": 5.0339, "step": 37000 }, { "epoch": 0.07, "learning_rate": 0.00029296102472463526, "loss": 5.0291, "step": 37500 }, { "epoch": 0.07, "learning_rate": 0.0002928671717209637, "loss": 5.0077, "step": 38000 }, { "epoch": 0.07, "learning_rate": 0.0002927733187172922, "loss": 5.0201, "step": 38500 }, { "epoch": 0.07, "learning_rate": 0.00029267946571362067, "loss": 5.0042, "step": 39000 }, { "epoch": 0.07, "learning_rate": 0.0002925856127099491, "loss": 4.9954, "step": 39500 }, { "epoch": 0.08, "learning_rate": 0.0002924917597062776, "loss": 5.0108, "step": 40000 }, { "epoch": 0.08, "learning_rate": 0.0002923979067026061, "loss": 4.9867, "step": 40500 }, { "epoch": 0.08, "learning_rate": 0.0002923040536989345, "loss": 4.9779, "step": 41000 }, { "epoch": 0.08, "learning_rate": 0.00029221020069526303, "loss": 4.9828, "step": 41500 }, { "epoch": 0.08, "learning_rate": 0.0002921163476915915, "loss": 4.9588, "step": 42000 }, { "epoch": 0.08, "learning_rate": 0.00029202249468792, "loss": 4.9711, "step": 42500 }, { "epoch": 0.08, "learning_rate": 0.00029192864168424844, "loss": 4.9616, "step": 43000 }, { "epoch": 0.08, "learning_rate": 0.00029183478868057694, "loss": 4.9597, "step": 43500 }, { "epoch": 0.08, "learning_rate": 0.0002917409356769054, "loss": 4.9501, "step": 44000 }, { "epoch": 0.08, "learning_rate": 0.00029164708267323384, "loss": 4.9507, "step": 44500 }, { "epoch": 0.08, "learning_rate": 0.00029155322966956234, "loss": 4.9347, "step": 45000 }, { "epoch": 0.09, "learning_rate": 0.0002914593766658908, "loss": 4.9387, "step": 45500 }, { "epoch": 0.09, "learning_rate": 0.00029136552366221925, "loss": 4.9312, "step": 46000 }, { "epoch": 0.09, "learning_rate": 0.00029127167065854775, "loss": 4.9258, "step": 46500 }, { "epoch": 0.09, "learning_rate": 0.0002911778176548762, "loss": 4.9162, "step": 47000 }, { "epoch": 0.09, "learning_rate": 0.00029108396465120465, "loss": 4.9197, "step": 47500 }, { "epoch": 0.09, "learning_rate": 0.00029099011164753316, "loss": 4.8996, "step": 48000 }, { "epoch": 0.09, "learning_rate": 0.0002908962586438616, "loss": 4.8886, "step": 48500 }, { "epoch": 0.09, "learning_rate": 0.00029080240564019006, "loss": 4.9008, "step": 49000 }, { "epoch": 0.09, "learning_rate": 0.00029070855263651856, "loss": 4.9134, "step": 49500 }, { "epoch": 0.09, "learning_rate": 0.000290614699632847, "loss": 4.8817, "step": 50000 }, { "epoch": 0.09, "learning_rate": 0.00029052084662917546, "loss": 4.8882, "step": 50500 }, { "epoch": 0.1, "learning_rate": 0.00029042699362550397, "loss": 4.8738, "step": 51000 }, { "epoch": 0.1, "learning_rate": 0.0002903331406218324, "loss": 4.8696, "step": 51500 }, { "epoch": 0.1, "learning_rate": 0.0002902392876181609, "loss": 4.8936, "step": 52000 }, { "epoch": 0.1, "learning_rate": 0.00029014543461448937, "loss": 4.8812, "step": 52500 }, { "epoch": 0.1, "learning_rate": 0.0002900515816108179, "loss": 4.8556, "step": 53000 }, { "epoch": 0.1, "learning_rate": 0.0002899577286071463, "loss": 4.8639, "step": 53500 }, { "epoch": 0.1, "learning_rate": 0.00028986387560347483, "loss": 4.8762, "step": 54000 }, { "epoch": 0.1, "learning_rate": 0.0002897700225998033, "loss": 4.8768, "step": 54500 }, { "epoch": 0.1, "learning_rate": 0.00028967616959613173, "loss": 4.8507, "step": 55000 }, { "epoch": 0.1, "learning_rate": 0.0002895823165924602, "loss": 4.8578, "step": 55500 }, { "epoch": 0.11, "learning_rate": 0.0002894884635887887, "loss": 4.8623, "step": 56000 }, { "epoch": 0.11, "learning_rate": 0.00028939461058511714, "loss": 4.8503, "step": 56500 }, { "epoch": 0.11, "learning_rate": 0.0002893007575814456, "loss": 4.83, "step": 57000 }, { "epoch": 0.11, "learning_rate": 0.0002892069045777741, "loss": 4.8259, "step": 57500 }, { "epoch": 0.11, "learning_rate": 0.00028911305157410254, "loss": 4.8262, "step": 58000 }, { "epoch": 0.11, "learning_rate": 0.000289019198570431, "loss": 4.8296, "step": 58500 }, { "epoch": 0.11, "learning_rate": 0.0002889253455667595, "loss": 4.8132, "step": 59000 }, { "epoch": 0.11, "learning_rate": 0.00028883149256308795, "loss": 4.8128, "step": 59500 }, { "epoch": 0.11, "learning_rate": 0.0002887376395594164, "loss": 4.8257, "step": 60000 }, { "epoch": 0.11, "learning_rate": 0.0002886437865557449, "loss": 4.8056, "step": 60500 }, { "epoch": 0.11, "learning_rate": 0.00028854993355207335, "loss": 4.8002, "step": 61000 }, { "epoch": 0.12, "learning_rate": 0.00028845608054840186, "loss": 4.8176, "step": 61500 }, { "epoch": 0.12, "learning_rate": 0.0002883622275447303, "loss": 4.8031, "step": 62000 }, { "epoch": 0.12, "learning_rate": 0.0002882683745410588, "loss": 4.7931, "step": 62500 }, { "epoch": 0.12, "learning_rate": 0.00028817452153738726, "loss": 4.7996, "step": 63000 }, { "epoch": 0.12, "learning_rate": 0.00028808066853371577, "loss": 4.7932, "step": 63500 }, { "epoch": 0.12, "learning_rate": 0.0002879868155300442, "loss": 4.7942, "step": 64000 }, { "epoch": 0.12, "learning_rate": 0.00028789296252637267, "loss": 4.7629, "step": 64500 }, { "epoch": 0.12, "learning_rate": 0.00028779910952270117, "loss": 4.7946, "step": 65000 }, { "epoch": 0.12, "learning_rate": 0.0002877052565190296, "loss": 4.7972, "step": 65500 }, { "epoch": 0.12, "learning_rate": 0.00028761140351535807, "loss": 4.7747, "step": 66000 }, { "epoch": 0.12, "learning_rate": 0.0002875175505116866, "loss": 4.7656, "step": 66500 }, { "epoch": 0.13, "learning_rate": 0.000287423697508015, "loss": 4.7701, "step": 67000 }, { "epoch": 0.13, "learning_rate": 0.0002873298445043435, "loss": 4.7587, "step": 67500 }, { "epoch": 0.13, "learning_rate": 0.0002872359915006719, "loss": 4.7727, "step": 68000 }, { "epoch": 0.13, "learning_rate": 0.00028714213849700043, "loss": 4.7734, "step": 68500 }, { "epoch": 0.13, "learning_rate": 0.0002870482854933289, "loss": 4.7839, "step": 69000 }, { "epoch": 0.13, "learning_rate": 0.0002869544324896574, "loss": 4.7712, "step": 69500 }, { "epoch": 0.13, "learning_rate": 0.00028686057948598584, "loss": 4.7754, "step": 70000 }, { "epoch": 0.13, "learning_rate": 0.0002867667264823143, "loss": 4.7686, "step": 70500 }, { "epoch": 0.13, "learning_rate": 0.0002866728734786428, "loss": 4.7434, "step": 71000 }, { "epoch": 0.13, "learning_rate": 0.00028657902047497124, "loss": 4.7537, "step": 71500 }, { "epoch": 0.14, "learning_rate": 0.00028648516747129975, "loss": 4.7555, "step": 72000 }, { "epoch": 0.14, "learning_rate": 0.0002863913144676282, "loss": 4.7473, "step": 72500 }, { "epoch": 0.14, "learning_rate": 0.0002862974614639567, "loss": 4.7394, "step": 73000 }, { "epoch": 0.14, "learning_rate": 0.00028620360846028515, "loss": 4.7185, "step": 73500 }, { "epoch": 0.14, "learning_rate": 0.0002861097554566136, "loss": 4.7267, "step": 74000 }, { "epoch": 0.14, "learning_rate": 0.0002860159024529421, "loss": 4.7256, "step": 74500 }, { "epoch": 0.14, "learning_rate": 0.00028592204944927056, "loss": 4.7324, "step": 75000 }, { "epoch": 0.14, "learning_rate": 0.000285828196445599, "loss": 4.7389, "step": 75500 }, { "epoch": 0.14, "learning_rate": 0.0002857343434419275, "loss": 4.7295, "step": 76000 }, { "epoch": 0.14, "learning_rate": 0.00028564049043825596, "loss": 4.7122, "step": 76500 }, { "epoch": 0.14, "learning_rate": 0.0002855466374345844, "loss": 4.7236, "step": 77000 }, { "epoch": 0.15, "learning_rate": 0.0002854527844309129, "loss": 4.7286, "step": 77500 }, { "epoch": 0.15, "learning_rate": 0.00028535893142724137, "loss": 4.7192, "step": 78000 }, { "epoch": 0.15, "learning_rate": 0.0002852650784235698, "loss": 4.7253, "step": 78500 }, { "epoch": 0.15, "learning_rate": 0.0002851712254198983, "loss": 4.7103, "step": 79000 }, { "epoch": 0.15, "learning_rate": 0.00028507737241622677, "loss": 4.7163, "step": 79500 }, { "epoch": 0.15, "learning_rate": 0.0002849835194125553, "loss": 4.7043, "step": 80000 }, { "epoch": 0.15, "learning_rate": 0.0002848896664088837, "loss": 4.6942, "step": 80500 }, { "epoch": 0.15, "learning_rate": 0.0002847958134052122, "loss": 4.7068, "step": 81000 }, { "epoch": 0.15, "learning_rate": 0.0002847019604015407, "loss": 4.7219, "step": 81500 }, { "epoch": 0.15, "learning_rate": 0.00028460810739786913, "loss": 4.6885, "step": 82000 }, { "epoch": 0.15, "learning_rate": 0.00028451425439419764, "loss": 4.7104, "step": 82500 }, { "epoch": 0.16, "learning_rate": 0.0002844204013905261, "loss": 4.7067, "step": 83000 }, { "epoch": 0.16, "learning_rate": 0.00028432654838685454, "loss": 4.7005, "step": 83500 }, { "epoch": 0.16, "learning_rate": 0.00028423269538318304, "loss": 4.6733, "step": 84000 }, { "epoch": 0.16, "learning_rate": 0.0002841388423795115, "loss": 4.6877, "step": 84500 }, { "epoch": 0.16, "learning_rate": 0.00028404498937583994, "loss": 4.6818, "step": 85000 }, { "epoch": 0.16, "learning_rate": 0.00028395113637216845, "loss": 4.6662, "step": 85500 }, { "epoch": 0.16, "learning_rate": 0.0002838572833684969, "loss": 4.6931, "step": 86000 }, { "epoch": 0.16, "learning_rate": 0.00028376343036482535, "loss": 4.6788, "step": 86500 }, { "epoch": 0.16, "learning_rate": 0.00028366957736115385, "loss": 4.6833, "step": 87000 }, { "epoch": 0.16, "learning_rate": 0.0002835757243574823, "loss": 4.6715, "step": 87500 }, { "epoch": 0.17, "learning_rate": 0.00028348187135381075, "loss": 4.6749, "step": 88000 }, { "epoch": 0.17, "learning_rate": 0.00028338801835013926, "loss": 4.6802, "step": 88500 }, { "epoch": 0.17, "learning_rate": 0.0002832941653464677, "loss": 4.6582, "step": 89000 }, { "epoch": 0.17, "learning_rate": 0.0002832003123427962, "loss": 4.6841, "step": 89500 }, { "epoch": 0.17, "learning_rate": 0.00028310645933912466, "loss": 4.6766, "step": 90000 }, { "epoch": 0.17, "learning_rate": 0.00028301260633545317, "loss": 4.6729, "step": 90500 }, { "epoch": 0.17, "learning_rate": 0.0002829187533317816, "loss": 4.6545, "step": 91000 }, { "epoch": 0.17, "learning_rate": 0.00028282490032811007, "loss": 4.668, "step": 91500 }, { "epoch": 0.17, "learning_rate": 0.00028273104732443857, "loss": 4.6747, "step": 92000 }, { "epoch": 0.17, "learning_rate": 0.000282637194320767, "loss": 4.6719, "step": 92500 }, { "epoch": 0.17, "learning_rate": 0.00028254334131709547, "loss": 4.642, "step": 93000 }, { "epoch": 0.18, "learning_rate": 0.000282449488313424, "loss": 4.6562, "step": 93500 }, { "epoch": 0.18, "learning_rate": 0.0002823556353097524, "loss": 4.6628, "step": 94000 }, { "epoch": 0.18, "learning_rate": 0.0002822617823060809, "loss": 4.6466, "step": 94500 }, { "epoch": 0.18, "learning_rate": 0.0002821679293024094, "loss": 4.6609, "step": 95000 }, { "epoch": 0.18, "learning_rate": 0.00028207407629873783, "loss": 4.6409, "step": 95500 }, { "epoch": 0.18, "learning_rate": 0.0002819802232950663, "loss": 4.6591, "step": 96000 }, { "epoch": 0.18, "learning_rate": 0.0002818863702913948, "loss": 4.6383, "step": 96500 }, { "epoch": 0.18, "learning_rate": 0.00028179251728772324, "loss": 4.6406, "step": 97000 }, { "epoch": 0.18, "learning_rate": 0.0002816986642840517, "loss": 4.6608, "step": 97500 }, { "epoch": 0.18, "learning_rate": 0.0002816048112803802, "loss": 4.6444, "step": 98000 }, { "epoch": 0.18, "learning_rate": 0.00028151095827670864, "loss": 4.6573, "step": 98500 }, { "epoch": 0.19, "learning_rate": 0.00028141710527303715, "loss": 4.646, "step": 99000 }, { "epoch": 0.19, "learning_rate": 0.0002813232522693656, "loss": 4.647, "step": 99500 }, { "epoch": 0.19, "learning_rate": 0.0002812293992656941, "loss": 4.6531, "step": 100000 }, { "epoch": 0.19, "learning_rate": 0.00028113554626202255, "loss": 4.6351, "step": 100500 }, { "epoch": 0.19, "learning_rate": 0.00028104169325835106, "loss": 4.6135, "step": 101000 }, { "epoch": 0.19, "learning_rate": 0.0002809478402546795, "loss": 4.6253, "step": 101500 }, { "epoch": 0.19, "learning_rate": 0.00028085398725100796, "loss": 4.6441, "step": 102000 }, { "epoch": 0.19, "learning_rate": 0.00028076013424733646, "loss": 4.6277, "step": 102500 }, { "epoch": 0.19, "learning_rate": 0.0002806662812436649, "loss": 4.622, "step": 103000 }, { "epoch": 0.19, "learning_rate": 0.00028057242823999336, "loss": 4.6247, "step": 103500 }, { "epoch": 0.2, "learning_rate": 0.0002804785752363218, "loss": 4.6346, "step": 104000 }, { "epoch": 0.2, "learning_rate": 0.0002803847222326503, "loss": 4.6262, "step": 104500 }, { "epoch": 0.2, "learning_rate": 0.00028029086922897877, "loss": 4.6191, "step": 105000 }, { "epoch": 0.2, "learning_rate": 0.0002801970162253072, "loss": 4.6266, "step": 105500 }, { "epoch": 0.2, "learning_rate": 0.0002801031632216357, "loss": 4.6181, "step": 106000 }, { "epoch": 0.2, "learning_rate": 0.00028000931021796417, "loss": 4.6199, "step": 106500 }, { "epoch": 0.2, "learning_rate": 0.0002799154572142926, "loss": 4.6123, "step": 107000 }, { "epoch": 0.2, "learning_rate": 0.00027982160421062113, "loss": 4.6061, "step": 107500 }, { "epoch": 0.2, "learning_rate": 0.0002797277512069496, "loss": 4.6127, "step": 108000 }, { "epoch": 0.2, "learning_rate": 0.0002796338982032781, "loss": 4.6098, "step": 108500 }, { "epoch": 0.2, "learning_rate": 0.00027954004519960653, "loss": 4.5959, "step": 109000 }, { "epoch": 0.21, "learning_rate": 0.00027944619219593504, "loss": 4.6217, "step": 109500 }, { "epoch": 0.21, "learning_rate": 0.0002793523391922635, "loss": 4.6196, "step": 110000 }, { "epoch": 0.21, "learning_rate": 0.000279258486188592, "loss": 4.613, "step": 110500 }, { "epoch": 0.21, "learning_rate": 0.00027916463318492044, "loss": 4.5969, "step": 111000 }, { "epoch": 0.21, "learning_rate": 0.0002790707801812489, "loss": 4.5849, "step": 111500 }, { "epoch": 0.21, "learning_rate": 0.0002789769271775774, "loss": 4.5936, "step": 112000 }, { "epoch": 0.21, "learning_rate": 0.00027888307417390585, "loss": 4.5909, "step": 112500 }, { "epoch": 0.21, "learning_rate": 0.0002787892211702343, "loss": 4.5804, "step": 113000 }, { "epoch": 0.21, "learning_rate": 0.0002786953681665628, "loss": 4.5991, "step": 113500 }, { "epoch": 0.21, "learning_rate": 0.00027860151516289125, "loss": 4.5796, "step": 114000 }, { "epoch": 0.21, "learning_rate": 0.0002785076621592197, "loss": 4.6, "step": 114500 }, { "epoch": 0.22, "learning_rate": 0.00027841380915554815, "loss": 4.5904, "step": 115000 }, { "epoch": 0.22, "learning_rate": 0.00027831995615187666, "loss": 4.6135, "step": 115500 }, { "epoch": 0.22, "learning_rate": 0.0002782261031482051, "loss": 4.5849, "step": 116000 }, { "epoch": 0.22, "learning_rate": 0.0002781322501445336, "loss": 4.5934, "step": 116500 }, { "epoch": 0.22, "learning_rate": 0.00027803839714086206, "loss": 4.5789, "step": 117000 }, { "epoch": 0.22, "learning_rate": 0.0002779445441371905, "loss": 4.6019, "step": 117500 }, { "epoch": 0.22, "learning_rate": 0.000277850691133519, "loss": 4.5784, "step": 118000 }, { "epoch": 0.22, "learning_rate": 0.00027775683812984747, "loss": 4.5894, "step": 118500 }, { "epoch": 0.22, "learning_rate": 0.00027766298512617597, "loss": 4.5661, "step": 119000 }, { "epoch": 0.22, "learning_rate": 0.0002775691321225044, "loss": 4.5731, "step": 119500 }, { "epoch": 0.23, "learning_rate": 0.00027747527911883293, "loss": 4.5879, "step": 120000 }, { "epoch": 0.23, "learning_rate": 0.0002773814261151614, "loss": 4.5668, "step": 120500 }, { "epoch": 0.23, "learning_rate": 0.00027728757311148983, "loss": 4.5854, "step": 121000 }, { "epoch": 0.23, "learning_rate": 0.00027719372010781833, "loss": 4.5802, "step": 121500 }, { "epoch": 0.23, "learning_rate": 0.0002770998671041468, "loss": 4.5791, "step": 122000 }, { "epoch": 0.23, "learning_rate": 0.00027700601410047523, "loss": 4.5802, "step": 122500 }, { "epoch": 0.23, "learning_rate": 0.00027691216109680374, "loss": 4.5772, "step": 123000 }, { "epoch": 0.23, "learning_rate": 0.0002768183080931322, "loss": 4.5704, "step": 123500 }, { "epoch": 0.23, "learning_rate": 0.00027672445508946064, "loss": 4.5865, "step": 124000 }, { "epoch": 0.23, "learning_rate": 0.00027663060208578914, "loss": 4.5698, "step": 124500 }, { "epoch": 0.23, "learning_rate": 0.0002765367490821176, "loss": 4.58, "step": 125000 }, { "epoch": 0.24, "learning_rate": 0.00027644289607844604, "loss": 4.5589, "step": 125500 }, { "epoch": 0.24, "learning_rate": 0.00027634904307477455, "loss": 4.557, "step": 126000 }, { "epoch": 0.24, "learning_rate": 0.000276255190071103, "loss": 4.5452, "step": 126500 }, { "epoch": 0.24, "learning_rate": 0.0002761613370674315, "loss": 4.5473, "step": 127000 }, { "epoch": 0.24, "learning_rate": 0.00027606748406375995, "loss": 4.5498, "step": 127500 }, { "epoch": 0.24, "learning_rate": 0.0002759736310600884, "loss": 4.5575, "step": 128000 }, { "epoch": 0.24, "learning_rate": 0.0002758797780564169, "loss": 4.5452, "step": 128500 }, { "epoch": 0.24, "learning_rate": 0.00027578592505274536, "loss": 4.5502, "step": 129000 }, { "epoch": 0.24, "learning_rate": 0.00027569207204907386, "loss": 4.5673, "step": 129500 }, { "epoch": 0.24, "learning_rate": 0.0002755982190454023, "loss": 4.5635, "step": 130000 }, { "epoch": 0.24, "learning_rate": 0.00027550436604173076, "loss": 4.556, "step": 130500 }, { "epoch": 0.25, "learning_rate": 0.00027541051303805927, "loss": 4.5591, "step": 131000 }, { "epoch": 0.25, "learning_rate": 0.0002753166600343877, "loss": 4.5233, "step": 131500 }, { "epoch": 0.25, "learning_rate": 0.00027522280703071617, "loss": 4.5415, "step": 132000 }, { "epoch": 0.25, "learning_rate": 0.0002751289540270447, "loss": 4.5755, "step": 132500 }, { "epoch": 0.25, "learning_rate": 0.0002750351010233731, "loss": 4.5587, "step": 133000 }, { "epoch": 0.25, "learning_rate": 0.0002749412480197016, "loss": 4.5436, "step": 133500 }, { "epoch": 0.25, "learning_rate": 0.0002748473950160301, "loss": 4.5312, "step": 134000 }, { "epoch": 0.25, "learning_rate": 0.00027475354201235853, "loss": 4.562, "step": 134500 }, { "epoch": 0.25, "learning_rate": 0.000274659689008687, "loss": 4.5392, "step": 135000 }, { "epoch": 0.25, "learning_rate": 0.0002745658360050155, "loss": 4.5207, "step": 135500 }, { "epoch": 0.26, "learning_rate": 0.00027447198300134393, "loss": 4.5521, "step": 136000 }, { "epoch": 0.26, "learning_rate": 0.00027437812999767244, "loss": 4.5322, "step": 136500 }, { "epoch": 0.26, "learning_rate": 0.0002742842769940009, "loss": 4.5305, "step": 137000 }, { "epoch": 0.26, "learning_rate": 0.0002741904239903294, "loss": 4.5166, "step": 137500 }, { "epoch": 0.26, "learning_rate": 0.00027409657098665784, "loss": 4.5248, "step": 138000 }, { "epoch": 0.26, "learning_rate": 0.0002740027179829863, "loss": 4.5413, "step": 138500 }, { "epoch": 0.26, "learning_rate": 0.0002739088649793148, "loss": 4.5441, "step": 139000 }, { "epoch": 0.26, "learning_rate": 0.00027381501197564325, "loss": 4.5325, "step": 139500 }, { "epoch": 0.26, "learning_rate": 0.0002737211589719717, "loss": 4.5529, "step": 140000 }, { "epoch": 0.26, "learning_rate": 0.0002736273059683002, "loss": 4.5275, "step": 140500 }, { "epoch": 0.26, "learning_rate": 0.00027353345296462865, "loss": 4.543, "step": 141000 }, { "epoch": 0.27, "learning_rate": 0.0002734395999609571, "loss": 4.5283, "step": 141500 }, { "epoch": 0.27, "learning_rate": 0.0002733457469572856, "loss": 4.5547, "step": 142000 }, { "epoch": 0.27, "learning_rate": 0.00027325189395361406, "loss": 4.523, "step": 142500 }, { "epoch": 0.27, "learning_rate": 0.0002731580409499425, "loss": 4.5202, "step": 143000 }, { "epoch": 0.27, "learning_rate": 0.000273064187946271, "loss": 4.5235, "step": 143500 }, { "epoch": 0.27, "learning_rate": 0.00027297033494259946, "loss": 4.5292, "step": 144000 }, { "epoch": 0.27, "learning_rate": 0.0002728764819389279, "loss": 4.5304, "step": 144500 }, { "epoch": 0.27, "learning_rate": 0.0002727826289352564, "loss": 4.5203, "step": 145000 }, { "epoch": 0.27, "learning_rate": 0.00027268877593158487, "loss": 4.5426, "step": 145500 }, { "epoch": 0.27, "learning_rate": 0.0002725949229279134, "loss": 4.5062, "step": 146000 }, { "epoch": 0.27, "learning_rate": 0.0002725010699242418, "loss": 4.5218, "step": 146500 }, { "epoch": 0.28, "learning_rate": 0.00027240721692057033, "loss": 4.5159, "step": 147000 }, { "epoch": 0.28, "learning_rate": 0.0002723133639168988, "loss": 4.5134, "step": 147500 }, { "epoch": 0.28, "learning_rate": 0.0002722195109132273, "loss": 4.5127, "step": 148000 }, { "epoch": 0.28, "learning_rate": 0.00027212565790955573, "loss": 4.5214, "step": 148500 }, { "epoch": 0.28, "learning_rate": 0.0002720318049058842, "loss": 4.5216, "step": 149000 }, { "epoch": 0.28, "learning_rate": 0.0002719379519022127, "loss": 4.5274, "step": 149500 }, { "epoch": 0.28, "learning_rate": 0.00027184409889854114, "loss": 4.5214, "step": 150000 }, { "epoch": 0.28, "learning_rate": 0.0002717502458948696, "loss": 4.5199, "step": 150500 }, { "epoch": 0.28, "learning_rate": 0.0002716563928911981, "loss": 4.498, "step": 151000 }, { "epoch": 0.28, "learning_rate": 0.00027156253988752654, "loss": 4.5243, "step": 151500 }, { "epoch": 0.29, "learning_rate": 0.000271468686883855, "loss": 4.5082, "step": 152000 }, { "epoch": 0.29, "learning_rate": 0.00027137483388018344, "loss": 4.4959, "step": 152500 }, { "epoch": 0.29, "learning_rate": 0.00027128098087651195, "loss": 4.4927, "step": 153000 }, { "epoch": 0.29, "learning_rate": 0.0002711871278728404, "loss": 4.4983, "step": 153500 }, { "epoch": 0.29, "learning_rate": 0.00027109327486916885, "loss": 4.5191, "step": 154000 }, { "epoch": 0.29, "learning_rate": 0.00027099942186549735, "loss": 4.4982, "step": 154500 }, { "epoch": 0.29, "learning_rate": 0.0002709055688618258, "loss": 4.5146, "step": 155000 }, { "epoch": 0.29, "learning_rate": 0.0002708117158581543, "loss": 4.5323, "step": 155500 }, { "epoch": 0.29, "learning_rate": 0.00027071786285448276, "loss": 4.4845, "step": 156000 }, { "epoch": 0.29, "learning_rate": 0.00027062400985081126, "loss": 4.5019, "step": 156500 }, { "epoch": 0.29, "learning_rate": 0.0002705301568471397, "loss": 4.5051, "step": 157000 }, { "epoch": 0.3, "learning_rate": 0.0002704363038434682, "loss": 4.4981, "step": 157500 }, { "epoch": 0.3, "learning_rate": 0.00027034245083979667, "loss": 4.5029, "step": 158000 }, { "epoch": 0.3, "learning_rate": 0.0002702485978361251, "loss": 4.4943, "step": 158500 }, { "epoch": 0.3, "learning_rate": 0.0002701547448324536, "loss": 4.5068, "step": 159000 }, { "epoch": 0.3, "learning_rate": 0.0002700608918287821, "loss": 4.5022, "step": 159500 }, { "epoch": 0.3, "learning_rate": 0.0002699670388251105, "loss": 4.5088, "step": 160000 }, { "epoch": 0.3, "learning_rate": 0.00026987318582143903, "loss": 4.4995, "step": 160500 }, { "epoch": 0.3, "learning_rate": 0.0002697793328177675, "loss": 4.482, "step": 161000 }, { "epoch": 0.3, "learning_rate": 0.00026968547981409593, "loss": 4.498, "step": 161500 }, { "epoch": 0.3, "learning_rate": 0.00026959162681042443, "loss": 4.4966, "step": 162000 }, { "epoch": 0.31, "learning_rate": 0.0002694977738067529, "loss": 4.4759, "step": 162500 }, { "epoch": 0.31, "learning_rate": 0.00026940392080308133, "loss": 4.4983, "step": 163000 }, { "epoch": 0.31, "learning_rate": 0.0002693100677994098, "loss": 4.501, "step": 163500 }, { "epoch": 0.31, "learning_rate": 0.0002692162147957383, "loss": 4.493, "step": 164000 }, { "epoch": 0.31, "learning_rate": 0.00026912236179206674, "loss": 4.5082, "step": 164500 }, { "epoch": 0.31, "learning_rate": 0.00026902850878839524, "loss": 4.4999, "step": 165000 }, { "epoch": 0.31, "learning_rate": 0.0002689346557847237, "loss": 4.499, "step": 165500 }, { "epoch": 0.31, "learning_rate": 0.0002688408027810522, "loss": 4.4951, "step": 166000 }, { "epoch": 0.31, "learning_rate": 0.00026874694977738065, "loss": 4.4889, "step": 166500 }, { "epoch": 0.31, "learning_rate": 0.00026865309677370915, "loss": 4.4975, "step": 167000 }, { "epoch": 0.31, "learning_rate": 0.0002685592437700376, "loss": 4.4951, "step": 167500 }, { "epoch": 0.32, "learning_rate": 0.00026846539076636605, "loss": 4.4833, "step": 168000 }, { "epoch": 0.32, "learning_rate": 0.00026837153776269456, "loss": 4.4934, "step": 168500 }, { "epoch": 0.32, "learning_rate": 0.000268277684759023, "loss": 4.4715, "step": 169000 }, { "epoch": 0.32, "learning_rate": 0.00026818383175535146, "loss": 4.4913, "step": 169500 }, { "epoch": 0.32, "learning_rate": 0.00026808997875167996, "loss": 4.4812, "step": 170000 }, { "epoch": 0.32, "learning_rate": 0.0002679961257480084, "loss": 4.4829, "step": 170500 }, { "epoch": 0.32, "learning_rate": 0.00026790227274433687, "loss": 4.4816, "step": 171000 }, { "epoch": 0.32, "learning_rate": 0.00026780841974066537, "loss": 4.4943, "step": 171500 }, { "epoch": 0.32, "learning_rate": 0.0002677145667369938, "loss": 4.4927, "step": 172000 }, { "epoch": 0.32, "learning_rate": 0.00026762071373332227, "loss": 4.4701, "step": 172500 }, { "epoch": 0.32, "learning_rate": 0.0002675268607296508, "loss": 4.4765, "step": 173000 }, { "epoch": 0.33, "learning_rate": 0.0002674330077259792, "loss": 4.4852, "step": 173500 }, { "epoch": 0.33, "learning_rate": 0.0002673391547223077, "loss": 4.4734, "step": 174000 }, { "epoch": 0.33, "learning_rate": 0.0002672453017186362, "loss": 4.487, "step": 174500 }, { "epoch": 0.33, "learning_rate": 0.00026715144871496463, "loss": 4.4866, "step": 175000 }, { "epoch": 0.33, "learning_rate": 0.00026705759571129313, "loss": 4.499, "step": 175500 }, { "epoch": 0.33, "learning_rate": 0.0002669637427076216, "loss": 4.4848, "step": 176000 }, { "epoch": 0.33, "learning_rate": 0.0002668698897039501, "loss": 4.4732, "step": 176500 }, { "epoch": 0.33, "learning_rate": 0.00026677603670027854, "loss": 4.4708, "step": 177000 }, { "epoch": 0.33, "learning_rate": 0.000266682183696607, "loss": 4.4751, "step": 177500 }, { "epoch": 0.33, "learning_rate": 0.0002665883306929355, "loss": 4.4744, "step": 178000 }, { "epoch": 0.34, "learning_rate": 0.00026649447768926394, "loss": 4.4781, "step": 178500 }, { "epoch": 0.34, "learning_rate": 0.0002664006246855924, "loss": 4.4573, "step": 179000 }, { "epoch": 0.34, "learning_rate": 0.0002663067716819209, "loss": 4.4974, "step": 179500 }, { "epoch": 0.34, "learning_rate": 0.00026621291867824935, "loss": 4.482, "step": 180000 }, { "epoch": 0.34, "learning_rate": 0.0002661190656745778, "loss": 4.4881, "step": 180500 }, { "epoch": 0.34, "learning_rate": 0.0002660252126709063, "loss": 4.4581, "step": 181000 }, { "epoch": 0.34, "learning_rate": 0.00026593135966723476, "loss": 4.4804, "step": 181500 }, { "epoch": 0.34, "learning_rate": 0.0002658375066635632, "loss": 4.4607, "step": 182000 }, { "epoch": 0.34, "learning_rate": 0.0002657436536598917, "loss": 4.4511, "step": 182500 }, { "epoch": 0.34, "learning_rate": 0.00026564980065622016, "loss": 4.4807, "step": 183000 }, { "epoch": 0.34, "learning_rate": 0.00026555594765254866, "loss": 4.455, "step": 183500 }, { "epoch": 0.35, "learning_rate": 0.0002654620946488771, "loss": 4.4794, "step": 184000 }, { "epoch": 0.35, "learning_rate": 0.00026536824164520557, "loss": 4.4458, "step": 184500 }, { "epoch": 0.35, "learning_rate": 0.00026527438864153407, "loss": 4.4586, "step": 185000 }, { "epoch": 0.35, "learning_rate": 0.0002651805356378625, "loss": 4.455, "step": 185500 }, { "epoch": 0.35, "learning_rate": 0.000265086682634191, "loss": 4.4608, "step": 186000 }, { "epoch": 0.35, "learning_rate": 0.0002649928296305195, "loss": 4.4719, "step": 186500 }, { "epoch": 0.35, "learning_rate": 0.000264898976626848, "loss": 4.4616, "step": 187000 }, { "epoch": 0.35, "learning_rate": 0.00026480512362317643, "loss": 4.4581, "step": 187500 }, { "epoch": 0.35, "learning_rate": 0.0002647112706195049, "loss": 4.4627, "step": 188000 }, { "epoch": 0.35, "learning_rate": 0.00026461741761583333, "loss": 4.4613, "step": 188500 }, { "epoch": 0.35, "learning_rate": 0.00026452356461216184, "loss": 4.4615, "step": 189000 }, { "epoch": 0.36, "learning_rate": 0.0002644297116084903, "loss": 4.4574, "step": 189500 }, { "epoch": 0.36, "learning_rate": 0.00026433585860481874, "loss": 4.4601, "step": 190000 }, { "epoch": 0.36, "learning_rate": 0.00026424200560114724, "loss": 4.4783, "step": 190500 }, { "epoch": 0.36, "learning_rate": 0.0002641481525974757, "loss": 4.4601, "step": 191000 }, { "epoch": 0.36, "learning_rate": 0.00026405429959380414, "loss": 4.4601, "step": 191500 }, { "epoch": 0.36, "learning_rate": 0.00026396044659013265, "loss": 4.4616, "step": 192000 }, { "epoch": 0.36, "learning_rate": 0.0002638665935864611, "loss": 4.4425, "step": 192500 }, { "epoch": 0.36, "learning_rate": 0.0002637727405827896, "loss": 4.454, "step": 193000 }, { "epoch": 0.36, "learning_rate": 0.00026367888757911805, "loss": 4.442, "step": 193500 }, { "epoch": 0.36, "learning_rate": 0.00026358503457544656, "loss": 4.4526, "step": 194000 }, { "epoch": 0.37, "learning_rate": 0.000263491181571775, "loss": 4.4622, "step": 194500 }, { "epoch": 0.37, "learning_rate": 0.00026339732856810346, "loss": 4.4352, "step": 195000 }, { "epoch": 0.37, "learning_rate": 0.00026330347556443196, "loss": 4.4377, "step": 195500 }, { "epoch": 0.37, "learning_rate": 0.0002632096225607604, "loss": 4.457, "step": 196000 }, { "epoch": 0.37, "learning_rate": 0.0002631157695570889, "loss": 4.4503, "step": 196500 }, { "epoch": 0.37, "learning_rate": 0.00026302191655341737, "loss": 4.4549, "step": 197000 }, { "epoch": 0.37, "learning_rate": 0.0002629280635497458, "loss": 4.4328, "step": 197500 }, { "epoch": 0.37, "learning_rate": 0.0002628342105460743, "loss": 4.4321, "step": 198000 }, { "epoch": 0.37, "learning_rate": 0.00026274035754240277, "loss": 4.4529, "step": 198500 }, { "epoch": 0.37, "learning_rate": 0.0002626465045387312, "loss": 4.4408, "step": 199000 }, { "epoch": 0.37, "learning_rate": 0.0002625526515350597, "loss": 4.4626, "step": 199500 }, { "epoch": 0.38, "learning_rate": 0.0002624587985313882, "loss": 4.4443, "step": 200000 }, { "epoch": 0.38, "learning_rate": 0.0002623649455277166, "loss": 4.4406, "step": 200500 }, { "epoch": 0.38, "learning_rate": 0.0002622710925240451, "loss": 4.4512, "step": 201000 }, { "epoch": 0.38, "learning_rate": 0.0002621772395203736, "loss": 4.452, "step": 201500 }, { "epoch": 0.38, "learning_rate": 0.00026208338651670203, "loss": 4.4217, "step": 202000 }, { "epoch": 0.38, "learning_rate": 0.00026198953351303054, "loss": 4.4309, "step": 202500 }, { "epoch": 0.38, "learning_rate": 0.000261895680509359, "loss": 4.4316, "step": 203000 }, { "epoch": 0.38, "learning_rate": 0.0002618018275056875, "loss": 4.4368, "step": 203500 }, { "epoch": 0.38, "learning_rate": 0.00026170797450201594, "loss": 4.444, "step": 204000 }, { "epoch": 0.38, "learning_rate": 0.00026161412149834445, "loss": 4.4473, "step": 204500 }, { "epoch": 0.38, "learning_rate": 0.0002615202684946729, "loss": 4.4358, "step": 205000 }, { "epoch": 0.39, "learning_rate": 0.00026142641549100135, "loss": 4.4415, "step": 205500 }, { "epoch": 0.39, "learning_rate": 0.00026133256248732985, "loss": 4.449, "step": 206000 }, { "epoch": 0.39, "learning_rate": 0.0002612387094836583, "loss": 4.4278, "step": 206500 }, { "epoch": 0.39, "learning_rate": 0.00026114485647998675, "loss": 4.4475, "step": 207000 }, { "epoch": 0.39, "learning_rate": 0.00026105100347631526, "loss": 4.4435, "step": 207500 }, { "epoch": 0.39, "learning_rate": 0.0002609571504726437, "loss": 4.4337, "step": 208000 }, { "epoch": 0.39, "learning_rate": 0.00026086329746897216, "loss": 4.4399, "step": 208500 }, { "epoch": 0.39, "learning_rate": 0.00026076944446530066, "loss": 4.4535, "step": 209000 }, { "epoch": 0.39, "learning_rate": 0.0002606755914616291, "loss": 4.4317, "step": 209500 }, { "epoch": 0.39, "learning_rate": 0.00026058173845795756, "loss": 4.4487, "step": 210000 }, { "epoch": 0.4, "learning_rate": 0.00026048788545428607, "loss": 4.4395, "step": 210500 }, { "epoch": 0.4, "learning_rate": 0.0002603940324506145, "loss": 4.4324, "step": 211000 }, { "epoch": 0.4, "learning_rate": 0.00026030017944694297, "loss": 4.4298, "step": 211500 }, { "epoch": 0.4, "learning_rate": 0.00026020632644327147, "loss": 4.4465, "step": 212000 }, { "epoch": 0.4, "learning_rate": 0.0002601124734395999, "loss": 4.4077, "step": 212500 }, { "epoch": 0.4, "learning_rate": 0.0002600186204359284, "loss": 4.4179, "step": 213000 }, { "epoch": 0.4, "learning_rate": 0.0002599247674322569, "loss": 4.4179, "step": 213500 }, { "epoch": 0.4, "learning_rate": 0.0002598309144285854, "loss": 4.4445, "step": 214000 }, { "epoch": 0.4, "learning_rate": 0.00025973706142491383, "loss": 4.4384, "step": 214500 }, { "epoch": 0.4, "learning_rate": 0.0002596432084212423, "loss": 4.414, "step": 215000 }, { "epoch": 0.4, "learning_rate": 0.0002595493554175708, "loss": 4.4242, "step": 215500 }, { "epoch": 0.41, "learning_rate": 0.00025945550241389924, "loss": 4.4212, "step": 216000 }, { "epoch": 0.41, "learning_rate": 0.0002593616494102277, "loss": 4.4517, "step": 216500 }, { "epoch": 0.41, "learning_rate": 0.0002592677964065562, "loss": 4.4202, "step": 217000 }, { "epoch": 0.41, "learning_rate": 0.00025917394340288464, "loss": 4.419, "step": 217500 }, { "epoch": 0.41, "learning_rate": 0.0002590800903992131, "loss": 4.4212, "step": 218000 }, { "epoch": 0.41, "learning_rate": 0.0002589862373955416, "loss": 4.4205, "step": 218500 }, { "epoch": 0.41, "learning_rate": 0.00025889238439187005, "loss": 4.4269, "step": 219000 }, { "epoch": 0.41, "learning_rate": 0.0002587985313881985, "loss": 4.4234, "step": 219500 }, { "epoch": 0.41, "learning_rate": 0.000258704678384527, "loss": 4.4394, "step": 220000 }, { "epoch": 0.41, "learning_rate": 0.00025861082538085545, "loss": 4.448, "step": 220500 }, { "epoch": 0.41, "learning_rate": 0.0002585169723771839, "loss": 4.4311, "step": 221000 }, { "epoch": 0.42, "learning_rate": 0.0002584231193735124, "loss": 4.4127, "step": 221500 }, { "epoch": 0.42, "learning_rate": 0.00025832926636984086, "loss": 4.4226, "step": 222000 }, { "epoch": 0.42, "learning_rate": 0.00025823541336616936, "loss": 4.4315, "step": 222500 }, { "epoch": 0.42, "learning_rate": 0.0002581415603624978, "loss": 4.4174, "step": 223000 }, { "epoch": 0.42, "learning_rate": 0.0002580477073588263, "loss": 4.408, "step": 223500 }, { "epoch": 0.42, "learning_rate": 0.00025795385435515477, "loss": 4.4174, "step": 224000 }, { "epoch": 0.42, "learning_rate": 0.0002578600013514832, "loss": 4.4194, "step": 224500 }, { "epoch": 0.42, "learning_rate": 0.0002577661483478117, "loss": 4.4232, "step": 225000 }, { "epoch": 0.42, "learning_rate": 0.00025767229534414017, "loss": 4.4008, "step": 225500 }, { "epoch": 0.42, "learning_rate": 0.0002575784423404686, "loss": 4.4126, "step": 226000 }, { "epoch": 0.43, "learning_rate": 0.0002574845893367971, "loss": 4.419, "step": 226500 }, { "epoch": 0.43, "learning_rate": 0.0002573907363331256, "loss": 4.4076, "step": 227000 }, { "epoch": 0.43, "learning_rate": 0.000257296883329454, "loss": 4.4135, "step": 227500 }, { "epoch": 0.43, "learning_rate": 0.00025720303032578253, "loss": 4.4165, "step": 228000 }, { "epoch": 0.43, "learning_rate": 0.000257109177322111, "loss": 4.3921, "step": 228500 }, { "epoch": 0.43, "learning_rate": 0.00025701532431843943, "loss": 4.4044, "step": 229000 }, { "epoch": 0.43, "learning_rate": 0.00025692147131476794, "loss": 4.4253, "step": 229500 }, { "epoch": 0.43, "learning_rate": 0.0002568276183110964, "loss": 4.3979, "step": 230000 }, { "epoch": 0.43, "learning_rate": 0.0002567337653074249, "loss": 4.4103, "step": 230500 }, { "epoch": 0.43, "learning_rate": 0.00025663991230375334, "loss": 4.4104, "step": 231000 }, { "epoch": 0.43, "learning_rate": 0.0002565460593000818, "loss": 4.4139, "step": 231500 }, { "epoch": 0.44, "learning_rate": 0.0002564522062964103, "loss": 4.4317, "step": 232000 }, { "epoch": 0.44, "learning_rate": 0.00025635835329273875, "loss": 4.4092, "step": 232500 }, { "epoch": 0.44, "learning_rate": 0.00025626450028906725, "loss": 4.4138, "step": 233000 }, { "epoch": 0.44, "learning_rate": 0.0002561706472853957, "loss": 4.4008, "step": 233500 }, { "epoch": 0.44, "learning_rate": 0.0002560767942817242, "loss": 4.4074, "step": 234000 }, { "epoch": 0.44, "learning_rate": 0.00025598294127805266, "loss": 4.4017, "step": 234500 }, { "epoch": 0.44, "learning_rate": 0.0002558890882743811, "loss": 4.3993, "step": 235000 }, { "epoch": 0.44, "learning_rate": 0.0002557952352707096, "loss": 4.3899, "step": 235500 }, { "epoch": 0.44, "learning_rate": 0.00025570138226703806, "loss": 4.411, "step": 236000 }, { "epoch": 0.44, "learning_rate": 0.0002556075292633665, "loss": 4.4277, "step": 236500 }, { "epoch": 0.44, "learning_rate": 0.00025551367625969496, "loss": 4.3965, "step": 237000 }, { "epoch": 0.45, "learning_rate": 0.00025541982325602347, "loss": 4.4047, "step": 237500 }, { "epoch": 0.45, "learning_rate": 0.0002553259702523519, "loss": 4.4081, "step": 238000 }, { "epoch": 0.45, "learning_rate": 0.00025523211724868037, "loss": 4.4082, "step": 238500 }, { "epoch": 0.45, "learning_rate": 0.00025513826424500887, "loss": 4.397, "step": 239000 }, { "epoch": 0.45, "learning_rate": 0.0002550444112413373, "loss": 4.3929, "step": 239500 }, { "epoch": 0.45, "learning_rate": 0.0002549505582376658, "loss": 4.4188, "step": 240000 }, { "epoch": 0.45, "learning_rate": 0.0002548567052339943, "loss": 4.3988, "step": 240500 }, { "epoch": 0.45, "learning_rate": 0.0002547628522303228, "loss": 4.4008, "step": 241000 }, { "epoch": 0.45, "learning_rate": 0.00025466899922665123, "loss": 4.4084, "step": 241500 }, { "epoch": 0.45, "learning_rate": 0.0002545751462229797, "loss": 4.4074, "step": 242000 }, { "epoch": 0.46, "learning_rate": 0.0002544812932193082, "loss": 4.4194, "step": 242500 }, { "epoch": 0.46, "learning_rate": 0.00025438744021563664, "loss": 4.4067, "step": 243000 }, { "epoch": 0.46, "learning_rate": 0.00025429358721196514, "loss": 4.4004, "step": 243500 }, { "epoch": 0.46, "learning_rate": 0.0002541997342082936, "loss": 4.4099, "step": 244000 }, { "epoch": 0.46, "learning_rate": 0.00025410588120462204, "loss": 4.4007, "step": 244500 }, { "epoch": 0.46, "learning_rate": 0.00025401202820095055, "loss": 4.3855, "step": 245000 }, { "epoch": 0.46, "learning_rate": 0.000253918175197279, "loss": 4.3895, "step": 245500 }, { "epoch": 0.46, "learning_rate": 0.00025382432219360745, "loss": 4.4021, "step": 246000 }, { "epoch": 0.46, "learning_rate": 0.00025373046918993595, "loss": 4.4017, "step": 246500 }, { "epoch": 0.46, "learning_rate": 0.0002536366161862644, "loss": 4.3945, "step": 247000 }, { "epoch": 0.46, "learning_rate": 0.00025354276318259285, "loss": 4.4167, "step": 247500 }, { "epoch": 0.47, "learning_rate": 0.00025344891017892136, "loss": 4.3858, "step": 248000 }, { "epoch": 0.47, "learning_rate": 0.0002533550571752498, "loss": 4.385, "step": 248500 }, { "epoch": 0.47, "learning_rate": 0.00025326120417157826, "loss": 4.3828, "step": 249000 }, { "epoch": 0.47, "learning_rate": 0.00025316735116790676, "loss": 4.3942, "step": 249500 }, { "epoch": 0.47, "learning_rate": 0.0002530734981642352, "loss": 4.4001, "step": 250000 }, { "epoch": 0.47, "learning_rate": 0.0002529796451605637, "loss": 4.3929, "step": 250500 }, { "epoch": 0.47, "learning_rate": 0.00025288579215689217, "loss": 4.4087, "step": 251000 }, { "epoch": 0.47, "learning_rate": 0.00025279193915322067, "loss": 4.389, "step": 251500 }, { "epoch": 0.47, "learning_rate": 0.0002526980861495491, "loss": 4.3932, "step": 252000 }, { "epoch": 0.47, "learning_rate": 0.00025260423314587757, "loss": 4.403, "step": 252500 }, { "epoch": 0.47, "learning_rate": 0.0002525103801422061, "loss": 4.3735, "step": 253000 }, { "epoch": 0.48, "learning_rate": 0.00025241652713853453, "loss": 4.3942, "step": 253500 }, { "epoch": 0.48, "learning_rate": 0.000252322674134863, "loss": 4.3777, "step": 254000 }, { "epoch": 0.48, "learning_rate": 0.0002522288211311915, "loss": 4.3981, "step": 254500 }, { "epoch": 0.48, "learning_rate": 0.00025213496812751993, "loss": 4.3964, "step": 255000 }, { "epoch": 0.48, "learning_rate": 0.0002520411151238484, "loss": 4.3924, "step": 255500 }, { "epoch": 0.48, "learning_rate": 0.0002519472621201769, "loss": 4.4021, "step": 256000 }, { "epoch": 0.48, "learning_rate": 0.00025185340911650534, "loss": 4.3905, "step": 256500 }, { "epoch": 0.48, "learning_rate": 0.0002517595561128338, "loss": 4.3912, "step": 257000 }, { "epoch": 0.48, "learning_rate": 0.0002516657031091623, "loss": 4.3911, "step": 257500 }, { "epoch": 0.48, "learning_rate": 0.00025157185010549074, "loss": 4.384, "step": 258000 }, { "epoch": 0.49, "learning_rate": 0.0002514779971018192, "loss": 4.3818, "step": 258500 }, { "epoch": 0.49, "learning_rate": 0.0002513841440981477, "loss": 4.3821, "step": 259000 }, { "epoch": 0.49, "learning_rate": 0.00025129029109447615, "loss": 4.3911, "step": 259500 }, { "epoch": 0.49, "learning_rate": 0.00025119643809080465, "loss": 4.3689, "step": 260000 }, { "epoch": 0.49, "learning_rate": 0.0002511025850871331, "loss": 4.3823, "step": 260500 }, { "epoch": 0.49, "learning_rate": 0.0002510087320834616, "loss": 4.383, "step": 261000 }, { "epoch": 0.49, "learning_rate": 0.00025091487907979006, "loss": 4.3888, "step": 261500 }, { "epoch": 0.49, "learning_rate": 0.0002508210260761185, "loss": 4.3815, "step": 262000 }, { "epoch": 0.49, "learning_rate": 0.000250727173072447, "loss": 4.3687, "step": 262500 }, { "epoch": 0.49, "learning_rate": 0.00025063332006877546, "loss": 4.3823, "step": 263000 }, { "epoch": 0.49, "learning_rate": 0.0002505394670651039, "loss": 4.3825, "step": 263500 }, { "epoch": 0.5, "learning_rate": 0.0002504456140614324, "loss": 4.374, "step": 264000 }, { "epoch": 0.5, "learning_rate": 0.00025035176105776087, "loss": 4.3859, "step": 264500 }, { "epoch": 0.5, "learning_rate": 0.0002502579080540893, "loss": 4.4123, "step": 265000 }, { "epoch": 0.5, "learning_rate": 0.0002501640550504178, "loss": 4.3858, "step": 265500 }, { "epoch": 0.5, "learning_rate": 0.0002500702020467463, "loss": 4.3665, "step": 266000 }, { "epoch": 0.5, "learning_rate": 0.0002499763490430747, "loss": 4.3723, "step": 266500 }, { "epoch": 0.5, "learning_rate": 0.00024988249603940323, "loss": 4.3791, "step": 267000 }, { "epoch": 0.5, "learning_rate": 0.0002497886430357317, "loss": 4.3884, "step": 267500 }, { "epoch": 0.5, "learning_rate": 0.00024969479003206013, "loss": 4.3901, "step": 268000 }, { "epoch": 0.5, "learning_rate": 0.00024960093702838863, "loss": 4.381, "step": 268500 }, { "epoch": 0.5, "learning_rate": 0.0002495070840247171, "loss": 4.3554, "step": 269000 }, { "epoch": 0.51, "learning_rate": 0.0002494132310210456, "loss": 4.3889, "step": 269500 }, { "epoch": 0.51, "learning_rate": 0.00024931937801737404, "loss": 4.3601, "step": 270000 }, { "epoch": 0.51, "learning_rate": 0.00024922552501370254, "loss": 4.38, "step": 270500 }, { "epoch": 0.51, "learning_rate": 0.000249131672010031, "loss": 4.3733, "step": 271000 }, { "epoch": 0.51, "learning_rate": 0.0002490378190063595, "loss": 4.3805, "step": 271500 }, { "epoch": 0.51, "learning_rate": 0.00024894396600268795, "loss": 4.3747, "step": 272000 }, { "epoch": 0.51, "learning_rate": 0.0002488501129990164, "loss": 4.3724, "step": 272500 }, { "epoch": 0.51, "learning_rate": 0.00024875625999534485, "loss": 4.3476, "step": 273000 }, { "epoch": 0.51, "learning_rate": 0.00024866240699167335, "loss": 4.364, "step": 273500 }, { "epoch": 0.51, "learning_rate": 0.0002485685539880018, "loss": 4.385, "step": 274000 }, { "epoch": 0.52, "learning_rate": 0.00024847470098433025, "loss": 4.3838, "step": 274500 }, { "epoch": 0.52, "learning_rate": 0.00024838084798065876, "loss": 4.3826, "step": 275000 }, { "epoch": 0.52, "learning_rate": 0.0002482869949769872, "loss": 4.377, "step": 275500 }, { "epoch": 0.52, "learning_rate": 0.00024819314197331566, "loss": 4.3527, "step": 276000 }, { "epoch": 0.52, "learning_rate": 0.00024809928896964416, "loss": 4.3549, "step": 276500 }, { "epoch": 0.52, "learning_rate": 0.0002480054359659726, "loss": 4.3647, "step": 277000 }, { "epoch": 0.52, "learning_rate": 0.0002479115829623011, "loss": 4.363, "step": 277500 }, { "epoch": 0.52, "learning_rate": 0.00024781772995862957, "loss": 4.3738, "step": 278000 }, { "epoch": 0.52, "learning_rate": 0.000247723876954958, "loss": 4.3596, "step": 278500 }, { "epoch": 0.52, "learning_rate": 0.0002476300239512865, "loss": 4.3564, "step": 279000 }, { "epoch": 0.52, "learning_rate": 0.000247536170947615, "loss": 4.3638, "step": 279500 }, { "epoch": 0.53, "learning_rate": 0.0002474423179439435, "loss": 4.3797, "step": 280000 }, { "epoch": 0.53, "learning_rate": 0.00024734846494027193, "loss": 4.3552, "step": 280500 }, { "epoch": 0.53, "learning_rate": 0.00024725461193660043, "loss": 4.3644, "step": 281000 }, { "epoch": 0.53, "learning_rate": 0.0002471607589329289, "loss": 4.3884, "step": 281500 }, { "epoch": 0.53, "learning_rate": 0.00024706690592925733, "loss": 4.3544, "step": 282000 }, { "epoch": 0.53, "learning_rate": 0.00024697305292558584, "loss": 4.3692, "step": 282500 }, { "epoch": 0.53, "learning_rate": 0.0002468791999219143, "loss": 4.3538, "step": 283000 }, { "epoch": 0.53, "learning_rate": 0.00024678534691824274, "loss": 4.3398, "step": 283500 }, { "epoch": 0.53, "learning_rate": 0.00024669149391457124, "loss": 4.3694, "step": 284000 }, { "epoch": 0.53, "learning_rate": 0.0002465976409108997, "loss": 4.3593, "step": 284500 }, { "epoch": 0.53, "learning_rate": 0.00024650378790722814, "loss": 4.366, "step": 285000 }, { "epoch": 0.54, "learning_rate": 0.0002464099349035566, "loss": 4.3603, "step": 285500 }, { "epoch": 0.54, "learning_rate": 0.0002463160818998851, "loss": 4.3453, "step": 286000 }, { "epoch": 0.54, "learning_rate": 0.00024622222889621355, "loss": 4.3559, "step": 286500 }, { "epoch": 0.54, "learning_rate": 0.00024612837589254205, "loss": 4.3786, "step": 287000 }, { "epoch": 0.54, "learning_rate": 0.0002460345228888705, "loss": 4.3518, "step": 287500 }, { "epoch": 0.54, "learning_rate": 0.000245940669885199, "loss": 4.359, "step": 288000 }, { "epoch": 0.54, "learning_rate": 0.00024584681688152746, "loss": 4.3653, "step": 288500 }, { "epoch": 0.54, "learning_rate": 0.0002457529638778559, "loss": 4.3666, "step": 289000 }, { "epoch": 0.54, "learning_rate": 0.0002456591108741844, "loss": 4.3611, "step": 289500 }, { "epoch": 0.54, "learning_rate": 0.00024556525787051286, "loss": 4.3547, "step": 290000 }, { "epoch": 0.55, "learning_rate": 0.00024547140486684137, "loss": 4.3447, "step": 290500 }, { "epoch": 0.55, "learning_rate": 0.0002453775518631698, "loss": 4.3746, "step": 291000 }, { "epoch": 0.55, "learning_rate": 0.00024528369885949827, "loss": 4.3582, "step": 291500 }, { "epoch": 0.55, "learning_rate": 0.0002451898458558268, "loss": 4.3502, "step": 292000 }, { "epoch": 0.55, "learning_rate": 0.0002450959928521552, "loss": 4.3524, "step": 292500 }, { "epoch": 0.55, "learning_rate": 0.0002450021398484837, "loss": 4.3419, "step": 293000 }, { "epoch": 0.55, "learning_rate": 0.0002449082868448122, "loss": 4.364, "step": 293500 }, { "epoch": 0.55, "learning_rate": 0.00024481443384114063, "loss": 4.3592, "step": 294000 }, { "epoch": 0.55, "learning_rate": 0.0002447205808374691, "loss": 4.3825, "step": 294500 }, { "epoch": 0.55, "learning_rate": 0.0002446267278337976, "loss": 4.3526, "step": 295000 }, { "epoch": 0.55, "learning_rate": 0.00024453287483012603, "loss": 4.3585, "step": 295500 }, { "epoch": 0.56, "learning_rate": 0.0002444390218264545, "loss": 4.3622, "step": 296000 }, { "epoch": 0.56, "learning_rate": 0.000244345168822783, "loss": 4.3534, "step": 296500 }, { "epoch": 0.56, "learning_rate": 0.00024425131581911144, "loss": 4.353, "step": 297000 }, { "epoch": 0.56, "learning_rate": 0.00024415746281543994, "loss": 4.3559, "step": 297500 }, { "epoch": 0.56, "learning_rate": 0.00024406360981176837, "loss": 4.3668, "step": 298000 }, { "epoch": 0.56, "learning_rate": 0.00024396975680809687, "loss": 4.346, "step": 298500 }, { "epoch": 0.56, "learning_rate": 0.00024387590380442532, "loss": 4.367, "step": 299000 }, { "epoch": 0.56, "learning_rate": 0.0002437820508007538, "loss": 4.3727, "step": 299500 }, { "epoch": 0.56, "learning_rate": 0.00024368819779708228, "loss": 4.3396, "step": 300000 }, { "epoch": 0.56, "learning_rate": 0.00024359434479341075, "loss": 4.3482, "step": 300500 }, { "epoch": 0.56, "learning_rate": 0.0002435004917897392, "loss": 4.3492, "step": 301000 }, { "epoch": 0.57, "learning_rate": 0.0002434066387860677, "loss": 4.3645, "step": 301500 }, { "epoch": 0.57, "learning_rate": 0.00024331278578239616, "loss": 4.3599, "step": 302000 }, { "epoch": 0.57, "learning_rate": 0.0002432189327787246, "loss": 4.3476, "step": 302500 }, { "epoch": 0.57, "learning_rate": 0.00024312507977505311, "loss": 4.3586, "step": 303000 }, { "epoch": 0.57, "learning_rate": 0.00024303122677138156, "loss": 4.3416, "step": 303500 }, { "epoch": 0.57, "learning_rate": 0.00024293737376771004, "loss": 4.3826, "step": 304000 }, { "epoch": 0.57, "learning_rate": 0.00024284352076403852, "loss": 4.3317, "step": 304500 }, { "epoch": 0.57, "learning_rate": 0.000242749667760367, "loss": 4.3539, "step": 305000 }, { "epoch": 0.57, "learning_rate": 0.00024265581475669545, "loss": 4.344, "step": 305500 }, { "epoch": 0.57, "learning_rate": 0.00024256196175302392, "loss": 4.3666, "step": 306000 }, { "epoch": 0.58, "learning_rate": 0.0002424681087493524, "loss": 4.3372, "step": 306500 }, { "epoch": 0.58, "learning_rate": 0.00024237425574568085, "loss": 4.3413, "step": 307000 }, { "epoch": 0.58, "learning_rate": 0.00024228040274200936, "loss": 4.346, "step": 307500 }, { "epoch": 0.58, "learning_rate": 0.0002421865497383378, "loss": 4.3614, "step": 308000 }, { "epoch": 0.58, "learning_rate": 0.00024209269673466626, "loss": 4.3508, "step": 308500 }, { "epoch": 0.58, "learning_rate": 0.00024199884373099473, "loss": 4.3532, "step": 309000 }, { "epoch": 0.58, "learning_rate": 0.0002419049907273232, "loss": 4.3417, "step": 309500 }, { "epoch": 0.58, "learning_rate": 0.0002418111377236517, "loss": 4.3452, "step": 310000 }, { "epoch": 0.58, "learning_rate": 0.00024171728471998014, "loss": 4.3506, "step": 310500 }, { "epoch": 0.58, "learning_rate": 0.00024162343171630864, "loss": 4.3387, "step": 311000 }, { "epoch": 0.58, "learning_rate": 0.0002415295787126371, "loss": 4.35, "step": 311500 }, { "epoch": 0.59, "learning_rate": 0.00024143572570896555, "loss": 4.3511, "step": 312000 }, { "epoch": 0.59, "learning_rate": 0.00024134187270529405, "loss": 4.3422, "step": 312500 }, { "epoch": 0.59, "learning_rate": 0.0002412480197016225, "loss": 4.3391, "step": 313000 }, { "epoch": 0.59, "learning_rate": 0.00024115416669795098, "loss": 4.3499, "step": 313500 }, { "epoch": 0.59, "learning_rate": 0.00024106031369427945, "loss": 4.3613, "step": 314000 }, { "epoch": 0.59, "learning_rate": 0.00024096646069060793, "loss": 4.3589, "step": 314500 }, { "epoch": 0.59, "learning_rate": 0.00024087260768693638, "loss": 4.3397, "step": 315000 }, { "epoch": 0.59, "learning_rate": 0.0002407787546832649, "loss": 4.3506, "step": 315500 }, { "epoch": 0.59, "learning_rate": 0.00024068490167959334, "loss": 4.3429, "step": 316000 }, { "epoch": 0.59, "learning_rate": 0.0002405910486759218, "loss": 4.3441, "step": 316500 }, { "epoch": 0.6, "learning_rate": 0.0002404971956722503, "loss": 4.3449, "step": 317000 }, { "epoch": 0.6, "learning_rate": 0.00024040334266857874, "loss": 4.3546, "step": 317500 }, { "epoch": 0.6, "learning_rate": 0.00024030948966490722, "loss": 4.3526, "step": 318000 }, { "epoch": 0.6, "learning_rate": 0.0002402156366612357, "loss": 4.3328, "step": 318500 }, { "epoch": 0.6, "learning_rate": 0.00024012178365756415, "loss": 4.3409, "step": 319000 }, { "epoch": 0.6, "learning_rate": 0.00024002793065389262, "loss": 4.3399, "step": 319500 }, { "epoch": 0.6, "learning_rate": 0.0002399340776502211, "loss": 4.3447, "step": 320000 }, { "epoch": 0.6, "learning_rate": 0.00023984022464654958, "loss": 4.3372, "step": 320500 }, { "epoch": 0.6, "learning_rate": 0.00023974637164287803, "loss": 4.3523, "step": 321000 }, { "epoch": 0.6, "learning_rate": 0.00023965251863920648, "loss": 4.3183, "step": 321500 }, { "epoch": 0.6, "learning_rate": 0.00023955866563553498, "loss": 4.3426, "step": 322000 }, { "epoch": 0.61, "learning_rate": 0.00023946481263186344, "loss": 4.3426, "step": 322500 }, { "epoch": 0.61, "learning_rate": 0.0002393709596281919, "loss": 4.3506, "step": 323000 }, { "epoch": 0.61, "learning_rate": 0.0002392771066245204, "loss": 4.3224, "step": 323500 }, { "epoch": 0.61, "learning_rate": 0.00023918325362084887, "loss": 4.3441, "step": 324000 }, { "epoch": 0.61, "learning_rate": 0.00023908940061717732, "loss": 4.3336, "step": 324500 }, { "epoch": 0.61, "learning_rate": 0.00023899554761350582, "loss": 4.3337, "step": 325000 }, { "epoch": 0.61, "learning_rate": 0.00023890169460983427, "loss": 4.3369, "step": 325500 }, { "epoch": 0.61, "learning_rate": 0.00023880784160616272, "loss": 4.3413, "step": 326000 }, { "epoch": 0.61, "learning_rate": 0.00023871398860249123, "loss": 4.328, "step": 326500 }, { "epoch": 0.61, "learning_rate": 0.00023862013559881968, "loss": 4.3425, "step": 327000 }, { "epoch": 0.61, "learning_rate": 0.00023852628259514816, "loss": 4.3429, "step": 327500 }, { "epoch": 0.62, "learning_rate": 0.00023843242959147663, "loss": 4.3412, "step": 328000 }, { "epoch": 0.62, "learning_rate": 0.0002383385765878051, "loss": 4.3334, "step": 328500 }, { "epoch": 0.62, "learning_rate": 0.00023824472358413356, "loss": 4.3338, "step": 329000 }, { "epoch": 0.62, "learning_rate": 0.00023815087058046204, "loss": 4.3422, "step": 329500 }, { "epoch": 0.62, "learning_rate": 0.00023805701757679052, "loss": 4.3341, "step": 330000 }, { "epoch": 0.62, "learning_rate": 0.00023796316457311897, "loss": 4.339, "step": 330500 }, { "epoch": 0.62, "learning_rate": 0.00023786931156944747, "loss": 4.3386, "step": 331000 }, { "epoch": 0.62, "learning_rate": 0.00023777545856577592, "loss": 4.333, "step": 331500 }, { "epoch": 0.62, "learning_rate": 0.00023768160556210437, "loss": 4.3387, "step": 332000 }, { "epoch": 0.62, "learning_rate": 0.00023758775255843288, "loss": 4.3313, "step": 332500 }, { "epoch": 0.63, "learning_rate": 0.00023749389955476133, "loss": 4.3317, "step": 333000 }, { "epoch": 0.63, "learning_rate": 0.0002374000465510898, "loss": 4.3209, "step": 333500 }, { "epoch": 0.63, "learning_rate": 0.00023730619354741825, "loss": 4.331, "step": 334000 }, { "epoch": 0.63, "learning_rate": 0.00023721234054374676, "loss": 4.3382, "step": 334500 }, { "epoch": 0.63, "learning_rate": 0.0002371184875400752, "loss": 4.3364, "step": 335000 }, { "epoch": 0.63, "learning_rate": 0.00023702463453640366, "loss": 4.3246, "step": 335500 }, { "epoch": 0.63, "learning_rate": 0.00023693078153273216, "loss": 4.3123, "step": 336000 }, { "epoch": 0.63, "learning_rate": 0.0002368369285290606, "loss": 4.3352, "step": 336500 }, { "epoch": 0.63, "learning_rate": 0.0002367430755253891, "loss": 4.3253, "step": 337000 }, { "epoch": 0.63, "learning_rate": 0.00023664922252171757, "loss": 4.3184, "step": 337500 }, { "epoch": 0.63, "learning_rate": 0.00023655536951804605, "loss": 4.3325, "step": 338000 }, { "epoch": 0.64, "learning_rate": 0.0002364615165143745, "loss": 4.3392, "step": 338500 }, { "epoch": 0.64, "learning_rate": 0.000236367663510703, "loss": 4.3291, "step": 339000 }, { "epoch": 0.64, "learning_rate": 0.00023627381050703145, "loss": 4.3066, "step": 339500 }, { "epoch": 0.64, "learning_rate": 0.0002361799575033599, "loss": 4.3231, "step": 340000 }, { "epoch": 0.64, "learning_rate": 0.0002360861044996884, "loss": 4.3297, "step": 340500 }, { "epoch": 0.64, "learning_rate": 0.00023599225149601686, "loss": 4.3105, "step": 341000 }, { "epoch": 0.64, "learning_rate": 0.00023589839849234533, "loss": 4.3169, "step": 341500 }, { "epoch": 0.64, "learning_rate": 0.0002358045454886738, "loss": 4.3291, "step": 342000 }, { "epoch": 0.64, "learning_rate": 0.00023571069248500226, "loss": 4.3137, "step": 342500 }, { "epoch": 0.64, "learning_rate": 0.00023561683948133074, "loss": 4.3295, "step": 343000 }, { "epoch": 0.64, "learning_rate": 0.00023552298647765922, "loss": 4.3266, "step": 343500 }, { "epoch": 0.65, "learning_rate": 0.0002354291334739877, "loss": 4.3203, "step": 344000 }, { "epoch": 0.65, "learning_rate": 0.00023533528047031614, "loss": 4.3302, "step": 344500 }, { "epoch": 0.65, "learning_rate": 0.0002352414274666446, "loss": 4.3419, "step": 345000 }, { "epoch": 0.65, "learning_rate": 0.0002351475744629731, "loss": 4.3287, "step": 345500 }, { "epoch": 0.65, "learning_rate": 0.00023505372145930155, "loss": 4.3181, "step": 346000 }, { "epoch": 0.65, "learning_rate": 0.00023495986845563003, "loss": 4.3305, "step": 346500 }, { "epoch": 0.65, "learning_rate": 0.0002348660154519585, "loss": 4.3143, "step": 347000 }, { "epoch": 0.65, "learning_rate": 0.00023477216244828698, "loss": 4.3286, "step": 347500 }, { "epoch": 0.65, "learning_rate": 0.00023467830944461543, "loss": 4.3343, "step": 348000 }, { "epoch": 0.65, "learning_rate": 0.00023458445644094394, "loss": 4.3125, "step": 348500 }, { "epoch": 0.66, "learning_rate": 0.00023449060343727239, "loss": 4.3239, "step": 349000 }, { "epoch": 0.66, "learning_rate": 0.00023439675043360084, "loss": 4.3225, "step": 349500 }, { "epoch": 0.66, "learning_rate": 0.00023430289742992934, "loss": 4.3198, "step": 350000 }, { "epoch": 0.66, "learning_rate": 0.0002342090444262578, "loss": 4.3288, "step": 350500 }, { "epoch": 0.66, "learning_rate": 0.00023411519142258627, "loss": 4.327, "step": 351000 }, { "epoch": 0.66, "learning_rate": 0.00023402133841891475, "loss": 4.322, "step": 351500 }, { "epoch": 0.66, "learning_rate": 0.00023392748541524322, "loss": 4.3258, "step": 352000 }, { "epoch": 0.66, "learning_rate": 0.00023383363241157167, "loss": 4.33, "step": 352500 }, { "epoch": 0.66, "learning_rate": 0.00023373977940790015, "loss": 4.3214, "step": 353000 }, { "epoch": 0.66, "learning_rate": 0.00023364592640422863, "loss": 4.3336, "step": 353500 }, { "epoch": 0.66, "learning_rate": 0.00023355207340055708, "loss": 4.3066, "step": 354000 }, { "epoch": 0.67, "learning_rate": 0.00023345822039688558, "loss": 4.3172, "step": 354500 }, { "epoch": 0.67, "learning_rate": 0.00023336436739321403, "loss": 4.3198, "step": 355000 }, { "epoch": 0.67, "learning_rate": 0.00023327051438954248, "loss": 4.3232, "step": 355500 }, { "epoch": 0.67, "learning_rate": 0.000233176661385871, "loss": 4.3227, "step": 356000 }, { "epoch": 0.67, "learning_rate": 0.00023308280838219944, "loss": 4.3282, "step": 356500 }, { "epoch": 0.67, "learning_rate": 0.00023298895537852792, "loss": 4.3354, "step": 357000 }, { "epoch": 0.67, "learning_rate": 0.00023289510237485637, "loss": 4.3167, "step": 357500 }, { "epoch": 0.67, "learning_rate": 0.00023280124937118487, "loss": 4.3128, "step": 358000 }, { "epoch": 0.67, "learning_rate": 0.00023270739636751332, "loss": 4.3079, "step": 358500 }, { "epoch": 0.67, "learning_rate": 0.00023261354336384177, "loss": 4.3187, "step": 359000 }, { "epoch": 0.67, "learning_rate": 0.00023251969036017028, "loss": 4.3335, "step": 359500 }, { "epoch": 0.68, "learning_rate": 0.00023242583735649873, "loss": 4.3001, "step": 360000 }, { "epoch": 0.68, "learning_rate": 0.0002323319843528272, "loss": 4.311, "step": 360500 }, { "epoch": 0.68, "learning_rate": 0.00023223813134915568, "loss": 4.3033, "step": 361000 }, { "epoch": 0.68, "learning_rate": 0.00023214427834548416, "loss": 4.3062, "step": 361500 }, { "epoch": 0.68, "learning_rate": 0.0002320504253418126, "loss": 4.337, "step": 362000 }, { "epoch": 0.68, "learning_rate": 0.00023195657233814111, "loss": 4.3261, "step": 362500 }, { "epoch": 0.68, "learning_rate": 0.00023186271933446956, "loss": 4.3073, "step": 363000 }, { "epoch": 0.68, "learning_rate": 0.00023176886633079801, "loss": 4.3152, "step": 363500 }, { "epoch": 0.68, "learning_rate": 0.00023167501332712652, "loss": 4.3326, "step": 364000 }, { "epoch": 0.68, "learning_rate": 0.00023158116032345497, "loss": 4.3189, "step": 364500 }, { "epoch": 0.69, "learning_rate": 0.00023148730731978342, "loss": 4.3118, "step": 365000 }, { "epoch": 0.69, "learning_rate": 0.00023139345431611192, "loss": 4.3262, "step": 365500 }, { "epoch": 0.69, "learning_rate": 0.00023129960131244037, "loss": 4.3084, "step": 366000 }, { "epoch": 0.69, "learning_rate": 0.00023120574830876885, "loss": 4.3094, "step": 366500 }, { "epoch": 0.69, "learning_rate": 0.00023111189530509733, "loss": 4.3039, "step": 367000 }, { "epoch": 0.69, "learning_rate": 0.0002310180423014258, "loss": 4.3015, "step": 367500 }, { "epoch": 0.69, "learning_rate": 0.00023092418929775426, "loss": 4.2926, "step": 368000 }, { "epoch": 0.69, "learning_rate": 0.00023083033629408276, "loss": 4.3182, "step": 368500 }, { "epoch": 0.69, "learning_rate": 0.0002307364832904112, "loss": 4.315, "step": 369000 }, { "epoch": 0.69, "learning_rate": 0.00023064263028673966, "loss": 4.3044, "step": 369500 }, { "epoch": 0.69, "learning_rate": 0.00023054877728306814, "loss": 4.289, "step": 370000 }, { "epoch": 0.7, "learning_rate": 0.00023045492427939662, "loss": 4.323, "step": 370500 }, { "epoch": 0.7, "learning_rate": 0.0002303610712757251, "loss": 4.3132, "step": 371000 }, { "epoch": 0.7, "learning_rate": 0.00023026721827205354, "loss": 4.3132, "step": 371500 }, { "epoch": 0.7, "learning_rate": 0.00023017336526838205, "loss": 4.306, "step": 372000 }, { "epoch": 0.7, "learning_rate": 0.0002300795122647105, "loss": 4.2933, "step": 372500 }, { "epoch": 0.7, "learning_rate": 0.00022998565926103895, "loss": 4.2984, "step": 373000 }, { "epoch": 0.7, "learning_rate": 0.00022989180625736745, "loss": 4.2908, "step": 373500 }, { "epoch": 0.7, "learning_rate": 0.0002297979532536959, "loss": 4.3171, "step": 374000 }, { "epoch": 0.7, "learning_rate": 0.00022970410025002438, "loss": 4.3105, "step": 374500 }, { "epoch": 0.7, "learning_rate": 0.00022961024724635286, "loss": 4.3084, "step": 375000 }, { "epoch": 0.7, "learning_rate": 0.0002295163942426813, "loss": 4.3002, "step": 375500 }, { "epoch": 0.71, "learning_rate": 0.0002294225412390098, "loss": 4.2929, "step": 376000 }, { "epoch": 0.71, "learning_rate": 0.00022932868823533826, "loss": 4.3018, "step": 376500 }, { "epoch": 0.71, "learning_rate": 0.00022923483523166674, "loss": 4.3052, "step": 377000 }, { "epoch": 0.71, "learning_rate": 0.0002291409822279952, "loss": 4.314, "step": 377500 }, { "epoch": 0.71, "learning_rate": 0.0002290471292243237, "loss": 4.3259, "step": 378000 }, { "epoch": 0.71, "learning_rate": 0.00022895327622065215, "loss": 4.3064, "step": 378500 }, { "epoch": 0.71, "learning_rate": 0.0002288594232169806, "loss": 4.3098, "step": 379000 }, { "epoch": 0.71, "learning_rate": 0.0002287655702133091, "loss": 4.3057, "step": 379500 }, { "epoch": 0.71, "learning_rate": 0.00022867171720963755, "loss": 4.3241, "step": 380000 }, { "epoch": 0.71, "learning_rate": 0.00022857786420596603, "loss": 4.3116, "step": 380500 }, { "epoch": 0.72, "learning_rate": 0.0002284840112022945, "loss": 4.3033, "step": 381000 }, { "epoch": 0.72, "learning_rate": 0.00022839015819862298, "loss": 4.3052, "step": 381500 }, { "epoch": 0.72, "learning_rate": 0.00022829630519495143, "loss": 4.2986, "step": 382000 }, { "epoch": 0.72, "learning_rate": 0.00022820245219127989, "loss": 4.3329, "step": 382500 }, { "epoch": 0.72, "learning_rate": 0.0002281085991876084, "loss": 4.3184, "step": 383000 }, { "epoch": 0.72, "learning_rate": 0.00022801474618393684, "loss": 4.3221, "step": 383500 }, { "epoch": 0.72, "learning_rate": 0.00022792089318026532, "loss": 4.2981, "step": 384000 }, { "epoch": 0.72, "learning_rate": 0.0002278270401765938, "loss": 4.3068, "step": 384500 }, { "epoch": 0.72, "learning_rate": 0.00022773318717292227, "loss": 4.2975, "step": 385000 }, { "epoch": 0.72, "learning_rate": 0.00022763933416925072, "loss": 4.3087, "step": 385500 }, { "epoch": 0.72, "learning_rate": 0.0002275454811655792, "loss": 4.3093, "step": 386000 }, { "epoch": 0.73, "learning_rate": 0.00022745162816190768, "loss": 4.3002, "step": 386500 }, { "epoch": 0.73, "learning_rate": 0.00022735777515823613, "loss": 4.2916, "step": 387000 }, { "epoch": 0.73, "learning_rate": 0.00022726392215456463, "loss": 4.3075, "step": 387500 }, { "epoch": 0.73, "learning_rate": 0.00022717006915089308, "loss": 4.3241, "step": 388000 }, { "epoch": 0.73, "learning_rate": 0.00022707621614722153, "loss": 4.3041, "step": 388500 }, { "epoch": 0.73, "learning_rate": 0.00022698236314355004, "loss": 4.2962, "step": 389000 }, { "epoch": 0.73, "learning_rate": 0.0002268885101398785, "loss": 4.2888, "step": 389500 }, { "epoch": 0.73, "learning_rate": 0.00022679465713620696, "loss": 4.2749, "step": 390000 }, { "epoch": 0.73, "learning_rate": 0.00022670080413253544, "loss": 4.2951, "step": 390500 }, { "epoch": 0.73, "learning_rate": 0.00022660695112886392, "loss": 4.3075, "step": 391000 }, { "epoch": 0.73, "learning_rate": 0.00022651309812519237, "loss": 4.3103, "step": 391500 }, { "epoch": 0.74, "learning_rate": 0.00022641924512152087, "loss": 4.3059, "step": 392000 }, { "epoch": 0.74, "learning_rate": 0.00022632539211784932, "loss": 4.2875, "step": 392500 }, { "epoch": 0.74, "learning_rate": 0.00022623153911417778, "loss": 4.3118, "step": 393000 }, { "epoch": 0.74, "learning_rate": 0.00022613768611050625, "loss": 4.3154, "step": 393500 }, { "epoch": 0.74, "learning_rate": 0.00022604383310683473, "loss": 4.3082, "step": 394000 }, { "epoch": 0.74, "learning_rate": 0.0002259499801031632, "loss": 4.318, "step": 394500 }, { "epoch": 0.74, "learning_rate": 0.00022585612709949166, "loss": 4.3045, "step": 395000 }, { "epoch": 0.74, "learning_rate": 0.00022576227409582016, "loss": 4.3018, "step": 395500 }, { "epoch": 0.74, "learning_rate": 0.0002256684210921486, "loss": 4.3095, "step": 396000 }, { "epoch": 0.74, "learning_rate": 0.00022557456808847706, "loss": 4.3, "step": 396500 }, { "epoch": 0.75, "learning_rate": 0.00022548071508480557, "loss": 4.3044, "step": 397000 }, { "epoch": 0.75, "learning_rate": 0.00022538686208113402, "loss": 4.2872, "step": 397500 }, { "epoch": 0.75, "learning_rate": 0.0002252930090774625, "loss": 4.3034, "step": 398000 }, { "epoch": 0.75, "learning_rate": 0.00022519915607379097, "loss": 4.2817, "step": 398500 }, { "epoch": 0.75, "learning_rate": 0.00022510530307011942, "loss": 4.2997, "step": 399000 }, { "epoch": 0.75, "learning_rate": 0.0002250114500664479, "loss": 4.3049, "step": 399500 }, { "epoch": 0.75, "learning_rate": 0.00022491759706277638, "loss": 4.2894, "step": 400000 }, { "epoch": 0.75, "learning_rate": 0.00022482374405910486, "loss": 4.2934, "step": 400500 }, { "epoch": 0.75, "learning_rate": 0.0002247298910554333, "loss": 4.2779, "step": 401000 }, { "epoch": 0.75, "learning_rate": 0.0002246360380517618, "loss": 4.2995, "step": 401500 }, { "epoch": 0.75, "learning_rate": 0.00022454218504809026, "loss": 4.2822, "step": 402000 }, { "epoch": 0.76, "learning_rate": 0.0002244483320444187, "loss": 4.3028, "step": 402500 }, { "epoch": 0.76, "learning_rate": 0.00022435447904074722, "loss": 4.2893, "step": 403000 }, { "epoch": 0.76, "learning_rate": 0.00022426062603707567, "loss": 4.2889, "step": 403500 }, { "epoch": 0.76, "learning_rate": 0.00022416677303340414, "loss": 4.2974, "step": 404000 }, { "epoch": 0.76, "learning_rate": 0.00022407292002973262, "loss": 4.2914, "step": 404500 }, { "epoch": 0.76, "learning_rate": 0.0002239790670260611, "loss": 4.283, "step": 405000 }, { "epoch": 0.76, "learning_rate": 0.00022388521402238955, "loss": 4.3096, "step": 405500 }, { "epoch": 0.76, "learning_rate": 0.000223791361018718, "loss": 4.3023, "step": 406000 }, { "epoch": 0.76, "learning_rate": 0.0002236975080150465, "loss": 4.3092, "step": 406500 }, { "epoch": 0.76, "learning_rate": 0.00022360365501137495, "loss": 4.2833, "step": 407000 }, { "epoch": 0.76, "learning_rate": 0.00022350980200770343, "loss": 4.2847, "step": 407500 }, { "epoch": 0.77, "learning_rate": 0.0002234159490040319, "loss": 4.3017, "step": 408000 }, { "epoch": 0.77, "learning_rate": 0.00022332209600036039, "loss": 4.2833, "step": 408500 }, { "epoch": 0.77, "learning_rate": 0.00022322824299668884, "loss": 4.2866, "step": 409000 }, { "epoch": 0.77, "learning_rate": 0.0002231343899930173, "loss": 4.2772, "step": 409500 }, { "epoch": 0.77, "learning_rate": 0.0002230405369893458, "loss": 4.2986, "step": 410000 }, { "epoch": 0.77, "learning_rate": 0.00022294668398567424, "loss": 4.2921, "step": 410500 }, { "epoch": 0.77, "learning_rate": 0.00022285283098200275, "loss": 4.2811, "step": 411000 }, { "epoch": 0.77, "learning_rate": 0.0002227589779783312, "loss": 4.2975, "step": 411500 }, { "epoch": 0.77, "learning_rate": 0.00022266512497465965, "loss": 4.2839, "step": 412000 }, { "epoch": 0.77, "learning_rate": 0.00022257127197098815, "loss": 4.2911, "step": 412500 }, { "epoch": 0.78, "learning_rate": 0.0002224774189673166, "loss": 4.2886, "step": 413000 }, { "epoch": 0.78, "learning_rate": 0.00022238356596364508, "loss": 4.2952, "step": 413500 }, { "epoch": 0.78, "learning_rate": 0.00022228971295997356, "loss": 4.2975, "step": 414000 }, { "epoch": 0.78, "learning_rate": 0.00022219585995630203, "loss": 4.2894, "step": 414500 }, { "epoch": 0.78, "learning_rate": 0.00022210200695263048, "loss": 4.292, "step": 415000 }, { "epoch": 0.78, "learning_rate": 0.000222008153948959, "loss": 4.2854, "step": 415500 }, { "epoch": 0.78, "learning_rate": 0.00022191430094528744, "loss": 4.2782, "step": 416000 }, { "epoch": 0.78, "learning_rate": 0.0002218204479416159, "loss": 4.2933, "step": 416500 }, { "epoch": 0.78, "learning_rate": 0.0002217265949379444, "loss": 4.2914, "step": 417000 }, { "epoch": 0.78, "learning_rate": 0.00022163274193427284, "loss": 4.2903, "step": 417500 }, { "epoch": 0.78, "learning_rate": 0.00022153888893060132, "loss": 4.2945, "step": 418000 }, { "epoch": 0.79, "learning_rate": 0.00022144503592692977, "loss": 4.2849, "step": 418500 }, { "epoch": 0.79, "learning_rate": 0.00022135118292325828, "loss": 4.2944, "step": 419000 }, { "epoch": 0.79, "learning_rate": 0.00022125732991958673, "loss": 4.2872, "step": 419500 }, { "epoch": 0.79, "learning_rate": 0.00022116347691591518, "loss": 4.2767, "step": 420000 }, { "epoch": 0.79, "learning_rate": 0.00022106962391224368, "loss": 4.2938, "step": 420500 }, { "epoch": 0.79, "learning_rate": 0.00022097577090857213, "loss": 4.2677, "step": 421000 }, { "epoch": 0.79, "learning_rate": 0.0002208819179049006, "loss": 4.2734, "step": 421500 }, { "epoch": 0.79, "learning_rate": 0.00022078806490122909, "loss": 4.2786, "step": 422000 }, { "epoch": 0.79, "learning_rate": 0.00022069421189755754, "loss": 4.2816, "step": 422500 }, { "epoch": 0.79, "learning_rate": 0.00022060035889388601, "loss": 4.2743, "step": 423000 }, { "epoch": 0.79, "learning_rate": 0.0002205065058902145, "loss": 4.2944, "step": 423500 }, { "epoch": 0.8, "learning_rate": 0.00022041265288654297, "loss": 4.2991, "step": 424000 }, { "epoch": 0.8, "learning_rate": 0.00022031879988287142, "loss": 4.2912, "step": 424500 }, { "epoch": 0.8, "learning_rate": 0.00022022494687919992, "loss": 4.2999, "step": 425000 }, { "epoch": 0.8, "learning_rate": 0.00022013109387552837, "loss": 4.2885, "step": 425500 }, { "epoch": 0.8, "learning_rate": 0.00022003724087185682, "loss": 4.2875, "step": 426000 }, { "epoch": 0.8, "learning_rate": 0.00021994338786818533, "loss": 4.3016, "step": 426500 }, { "epoch": 0.8, "learning_rate": 0.00021984953486451378, "loss": 4.2757, "step": 427000 }, { "epoch": 0.8, "learning_rate": 0.00021975568186084226, "loss": 4.2645, "step": 427500 }, { "epoch": 0.8, "learning_rate": 0.00021966182885717073, "loss": 4.2858, "step": 428000 }, { "epoch": 0.8, "learning_rate": 0.0002195679758534992, "loss": 4.2878, "step": 428500 }, { "epoch": 0.81, "learning_rate": 0.00021947412284982766, "loss": 4.271, "step": 429000 }, { "epoch": 0.81, "learning_rate": 0.00021938026984615617, "loss": 4.3062, "step": 429500 }, { "epoch": 0.81, "learning_rate": 0.00021928641684248462, "loss": 4.2894, "step": 430000 }, { "epoch": 0.81, "learning_rate": 0.00021919256383881307, "loss": 4.2885, "step": 430500 }, { "epoch": 0.81, "learning_rate": 0.00021909871083514154, "loss": 4.2785, "step": 431000 }, { "epoch": 0.81, "learning_rate": 0.00021900485783147002, "loss": 4.2897, "step": 431500 }, { "epoch": 0.81, "learning_rate": 0.0002189110048277985, "loss": 4.2903, "step": 432000 }, { "epoch": 0.81, "learning_rate": 0.00021881715182412695, "loss": 4.2806, "step": 432500 }, { "epoch": 0.81, "learning_rate": 0.00021872329882045543, "loss": 4.2764, "step": 433000 }, { "epoch": 0.81, "learning_rate": 0.0002186294458167839, "loss": 4.2873, "step": 433500 }, { "epoch": 0.81, "learning_rate": 0.00021853559281311235, "loss": 4.2777, "step": 434000 }, { "epoch": 0.82, "learning_rate": 0.00021844173980944086, "loss": 4.2766, "step": 434500 }, { "epoch": 0.82, "learning_rate": 0.0002183478868057693, "loss": 4.2807, "step": 435000 }, { "epoch": 0.82, "learning_rate": 0.00021825403380209776, "loss": 4.2786, "step": 435500 }, { "epoch": 0.82, "learning_rate": 0.00021816018079842626, "loss": 4.2924, "step": 436000 }, { "epoch": 0.82, "learning_rate": 0.00021806632779475471, "loss": 4.2792, "step": 436500 }, { "epoch": 0.82, "learning_rate": 0.0002179724747910832, "loss": 4.284, "step": 437000 }, { "epoch": 0.82, "learning_rate": 0.00021787862178741167, "loss": 4.2709, "step": 437500 }, { "epoch": 0.82, "learning_rate": 0.00021778476878374015, "loss": 4.2788, "step": 438000 }, { "epoch": 0.82, "learning_rate": 0.0002176909157800686, "loss": 4.2804, "step": 438500 }, { "epoch": 0.82, "learning_rate": 0.0002175970627763971, "loss": 4.2787, "step": 439000 }, { "epoch": 0.82, "learning_rate": 0.00021750320977272555, "loss": 4.2891, "step": 439500 }, { "epoch": 0.83, "learning_rate": 0.000217409356769054, "loss": 4.2787, "step": 440000 }, { "epoch": 0.83, "learning_rate": 0.0002173155037653825, "loss": 4.2757, "step": 440500 }, { "epoch": 0.83, "learning_rate": 0.00021722165076171096, "loss": 4.2793, "step": 441000 }, { "epoch": 0.83, "learning_rate": 0.00021712779775803943, "loss": 4.2771, "step": 441500 }, { "epoch": 0.83, "learning_rate": 0.00021703394475436788, "loss": 4.2874, "step": 442000 }, { "epoch": 0.83, "learning_rate": 0.0002169400917506964, "loss": 4.2867, "step": 442500 }, { "epoch": 0.83, "learning_rate": 0.00021684623874702484, "loss": 4.293, "step": 443000 }, { "epoch": 0.83, "learning_rate": 0.0002167523857433533, "loss": 4.2633, "step": 443500 }, { "epoch": 0.83, "learning_rate": 0.0002166585327396818, "loss": 4.2578, "step": 444000 }, { "epoch": 0.83, "learning_rate": 0.00021656467973601024, "loss": 4.2501, "step": 444500 }, { "epoch": 0.84, "learning_rate": 0.00021647082673233872, "loss": 4.2755, "step": 445000 }, { "epoch": 0.84, "learning_rate": 0.0002163769737286672, "loss": 4.2611, "step": 445500 }, { "epoch": 0.84, "learning_rate": 0.00021628312072499565, "loss": 4.2594, "step": 446000 }, { "epoch": 0.84, "learning_rate": 0.00021618926772132413, "loss": 4.2783, "step": 446500 }, { "epoch": 0.84, "learning_rate": 0.0002160954147176526, "loss": 4.2789, "step": 447000 }, { "epoch": 0.84, "learning_rate": 0.00021600156171398108, "loss": 4.2734, "step": 447500 }, { "epoch": 0.84, "learning_rate": 0.00021590770871030953, "loss": 4.2652, "step": 448000 }, { "epoch": 0.84, "learning_rate": 0.00021581385570663804, "loss": 4.2554, "step": 448500 }, { "epoch": 0.84, "learning_rate": 0.0002157200027029665, "loss": 4.2737, "step": 449000 }, { "epoch": 0.84, "learning_rate": 0.00021562614969929494, "loss": 4.2768, "step": 449500 }, { "epoch": 0.84, "learning_rate": 0.00021553229669562344, "loss": 4.2797, "step": 450000 }, { "epoch": 0.85, "learning_rate": 0.0002154384436919519, "loss": 4.2645, "step": 450500 }, { "epoch": 0.85, "learning_rate": 0.00021534459068828037, "loss": 4.2993, "step": 451000 }, { "epoch": 0.85, "learning_rate": 0.00021525073768460885, "loss": 4.2764, "step": 451500 }, { "epoch": 0.85, "learning_rate": 0.00021515688468093732, "loss": 4.2728, "step": 452000 }, { "epoch": 0.85, "learning_rate": 0.00021506303167726577, "loss": 4.2797, "step": 452500 }, { "epoch": 0.85, "learning_rate": 0.00021496917867359428, "loss": 4.2753, "step": 453000 }, { "epoch": 0.85, "learning_rate": 0.00021487532566992273, "loss": 4.2689, "step": 453500 }, { "epoch": 0.85, "learning_rate": 0.00021478147266625118, "loss": 4.2779, "step": 454000 }, { "epoch": 0.85, "learning_rate": 0.00021468761966257966, "loss": 4.2649, "step": 454500 }, { "epoch": 0.85, "learning_rate": 0.00021459376665890813, "loss": 4.2652, "step": 455000 }, { "epoch": 0.86, "learning_rate": 0.0002144999136552366, "loss": 4.2896, "step": 455500 }, { "epoch": 0.86, "learning_rate": 0.00021440606065156506, "loss": 4.2937, "step": 456000 }, { "epoch": 0.86, "learning_rate": 0.00021431220764789354, "loss": 4.2737, "step": 456500 }, { "epoch": 0.86, "learning_rate": 0.00021421835464422202, "loss": 4.2747, "step": 457000 }, { "epoch": 0.86, "learning_rate": 0.00021412450164055047, "loss": 4.2596, "step": 457500 }, { "epoch": 0.86, "learning_rate": 0.00021403064863687897, "loss": 4.2499, "step": 458000 }, { "epoch": 0.86, "learning_rate": 0.00021393679563320742, "loss": 4.2678, "step": 458500 }, { "epoch": 0.86, "learning_rate": 0.00021384294262953587, "loss": 4.2618, "step": 459000 }, { "epoch": 0.86, "learning_rate": 0.00021374908962586438, "loss": 4.2678, "step": 459500 }, { "epoch": 0.86, "learning_rate": 0.00021365523662219283, "loss": 4.2614, "step": 460000 }, { "epoch": 0.86, "learning_rate": 0.0002135613836185213, "loss": 4.2823, "step": 460500 }, { "epoch": 0.87, "learning_rate": 0.00021346753061484978, "loss": 4.2742, "step": 461000 }, { "epoch": 0.87, "learning_rate": 0.00021337367761117826, "loss": 4.2699, "step": 461500 }, { "epoch": 0.87, "learning_rate": 0.0002132798246075067, "loss": 4.2681, "step": 462000 }, { "epoch": 0.87, "learning_rate": 0.00021318597160383521, "loss": 4.2648, "step": 462500 }, { "epoch": 0.87, "learning_rate": 0.00021309211860016366, "loss": 4.277, "step": 463000 }, { "epoch": 0.87, "learning_rate": 0.00021299826559649212, "loss": 4.2745, "step": 463500 }, { "epoch": 0.87, "learning_rate": 0.00021290441259282062, "loss": 4.2811, "step": 464000 }, { "epoch": 0.87, "learning_rate": 0.00021281055958914907, "loss": 4.2692, "step": 464500 }, { "epoch": 0.87, "learning_rate": 0.00021271670658547755, "loss": 4.2891, "step": 465000 }, { "epoch": 0.87, "learning_rate": 0.00021262285358180602, "loss": 4.2878, "step": 465500 }, { "epoch": 0.87, "learning_rate": 0.0002125290005781345, "loss": 4.265, "step": 466000 }, { "epoch": 0.88, "learning_rate": 0.00021243514757446295, "loss": 4.2714, "step": 466500 }, { "epoch": 0.88, "learning_rate": 0.0002123412945707914, "loss": 4.2806, "step": 467000 }, { "epoch": 0.88, "learning_rate": 0.0002122474415671199, "loss": 4.2564, "step": 467500 }, { "epoch": 0.88, "learning_rate": 0.00021215358856344836, "loss": 4.2518, "step": 468000 }, { "epoch": 0.88, "learning_rate": 0.00021205973555977684, "loss": 4.282, "step": 468500 }, { "epoch": 0.88, "learning_rate": 0.0002119658825561053, "loss": 4.2601, "step": 469000 }, { "epoch": 0.88, "learning_rate": 0.00021187202955243376, "loss": 4.2715, "step": 469500 }, { "epoch": 0.88, "learning_rate": 0.00021177817654876224, "loss": 4.2638, "step": 470000 }, { "epoch": 0.88, "learning_rate": 0.00021168432354509072, "loss": 4.2539, "step": 470500 }, { "epoch": 0.88, "learning_rate": 0.0002115904705414192, "loss": 4.27, "step": 471000 }, { "epoch": 0.89, "learning_rate": 0.00021149661753774765, "loss": 4.2664, "step": 471500 }, { "epoch": 0.89, "learning_rate": 0.00021140276453407615, "loss": 4.2567, "step": 472000 }, { "epoch": 0.89, "learning_rate": 0.0002113089115304046, "loss": 4.271, "step": 472500 }, { "epoch": 0.89, "learning_rate": 0.00021121505852673305, "loss": 4.2487, "step": 473000 }, { "epoch": 0.89, "learning_rate": 0.00021112120552306156, "loss": 4.2477, "step": 473500 }, { "epoch": 0.89, "learning_rate": 0.00021102735251939, "loss": 4.2482, "step": 474000 }, { "epoch": 0.89, "learning_rate": 0.00021093349951571848, "loss": 4.2735, "step": 474500 }, { "epoch": 0.89, "learning_rate": 0.00021083964651204696, "loss": 4.2625, "step": 475000 }, { "epoch": 0.89, "learning_rate": 0.00021074579350837544, "loss": 4.2746, "step": 475500 }, { "epoch": 0.89, "learning_rate": 0.0002106519405047039, "loss": 4.2719, "step": 476000 }, { "epoch": 0.89, "learning_rate": 0.0002105580875010324, "loss": 4.2586, "step": 476500 }, { "epoch": 0.9, "learning_rate": 0.00021046423449736084, "loss": 4.2766, "step": 477000 }, { "epoch": 0.9, "learning_rate": 0.0002103703814936893, "loss": 4.2502, "step": 477500 }, { "epoch": 0.9, "learning_rate": 0.0002102765284900178, "loss": 4.2608, "step": 478000 }, { "epoch": 0.9, "learning_rate": 0.00021018267548634625, "loss": 4.2689, "step": 478500 }, { "epoch": 0.9, "learning_rate": 0.00021008882248267473, "loss": 4.2725, "step": 479000 }, { "epoch": 0.9, "learning_rate": 0.00020999496947900318, "loss": 4.2519, "step": 479500 }, { "epoch": 0.9, "learning_rate": 0.00020990111647533165, "loss": 4.2506, "step": 480000 }, { "epoch": 0.9, "learning_rate": 0.00020980726347166013, "loss": 4.2739, "step": 480500 }, { "epoch": 0.9, "learning_rate": 0.00020971341046798858, "loss": 4.2557, "step": 481000 }, { "epoch": 0.9, "learning_rate": 0.00020961955746431709, "loss": 4.2609, "step": 481500 }, { "epoch": 0.9, "learning_rate": 0.00020952570446064554, "loss": 4.2623, "step": 482000 }, { "epoch": 0.91, "learning_rate": 0.00020943185145697399, "loss": 4.276, "step": 482500 }, { "epoch": 0.91, "learning_rate": 0.0002093379984533025, "loss": 4.2485, "step": 483000 }, { "epoch": 0.91, "learning_rate": 0.00020924414544963094, "loss": 4.2676, "step": 483500 }, { "epoch": 0.91, "learning_rate": 0.00020915029244595942, "loss": 4.2527, "step": 484000 }, { "epoch": 0.91, "learning_rate": 0.0002090564394422879, "loss": 4.2803, "step": 484500 }, { "epoch": 0.91, "learning_rate": 0.00020896258643861637, "loss": 4.2584, "step": 485000 }, { "epoch": 0.91, "learning_rate": 0.00020886873343494482, "loss": 4.2635, "step": 485500 }, { "epoch": 0.91, "learning_rate": 0.00020877488043127333, "loss": 4.2583, "step": 486000 }, { "epoch": 0.91, "learning_rate": 0.00020868102742760178, "loss": 4.2783, "step": 486500 }, { "epoch": 0.91, "learning_rate": 0.00020858717442393023, "loss": 4.2579, "step": 487000 }, { "epoch": 0.92, "learning_rate": 0.00020849332142025873, "loss": 4.2727, "step": 487500 }, { "epoch": 0.92, "learning_rate": 0.00020839946841658718, "loss": 4.2704, "step": 488000 }, { "epoch": 0.92, "learning_rate": 0.00020830561541291566, "loss": 4.2684, "step": 488500 }, { "epoch": 0.92, "learning_rate": 0.00020821176240924414, "loss": 4.2596, "step": 489000 }, { "epoch": 0.92, "learning_rate": 0.00020811790940557262, "loss": 4.2519, "step": 489500 }, { "epoch": 0.92, "learning_rate": 0.00020802405640190107, "loss": 4.2665, "step": 490000 }, { "epoch": 0.92, "learning_rate": 0.00020793020339822952, "loss": 4.2481, "step": 490500 }, { "epoch": 0.92, "learning_rate": 0.00020783635039455802, "loss": 4.2664, "step": 491000 }, { "epoch": 0.92, "learning_rate": 0.00020774249739088647, "loss": 4.2443, "step": 491500 }, { "epoch": 0.92, "learning_rate": 0.00020764864438721495, "loss": 4.2506, "step": 492000 }, { "epoch": 0.92, "learning_rate": 0.00020755479138354343, "loss": 4.2729, "step": 492500 }, { "epoch": 0.93, "learning_rate": 0.00020746093837987188, "loss": 4.2578, "step": 493000 }, { "epoch": 0.93, "learning_rate": 0.00020736708537620035, "loss": 4.2583, "step": 493500 }, { "epoch": 0.93, "learning_rate": 0.00020727323237252883, "loss": 4.2613, "step": 494000 }, { "epoch": 0.93, "learning_rate": 0.0002071793793688573, "loss": 4.2652, "step": 494500 }, { "epoch": 0.93, "learning_rate": 0.00020708552636518576, "loss": 4.2533, "step": 495000 }, { "epoch": 0.93, "learning_rate": 0.00020699167336151426, "loss": 4.2594, "step": 495500 }, { "epoch": 0.93, "learning_rate": 0.00020689782035784271, "loss": 4.2733, "step": 496000 }, { "epoch": 0.93, "learning_rate": 0.00020680396735417116, "loss": 4.2422, "step": 496500 }, { "epoch": 0.93, "learning_rate": 0.00020671011435049967, "loss": 4.2414, "step": 497000 }, { "epoch": 0.93, "learning_rate": 0.00020661626134682812, "loss": 4.2533, "step": 497500 }, { "epoch": 0.93, "learning_rate": 0.0002065224083431566, "loss": 4.2763, "step": 498000 }, { "epoch": 0.94, "learning_rate": 0.00020642855533948507, "loss": 4.2528, "step": 498500 }, { "epoch": 0.94, "learning_rate": 0.00020633470233581355, "loss": 4.2631, "step": 499000 }, { "epoch": 0.94, "learning_rate": 0.000206240849332142, "loss": 4.245, "step": 499500 }, { "epoch": 0.94, "learning_rate": 0.0002061469963284705, "loss": 4.2573, "step": 500000 }, { "epoch": 0.94, "learning_rate": 0.00020605314332479896, "loss": 4.2552, "step": 500500 }, { "epoch": 0.94, "learning_rate": 0.0002059592903211274, "loss": 4.265, "step": 501000 }, { "epoch": 0.94, "learning_rate": 0.0002058654373174559, "loss": 4.2606, "step": 501500 }, { "epoch": 0.94, "learning_rate": 0.00020577158431378436, "loss": 4.2641, "step": 502000 }, { "epoch": 0.94, "learning_rate": 0.00020567773131011284, "loss": 4.2382, "step": 502500 }, { "epoch": 0.94, "learning_rate": 0.0002055838783064413, "loss": 4.2601, "step": 503000 }, { "epoch": 0.95, "learning_rate": 0.00020549002530276977, "loss": 4.2627, "step": 503500 }, { "epoch": 0.95, "learning_rate": 0.00020539617229909824, "loss": 4.2453, "step": 504000 }, { "epoch": 0.95, "learning_rate": 0.0002053023192954267, "loss": 4.2559, "step": 504500 }, { "epoch": 0.95, "learning_rate": 0.0002052084662917552, "loss": 4.2667, "step": 505000 }, { "epoch": 0.95, "learning_rate": 0.00020511461328808365, "loss": 4.2508, "step": 505500 }, { "epoch": 0.95, "learning_rate": 0.0002050207602844121, "loss": 4.2531, "step": 506000 }, { "epoch": 0.95, "learning_rate": 0.0002049269072807406, "loss": 4.2831, "step": 506500 }, { "epoch": 0.95, "learning_rate": 0.00020483305427706905, "loss": 4.2682, "step": 507000 }, { "epoch": 0.95, "learning_rate": 0.00020473920127339753, "loss": 4.2628, "step": 507500 }, { "epoch": 0.95, "learning_rate": 0.000204645348269726, "loss": 4.2403, "step": 508000 }, { "epoch": 0.95, "learning_rate": 0.0002045514952660545, "loss": 4.2521, "step": 508500 }, { "epoch": 0.96, "learning_rate": 0.00020445764226238294, "loss": 4.2387, "step": 509000 }, { "epoch": 0.96, "learning_rate": 0.00020436378925871144, "loss": 4.2724, "step": 509500 }, { "epoch": 0.96, "learning_rate": 0.0002042699362550399, "loss": 4.2577, "step": 510000 }, { "epoch": 0.96, "learning_rate": 0.00020417608325136834, "loss": 4.2621, "step": 510500 }, { "epoch": 0.96, "learning_rate": 0.00020408223024769685, "loss": 4.248, "step": 511000 }, { "epoch": 0.96, "learning_rate": 0.0002039883772440253, "loss": 4.2503, "step": 511500 }, { "epoch": 0.96, "learning_rate": 0.00020389452424035377, "loss": 4.2477, "step": 512000 }, { "epoch": 0.96, "learning_rate": 0.00020380067123668225, "loss": 4.2293, "step": 512500 }, { "epoch": 0.96, "learning_rate": 0.00020370681823301073, "loss": 4.2378, "step": 513000 }, { "epoch": 0.96, "learning_rate": 0.00020361296522933918, "loss": 4.2499, "step": 513500 }, { "epoch": 0.96, "learning_rate": 0.00020351911222566766, "loss": 4.2397, "step": 514000 }, { "epoch": 0.97, "learning_rate": 0.00020342525922199613, "loss": 4.2635, "step": 514500 }, { "epoch": 0.97, "learning_rate": 0.00020333140621832458, "loss": 4.24, "step": 515000 }, { "epoch": 0.97, "learning_rate": 0.00020323755321465303, "loss": 4.2453, "step": 515500 }, { "epoch": 0.97, "learning_rate": 0.00020314370021098154, "loss": 4.2573, "step": 516000 }, { "epoch": 0.97, "learning_rate": 0.00020304984720731, "loss": 4.249, "step": 516500 }, { "epoch": 0.97, "learning_rate": 0.00020295599420363847, "loss": 4.2683, "step": 517000 }, { "epoch": 0.97, "learning_rate": 0.00020286214119996694, "loss": 4.2641, "step": 517500 }, { "epoch": 0.97, "learning_rate": 0.00020276828819629542, "loss": 4.255, "step": 518000 }, { "epoch": 0.97, "learning_rate": 0.00020267443519262387, "loss": 4.2394, "step": 518500 }, { "epoch": 0.97, "learning_rate": 0.00020258058218895238, "loss": 4.2414, "step": 519000 }, { "epoch": 0.98, "learning_rate": 0.00020248672918528083, "loss": 4.2488, "step": 519500 }, { "epoch": 0.98, "learning_rate": 0.00020239287618160928, "loss": 4.2479, "step": 520000 }, { "epoch": 0.98, "learning_rate": 0.00020229902317793778, "loss": 4.2705, "step": 520500 }, { "epoch": 0.98, "learning_rate": 0.00020220517017426623, "loss": 4.2416, "step": 521000 }, { "epoch": 0.98, "learning_rate": 0.0002021113171705947, "loss": 4.2642, "step": 521500 }, { "epoch": 0.98, "learning_rate": 0.0002020174641669232, "loss": 4.2613, "step": 522000 }, { "epoch": 0.98, "learning_rate": 0.00020192361116325166, "loss": 4.2429, "step": 522500 }, { "epoch": 0.98, "learning_rate": 0.00020182975815958011, "loss": 4.2542, "step": 523000 }, { "epoch": 0.98, "learning_rate": 0.00020173590515590862, "loss": 4.244, "step": 523500 }, { "epoch": 0.98, "learning_rate": 0.00020164205215223707, "loss": 4.2483, "step": 524000 }, { "epoch": 0.98, "learning_rate": 0.00020154819914856552, "loss": 4.2476, "step": 524500 }, { "epoch": 0.99, "learning_rate": 0.00020145434614489402, "loss": 4.2525, "step": 525000 }, { "epoch": 0.99, "learning_rate": 0.00020136049314122247, "loss": 4.2519, "step": 525500 }, { "epoch": 0.99, "learning_rate": 0.00020126664013755092, "loss": 4.2441, "step": 526000 }, { "epoch": 0.99, "learning_rate": 0.00020117278713387943, "loss": 4.2348, "step": 526500 }, { "epoch": 0.99, "learning_rate": 0.00020107893413020788, "loss": 4.2528, "step": 527000 }, { "epoch": 0.99, "learning_rate": 0.00020098508112653636, "loss": 4.2333, "step": 527500 }, { "epoch": 0.99, "learning_rate": 0.0002008912281228648, "loss": 4.2449, "step": 528000 }, { "epoch": 0.99, "learning_rate": 0.0002007973751191933, "loss": 4.2469, "step": 528500 }, { "epoch": 0.99, "learning_rate": 0.00020070352211552176, "loss": 4.2464, "step": 529000 }, { "epoch": 0.99, "learning_rate": 0.0002006096691118502, "loss": 4.2355, "step": 529500 }, { "epoch": 0.99, "learning_rate": 0.00020051581610817872, "loss": 4.2236, "step": 530000 }, { "epoch": 1.0, "learning_rate": 0.00020042196310450717, "loss": 4.2422, "step": 530500 }, { "epoch": 1.0, "learning_rate": 0.00020032811010083564, "loss": 4.2556, "step": 531000 }, { "epoch": 1.0, "learning_rate": 0.00020023425709716412, "loss": 4.2515, "step": 531500 }, { "epoch": 1.0, "learning_rate": 0.0002001404040934926, "loss": 4.2271, "step": 532000 }, { "epoch": 1.0, "learning_rate": 0.00020004655108982105, "loss": 4.2359, "step": 532500 }, { "epoch": 1.0, "learning_rate": 0.00019995269808614955, "loss": 4.2297, "step": 533000 }, { "epoch": 1.0, "learning_rate": 0.000199858845082478, "loss": 4.2488, "step": 533500 }, { "epoch": 1.0, "learning_rate": 0.00019976499207880646, "loss": 4.2258, "step": 534000 }, { "epoch": 1.0, "learning_rate": 0.00019967113907513496, "loss": 4.2224, "step": 534500 }, { "epoch": 1.0, "learning_rate": 0.0001995772860714634, "loss": 4.2312, "step": 535000 }, { "epoch": 1.01, "learning_rate": 0.0001994834330677919, "loss": 4.2321, "step": 535500 }, { "epoch": 1.01, "learning_rate": 0.00019938958006412036, "loss": 4.2091, "step": 536000 }, { "epoch": 1.01, "learning_rate": 0.00019929572706044882, "loss": 4.224, "step": 536500 }, { "epoch": 1.01, "learning_rate": 0.0001992018740567773, "loss": 4.2276, "step": 537000 }, { "epoch": 1.01, "learning_rate": 0.00019910802105310577, "loss": 4.259, "step": 537500 }, { "epoch": 1.01, "learning_rate": 0.00019901416804943425, "loss": 4.2215, "step": 538000 }, { "epoch": 1.01, "learning_rate": 0.0001989203150457627, "loss": 4.2088, "step": 538500 }, { "epoch": 1.01, "learning_rate": 0.00019882646204209115, "loss": 4.2335, "step": 539000 }, { "epoch": 1.01, "learning_rate": 0.00019873260903841965, "loss": 4.2255, "step": 539500 }, { "epoch": 1.01, "learning_rate": 0.0001986387560347481, "loss": 4.2327, "step": 540000 }, { "epoch": 1.01, "learning_rate": 0.00019854490303107658, "loss": 4.2239, "step": 540500 }, { "epoch": 1.02, "learning_rate": 0.00019845105002740506, "loss": 4.2246, "step": 541000 }, { "epoch": 1.02, "learning_rate": 0.00019835719702373354, "loss": 4.2427, "step": 541500 }, { "epoch": 1.02, "learning_rate": 0.00019826334402006199, "loss": 4.2361, "step": 542000 }, { "epoch": 1.02, "learning_rate": 0.0001981694910163905, "loss": 4.2247, "step": 542500 }, { "epoch": 1.02, "learning_rate": 0.00019807563801271894, "loss": 4.2346, "step": 543000 }, { "epoch": 1.02, "learning_rate": 0.0001979817850090474, "loss": 4.2269, "step": 543500 }, { "epoch": 1.02, "learning_rate": 0.0001978879320053759, "loss": 4.2321, "step": 544000 }, { "epoch": 1.02, "learning_rate": 0.00019779407900170435, "loss": 4.2198, "step": 544500 }, { "epoch": 1.02, "learning_rate": 0.00019770022599803282, "loss": 4.2388, "step": 545000 }, { "epoch": 1.02, "learning_rate": 0.0001976063729943613, "loss": 4.23, "step": 545500 }, { "epoch": 1.02, "learning_rate": 0.00019751251999068978, "loss": 4.2329, "step": 546000 }, { "epoch": 1.03, "learning_rate": 0.00019741866698701823, "loss": 4.2312, "step": 546500 }, { "epoch": 1.03, "learning_rate": 0.0001973248139833467, "loss": 4.2313, "step": 547000 }, { "epoch": 1.03, "learning_rate": 0.00019723096097967518, "loss": 4.2346, "step": 547500 }, { "epoch": 1.03, "learning_rate": 0.00019713710797600363, "loss": 4.243, "step": 548000 }, { "epoch": 1.03, "learning_rate": 0.00019704325497233214, "loss": 4.2174, "step": 548500 }, { "epoch": 1.03, "learning_rate": 0.0001969494019686606, "loss": 4.2352, "step": 549000 }, { "epoch": 1.03, "learning_rate": 0.00019685554896498904, "loss": 4.2285, "step": 549500 }, { "epoch": 1.03, "learning_rate": 0.00019676169596131754, "loss": 4.2227, "step": 550000 }, { "epoch": 1.03, "learning_rate": 0.000196667842957646, "loss": 4.215, "step": 550500 }, { "epoch": 1.03, "learning_rate": 0.00019657398995397447, "loss": 4.2237, "step": 551000 }, { "epoch": 1.04, "learning_rate": 0.00019648013695030292, "loss": 4.2057, "step": 551500 }, { "epoch": 1.04, "learning_rate": 0.00019638628394663143, "loss": 4.2184, "step": 552000 }, { "epoch": 1.04, "learning_rate": 0.00019629243094295988, "loss": 4.2099, "step": 552500 }, { "epoch": 1.04, "learning_rate": 0.00019619857793928833, "loss": 4.2225, "step": 553000 }, { "epoch": 1.04, "learning_rate": 0.00019610472493561683, "loss": 4.2369, "step": 553500 }, { "epoch": 1.04, "learning_rate": 0.00019601087193194528, "loss": 4.2086, "step": 554000 }, { "epoch": 1.04, "learning_rate": 0.00019591701892827376, "loss": 4.2253, "step": 554500 }, { "epoch": 1.04, "learning_rate": 0.00019582316592460224, "loss": 4.2251, "step": 555000 }, { "epoch": 1.04, "learning_rate": 0.0001957293129209307, "loss": 4.2226, "step": 555500 }, { "epoch": 1.04, "learning_rate": 0.00019563545991725916, "loss": 4.2374, "step": 556000 }, { "epoch": 1.04, "learning_rate": 0.00019554160691358767, "loss": 4.2233, "step": 556500 }, { "epoch": 1.05, "learning_rate": 0.00019544775390991612, "loss": 4.228, "step": 557000 }, { "epoch": 1.05, "learning_rate": 0.00019535390090624457, "loss": 4.2256, "step": 557500 }, { "epoch": 1.05, "learning_rate": 0.00019526004790257307, "loss": 4.2303, "step": 558000 }, { "epoch": 1.05, "learning_rate": 0.00019516619489890152, "loss": 4.2313, "step": 558500 }, { "epoch": 1.05, "learning_rate": 0.00019507234189523, "loss": 4.2416, "step": 559000 }, { "epoch": 1.05, "learning_rate": 0.00019497848889155848, "loss": 4.2075, "step": 559500 }, { "epoch": 1.05, "learning_rate": 0.00019488463588788693, "loss": 4.2282, "step": 560000 }, { "epoch": 1.05, "learning_rate": 0.0001947907828842154, "loss": 4.2386, "step": 560500 }, { "epoch": 1.05, "learning_rate": 0.00019469692988054388, "loss": 4.1995, "step": 561000 }, { "epoch": 1.05, "learning_rate": 0.00019460307687687236, "loss": 4.2351, "step": 561500 }, { "epoch": 1.05, "learning_rate": 0.0001945092238732008, "loss": 4.2391, "step": 562000 }, { "epoch": 1.06, "learning_rate": 0.00019441537086952932, "loss": 4.2291, "step": 562500 }, { "epoch": 1.06, "learning_rate": 0.00019432151786585777, "loss": 4.2197, "step": 563000 }, { "epoch": 1.06, "learning_rate": 0.00019422766486218622, "loss": 4.2284, "step": 563500 }, { "epoch": 1.06, "learning_rate": 0.0001941338118585147, "loss": 4.24, "step": 564000 }, { "epoch": 1.06, "learning_rate": 0.00019403995885484317, "loss": 4.2367, "step": 564500 }, { "epoch": 1.06, "learning_rate": 0.00019394610585117165, "loss": 4.2241, "step": 565000 }, { "epoch": 1.06, "learning_rate": 0.0001938522528475001, "loss": 4.2227, "step": 565500 }, { "epoch": 1.06, "learning_rate": 0.0001937583998438286, "loss": 4.2124, "step": 566000 }, { "epoch": 1.06, "learning_rate": 0.00019366454684015705, "loss": 4.2236, "step": 566500 }, { "epoch": 1.06, "learning_rate": 0.0001935706938364855, "loss": 4.2193, "step": 567000 }, { "epoch": 1.07, "learning_rate": 0.000193476840832814, "loss": 4.2251, "step": 567500 }, { "epoch": 1.07, "learning_rate": 0.00019338298782914246, "loss": 4.2165, "step": 568000 }, { "epoch": 1.07, "learning_rate": 0.00019328913482547094, "loss": 4.2178, "step": 568500 }, { "epoch": 1.07, "learning_rate": 0.00019319528182179941, "loss": 4.2291, "step": 569000 }, { "epoch": 1.07, "learning_rate": 0.0001931014288181279, "loss": 4.2278, "step": 569500 }, { "epoch": 1.07, "learning_rate": 0.00019300757581445634, "loss": 4.2115, "step": 570000 }, { "epoch": 1.07, "learning_rate": 0.00019291372281078482, "loss": 4.2327, "step": 570500 }, { "epoch": 1.07, "learning_rate": 0.0001928198698071133, "loss": 4.2181, "step": 571000 }, { "epoch": 1.07, "learning_rate": 0.00019272601680344175, "loss": 4.2258, "step": 571500 }, { "epoch": 1.07, "learning_rate": 0.00019263216379977025, "loss": 4.2262, "step": 572000 }, { "epoch": 1.07, "learning_rate": 0.0001925383107960987, "loss": 4.2313, "step": 572500 }, { "epoch": 1.08, "learning_rate": 0.00019244445779242715, "loss": 4.2121, "step": 573000 }, { "epoch": 1.08, "learning_rate": 0.00019235060478875566, "loss": 4.2348, "step": 573500 }, { "epoch": 1.08, "learning_rate": 0.0001922567517850841, "loss": 4.2235, "step": 574000 }, { "epoch": 1.08, "learning_rate": 0.00019216289878141258, "loss": 4.2236, "step": 574500 }, { "epoch": 1.08, "learning_rate": 0.00019206904577774103, "loss": 4.2327, "step": 575000 }, { "epoch": 1.08, "learning_rate": 0.00019197519277406954, "loss": 4.2335, "step": 575500 }, { "epoch": 1.08, "learning_rate": 0.000191881339770398, "loss": 4.2289, "step": 576000 }, { "epoch": 1.08, "learning_rate": 0.00019178748676672644, "loss": 4.2433, "step": 576500 }, { "epoch": 1.08, "learning_rate": 0.00019169363376305494, "loss": 4.2198, "step": 577000 }, { "epoch": 1.08, "learning_rate": 0.0001915997807593834, "loss": 4.2281, "step": 577500 }, { "epoch": 1.08, "learning_rate": 0.00019150592775571187, "loss": 4.2201, "step": 578000 }, { "epoch": 1.09, "learning_rate": 0.00019141207475204035, "loss": 4.2255, "step": 578500 }, { "epoch": 1.09, "learning_rate": 0.00019131822174836883, "loss": 4.2203, "step": 579000 }, { "epoch": 1.09, "learning_rate": 0.00019122436874469728, "loss": 4.2217, "step": 579500 }, { "epoch": 1.09, "learning_rate": 0.00019113051574102578, "loss": 4.223, "step": 580000 }, { "epoch": 1.09, "learning_rate": 0.00019103666273735423, "loss": 4.2309, "step": 580500 }, { "epoch": 1.09, "learning_rate": 0.00019094280973368268, "loss": 4.2395, "step": 581000 }, { "epoch": 1.09, "learning_rate": 0.00019084895673001119, "loss": 4.2214, "step": 581500 }, { "epoch": 1.09, "learning_rate": 0.00019075510372633964, "loss": 4.2269, "step": 582000 }, { "epoch": 1.09, "learning_rate": 0.00019066125072266811, "loss": 4.1937, "step": 582500 }, { "epoch": 1.09, "learning_rate": 0.0001905673977189966, "loss": 4.2311, "step": 583000 }, { "epoch": 1.1, "learning_rate": 0.00019047354471532504, "loss": 4.2116, "step": 583500 }, { "epoch": 1.1, "learning_rate": 0.00019037969171165352, "loss": 4.2534, "step": 584000 }, { "epoch": 1.1, "learning_rate": 0.000190285838707982, "loss": 4.2268, "step": 584500 }, { "epoch": 1.1, "learning_rate": 0.00019019198570431047, "loss": 4.2081, "step": 585000 }, { "epoch": 1.1, "learning_rate": 0.00019009813270063892, "loss": 4.2207, "step": 585500 }, { "epoch": 1.1, "learning_rate": 0.00019000427969696743, "loss": 4.2161, "step": 586000 }, { "epoch": 1.1, "learning_rate": 0.00018991042669329588, "loss": 4.2327, "step": 586500 }, { "epoch": 1.1, "learning_rate": 0.00018981657368962433, "loss": 4.2175, "step": 587000 }, { "epoch": 1.1, "learning_rate": 0.0001897227206859528, "loss": 4.2299, "step": 587500 }, { "epoch": 1.1, "learning_rate": 0.00018962886768228128, "loss": 4.2199, "step": 588000 }, { "epoch": 1.1, "learning_rate": 0.00018953501467860976, "loss": 4.2339, "step": 588500 }, { "epoch": 1.11, "learning_rate": 0.0001894411616749382, "loss": 4.2331, "step": 589000 }, { "epoch": 1.11, "learning_rate": 0.00018934730867126672, "loss": 4.2203, "step": 589500 }, { "epoch": 1.11, "learning_rate": 0.00018925345566759517, "loss": 4.2066, "step": 590000 }, { "epoch": 1.11, "learning_rate": 0.00018915960266392362, "loss": 4.2226, "step": 590500 }, { "epoch": 1.11, "learning_rate": 0.00018906574966025212, "loss": 4.2288, "step": 591000 }, { "epoch": 1.11, "learning_rate": 0.00018897189665658057, "loss": 4.2285, "step": 591500 }, { "epoch": 1.11, "learning_rate": 0.00018887804365290905, "loss": 4.1987, "step": 592000 }, { "epoch": 1.11, "learning_rate": 0.00018878419064923753, "loss": 4.2278, "step": 592500 }, { "epoch": 1.11, "learning_rate": 0.000188690337645566, "loss": 4.209, "step": 593000 }, { "epoch": 1.11, "learning_rate": 0.00018859648464189445, "loss": 4.233, "step": 593500 }, { "epoch": 1.11, "learning_rate": 0.00018850263163822293, "loss": 4.2264, "step": 594000 }, { "epoch": 1.12, "learning_rate": 0.0001884087786345514, "loss": 4.2198, "step": 594500 }, { "epoch": 1.12, "learning_rate": 0.00018831492563087986, "loss": 4.2152, "step": 595000 }, { "epoch": 1.12, "learning_rate": 0.00018822107262720836, "loss": 4.2281, "step": 595500 }, { "epoch": 1.12, "learning_rate": 0.00018812721962353681, "loss": 4.215, "step": 596000 }, { "epoch": 1.12, "learning_rate": 0.00018803336661986526, "loss": 4.238, "step": 596500 }, { "epoch": 1.12, "learning_rate": 0.00018793951361619377, "loss": 4.2216, "step": 597000 }, { "epoch": 1.12, "learning_rate": 0.00018784566061252222, "loss": 4.2376, "step": 597500 }, { "epoch": 1.12, "learning_rate": 0.0001877518076088507, "loss": 4.2052, "step": 598000 }, { "epoch": 1.12, "learning_rate": 0.00018765795460517917, "loss": 4.2181, "step": 598500 }, { "epoch": 1.12, "learning_rate": 0.00018756410160150765, "loss": 4.2172, "step": 599000 }, { "epoch": 1.13, "learning_rate": 0.0001874702485978361, "loss": 4.2144, "step": 599500 }, { "epoch": 1.13, "learning_rate": 0.00018737639559416455, "loss": 4.2155, "step": 600000 }, { "epoch": 1.13, "learning_rate": 0.00018728254259049306, "loss": 4.2061, "step": 600500 }, { "epoch": 1.13, "learning_rate": 0.0001871886895868215, "loss": 4.2325, "step": 601000 }, { "epoch": 1.13, "learning_rate": 0.00018709483658314998, "loss": 4.203, "step": 601500 }, { "epoch": 1.13, "learning_rate": 0.00018700098357947846, "loss": 4.2066, "step": 602000 }, { "epoch": 1.13, "learning_rate": 0.00018690713057580694, "loss": 4.2303, "step": 602500 }, { "epoch": 1.13, "learning_rate": 0.0001868132775721354, "loss": 4.2242, "step": 603000 }, { "epoch": 1.13, "learning_rate": 0.0001867194245684639, "loss": 4.2095, "step": 603500 }, { "epoch": 1.13, "learning_rate": 0.00018662557156479234, "loss": 4.2156, "step": 604000 }, { "epoch": 1.13, "learning_rate": 0.0001865317185611208, "loss": 4.2042, "step": 604500 }, { "epoch": 1.14, "learning_rate": 0.0001864378655574493, "loss": 4.2106, "step": 605000 }, { "epoch": 1.14, "learning_rate": 0.00018634401255377775, "loss": 4.208, "step": 605500 }, { "epoch": 1.14, "learning_rate": 0.00018625015955010623, "loss": 4.2211, "step": 606000 }, { "epoch": 1.14, "learning_rate": 0.0001861563065464347, "loss": 4.2134, "step": 606500 }, { "epoch": 1.14, "learning_rate": 0.00018606245354276316, "loss": 4.225, "step": 607000 }, { "epoch": 1.14, "learning_rate": 0.00018596860053909163, "loss": 4.2293, "step": 607500 }, { "epoch": 1.14, "learning_rate": 0.0001858747475354201, "loss": 4.2237, "step": 608000 }, { "epoch": 1.14, "learning_rate": 0.0001857808945317486, "loss": 4.2054, "step": 608500 }, { "epoch": 1.14, "learning_rate": 0.00018568704152807704, "loss": 4.2071, "step": 609000 }, { "epoch": 1.14, "learning_rate": 0.00018559318852440554, "loss": 4.2144, "step": 609500 }, { "epoch": 1.15, "learning_rate": 0.000185499335520734, "loss": 4.2135, "step": 610000 }, { "epoch": 1.15, "learning_rate": 0.00018540548251706244, "loss": 4.2218, "step": 610500 }, { "epoch": 1.15, "learning_rate": 0.00018531162951339095, "loss": 4.2164, "step": 611000 }, { "epoch": 1.15, "learning_rate": 0.0001852177765097194, "loss": 4.224, "step": 611500 }, { "epoch": 1.15, "learning_rate": 0.00018512392350604788, "loss": 4.2092, "step": 612000 }, { "epoch": 1.15, "learning_rate": 0.00018503007050237633, "loss": 4.2103, "step": 612500 }, { "epoch": 1.15, "learning_rate": 0.00018493621749870483, "loss": 4.2111, "step": 613000 }, { "epoch": 1.15, "learning_rate": 0.00018484236449503328, "loss": 4.2203, "step": 613500 }, { "epoch": 1.15, "learning_rate": 0.00018474851149136173, "loss": 4.2308, "step": 614000 }, { "epoch": 1.15, "learning_rate": 0.00018465465848769024, "loss": 4.2132, "step": 614500 }, { "epoch": 1.15, "learning_rate": 0.00018456080548401869, "loss": 4.2204, "step": 615000 }, { "epoch": 1.16, "learning_rate": 0.00018446695248034716, "loss": 4.2039, "step": 615500 }, { "epoch": 1.16, "learning_rate": 0.00018437309947667564, "loss": 4.2223, "step": 616000 }, { "epoch": 1.16, "learning_rate": 0.00018427924647300412, "loss": 4.2024, "step": 616500 }, { "epoch": 1.16, "learning_rate": 0.00018418539346933257, "loss": 4.1935, "step": 617000 }, { "epoch": 1.16, "learning_rate": 0.00018409154046566105, "loss": 4.214, "step": 617500 }, { "epoch": 1.16, "learning_rate": 0.00018399768746198952, "loss": 4.226, "step": 618000 }, { "epoch": 1.16, "learning_rate": 0.00018390383445831797, "loss": 4.2093, "step": 618500 }, { "epoch": 1.16, "learning_rate": 0.00018380998145464648, "loss": 4.2317, "step": 619000 }, { "epoch": 1.16, "learning_rate": 0.00018371612845097493, "loss": 4.2188, "step": 619500 }, { "epoch": 1.16, "learning_rate": 0.00018362227544730338, "loss": 4.2172, "step": 620000 }, { "epoch": 1.16, "learning_rate": 0.00018352842244363188, "loss": 4.2123, "step": 620500 }, { "epoch": 1.17, "learning_rate": 0.00018343456943996033, "loss": 4.2063, "step": 621000 }, { "epoch": 1.17, "learning_rate": 0.0001833407164362888, "loss": 4.2137, "step": 621500 }, { "epoch": 1.17, "learning_rate": 0.0001832468634326173, "loss": 4.2104, "step": 622000 }, { "epoch": 1.17, "learning_rate": 0.00018315301042894577, "loss": 4.2196, "step": 622500 }, { "epoch": 1.17, "learning_rate": 0.00018305915742527422, "loss": 4.2209, "step": 623000 }, { "epoch": 1.17, "learning_rate": 0.00018296530442160267, "loss": 4.2221, "step": 623500 }, { "epoch": 1.17, "learning_rate": 0.00018287145141793117, "loss": 4.1967, "step": 624000 }, { "epoch": 1.17, "learning_rate": 0.00018277759841425962, "loss": 4.2119, "step": 624500 }, { "epoch": 1.17, "learning_rate": 0.0001826837454105881, "loss": 4.2446, "step": 625000 }, { "epoch": 1.17, "learning_rate": 0.00018258989240691658, "loss": 4.2099, "step": 625500 }, { "epoch": 1.18, "learning_rate": 0.00018249603940324505, "loss": 4.2067, "step": 626000 }, { "epoch": 1.18, "learning_rate": 0.0001824021863995735, "loss": 4.2227, "step": 626500 }, { "epoch": 1.18, "learning_rate": 0.000182308333395902, "loss": 4.2201, "step": 627000 }, { "epoch": 1.18, "learning_rate": 0.00018221448039223046, "loss": 4.209, "step": 627500 }, { "epoch": 1.18, "learning_rate": 0.0001821206273885589, "loss": 4.2072, "step": 628000 }, { "epoch": 1.18, "learning_rate": 0.0001820267743848874, "loss": 4.1922, "step": 628500 }, { "epoch": 1.18, "learning_rate": 0.00018193292138121586, "loss": 4.2127, "step": 629000 }, { "epoch": 1.18, "learning_rate": 0.00018183906837754434, "loss": 4.2231, "step": 629500 }, { "epoch": 1.18, "learning_rate": 0.00018174521537387282, "loss": 4.2032, "step": 630000 }, { "epoch": 1.18, "learning_rate": 0.00018165136237020127, "loss": 4.2178, "step": 630500 }, { "epoch": 1.18, "learning_rate": 0.00018155750936652975, "loss": 4.2204, "step": 631000 }, { "epoch": 1.19, "learning_rate": 0.00018146365636285822, "loss": 4.2148, "step": 631500 }, { "epoch": 1.19, "learning_rate": 0.0001813698033591867, "loss": 4.2049, "step": 632000 }, { "epoch": 1.19, "learning_rate": 0.00018127595035551515, "loss": 4.2093, "step": 632500 }, { "epoch": 1.19, "learning_rate": 0.00018118209735184366, "loss": 4.2281, "step": 633000 }, { "epoch": 1.19, "learning_rate": 0.0001810882443481721, "loss": 4.1951, "step": 633500 }, { "epoch": 1.19, "learning_rate": 0.00018099439134450056, "loss": 4.2293, "step": 634000 }, { "epoch": 1.19, "learning_rate": 0.00018090053834082906, "loss": 4.1989, "step": 634500 }, { "epoch": 1.19, "learning_rate": 0.0001808066853371575, "loss": 4.2071, "step": 635000 }, { "epoch": 1.19, "learning_rate": 0.000180712832333486, "loss": 4.2136, "step": 635500 }, { "epoch": 1.19, "learning_rate": 0.00018061897932981444, "loss": 4.2002, "step": 636000 }, { "epoch": 1.19, "learning_rate": 0.00018052512632614294, "loss": 4.213, "step": 636500 }, { "epoch": 1.2, "learning_rate": 0.0001804312733224714, "loss": 4.1991, "step": 637000 }, { "epoch": 1.2, "learning_rate": 0.00018033742031879984, "loss": 4.2209, "step": 637500 }, { "epoch": 1.2, "learning_rate": 0.00018024356731512835, "loss": 4.2063, "step": 638000 }, { "epoch": 1.2, "learning_rate": 0.0001801497143114568, "loss": 4.182, "step": 638500 }, { "epoch": 1.2, "learning_rate": 0.00018005586130778528, "loss": 4.2142, "step": 639000 }, { "epoch": 1.2, "learning_rate": 0.00017996200830411375, "loss": 4.2075, "step": 639500 }, { "epoch": 1.2, "learning_rate": 0.00017986815530044223, "loss": 4.2225, "step": 640000 }, { "epoch": 1.2, "learning_rate": 0.00017977430229677068, "loss": 4.1956, "step": 640500 }, { "epoch": 1.2, "learning_rate": 0.00017968044929309916, "loss": 4.2198, "step": 641000 }, { "epoch": 1.2, "learning_rate": 0.00017958659628942764, "loss": 4.2175, "step": 641500 }, { "epoch": 1.21, "learning_rate": 0.0001794927432857561, "loss": 4.2163, "step": 642000 }, { "epoch": 1.21, "learning_rate": 0.0001793988902820846, "loss": 4.2066, "step": 642500 }, { "epoch": 1.21, "learning_rate": 0.00017930503727841304, "loss": 4.1946, "step": 643000 }, { "epoch": 1.21, "learning_rate": 0.0001792111842747415, "loss": 4.203, "step": 643500 }, { "epoch": 1.21, "learning_rate": 0.00017911733127107, "loss": 4.2222, "step": 644000 }, { "epoch": 1.21, "learning_rate": 0.00017902347826739845, "loss": 4.2074, "step": 644500 }, { "epoch": 1.21, "learning_rate": 0.00017892962526372692, "loss": 4.2008, "step": 645000 }, { "epoch": 1.21, "learning_rate": 0.0001788357722600554, "loss": 4.1977, "step": 645500 }, { "epoch": 1.21, "learning_rate": 0.00017874191925638388, "loss": 4.2094, "step": 646000 }, { "epoch": 1.21, "learning_rate": 0.00017864806625271233, "loss": 4.1833, "step": 646500 }, { "epoch": 1.21, "learning_rate": 0.00017855421324904083, "loss": 4.2017, "step": 647000 }, { "epoch": 1.22, "learning_rate": 0.00017846036024536928, "loss": 4.1936, "step": 647500 }, { "epoch": 1.22, "learning_rate": 0.00017836650724169773, "loss": 4.1945, "step": 648000 }, { "epoch": 1.22, "learning_rate": 0.0001782726542380262, "loss": 4.2016, "step": 648500 }, { "epoch": 1.22, "learning_rate": 0.0001781788012343547, "loss": 4.2021, "step": 649000 }, { "epoch": 1.22, "learning_rate": 0.00017808494823068317, "loss": 4.2121, "step": 649500 }, { "epoch": 1.22, "learning_rate": 0.00017799109522701162, "loss": 4.2016, "step": 650000 }, { "epoch": 1.22, "learning_rate": 0.00017789724222334012, "loss": 4.1895, "step": 650500 }, { "epoch": 1.22, "learning_rate": 0.00017780338921966857, "loss": 4.2204, "step": 651000 }, { "epoch": 1.22, "learning_rate": 0.00017770953621599702, "loss": 4.1967, "step": 651500 }, { "epoch": 1.22, "learning_rate": 0.00017761568321232553, "loss": 4.1896, "step": 652000 }, { "epoch": 1.22, "learning_rate": 0.00017752183020865398, "loss": 4.2053, "step": 652500 }, { "epoch": 1.23, "learning_rate": 0.00017742797720498245, "loss": 4.217, "step": 653000 }, { "epoch": 1.23, "learning_rate": 0.00017733412420131093, "loss": 4.2119, "step": 653500 }, { "epoch": 1.23, "learning_rate": 0.00017724027119763938, "loss": 4.1935, "step": 654000 }, { "epoch": 1.23, "learning_rate": 0.00017714641819396786, "loss": 4.2168, "step": 654500 }, { "epoch": 1.23, "learning_rate": 0.00017705256519029634, "loss": 4.2104, "step": 655000 }, { "epoch": 1.23, "learning_rate": 0.00017695871218662481, "loss": 4.2074, "step": 655500 }, { "epoch": 1.23, "learning_rate": 0.00017686485918295326, "loss": 4.1953, "step": 656000 }, { "epoch": 1.23, "learning_rate": 0.00017677100617928177, "loss": 4.2014, "step": 656500 }, { "epoch": 1.23, "learning_rate": 0.00017667715317561022, "loss": 4.2013, "step": 657000 }, { "epoch": 1.23, "learning_rate": 0.00017658330017193867, "loss": 4.1982, "step": 657500 }, { "epoch": 1.24, "learning_rate": 0.00017648944716826717, "loss": 4.1911, "step": 658000 }, { "epoch": 1.24, "learning_rate": 0.00017639559416459562, "loss": 4.2104, "step": 658500 }, { "epoch": 1.24, "learning_rate": 0.0001763017411609241, "loss": 4.2123, "step": 659000 }, { "epoch": 1.24, "learning_rate": 0.00017620788815725258, "loss": 4.2062, "step": 659500 }, { "epoch": 1.24, "learning_rate": 0.00017611403515358106, "loss": 4.2148, "step": 660000 }, { "epoch": 1.24, "learning_rate": 0.0001760201821499095, "loss": 4.1897, "step": 660500 }, { "epoch": 1.24, "learning_rate": 0.00017592632914623796, "loss": 4.1842, "step": 661000 }, { "epoch": 1.24, "learning_rate": 0.00017583247614256646, "loss": 4.1976, "step": 661500 }, { "epoch": 1.24, "learning_rate": 0.0001757386231388949, "loss": 4.1973, "step": 662000 }, { "epoch": 1.24, "learning_rate": 0.0001756447701352234, "loss": 4.1975, "step": 662500 }, { "epoch": 1.24, "learning_rate": 0.00017555091713155187, "loss": 4.1829, "step": 663000 }, { "epoch": 1.25, "learning_rate": 0.00017545706412788034, "loss": 4.2162, "step": 663500 }, { "epoch": 1.25, "learning_rate": 0.0001753632111242088, "loss": 4.1734, "step": 664000 }, { "epoch": 1.25, "learning_rate": 0.00017526935812053727, "loss": 4.2041, "step": 664500 }, { "epoch": 1.25, "learning_rate": 0.00017517550511686575, "loss": 4.1929, "step": 665000 }, { "epoch": 1.25, "learning_rate": 0.0001750816521131942, "loss": 4.2058, "step": 665500 }, { "epoch": 1.25, "learning_rate": 0.0001749877991095227, "loss": 4.1985, "step": 666000 }, { "epoch": 1.25, "learning_rate": 0.00017489394610585115, "loss": 4.2067, "step": 666500 }, { "epoch": 1.25, "learning_rate": 0.0001748000931021796, "loss": 4.1944, "step": 667000 }, { "epoch": 1.25, "learning_rate": 0.0001747062400985081, "loss": 4.2133, "step": 667500 }, { "epoch": 1.25, "learning_rate": 0.00017461238709483656, "loss": 4.1892, "step": 668000 }, { "epoch": 1.25, "learning_rate": 0.00017451853409116504, "loss": 4.2097, "step": 668500 }, { "epoch": 1.26, "learning_rate": 0.00017442468108749351, "loss": 4.2034, "step": 669000 }, { "epoch": 1.26, "learning_rate": 0.000174330828083822, "loss": 4.1851, "step": 669500 }, { "epoch": 1.26, "learning_rate": 0.00017423697508015044, "loss": 4.212, "step": 670000 }, { "epoch": 1.26, "learning_rate": 0.00017414312207647895, "loss": 4.2136, "step": 670500 }, { "epoch": 1.26, "learning_rate": 0.0001740492690728074, "loss": 4.1989, "step": 671000 }, { "epoch": 1.26, "learning_rate": 0.00017395541606913585, "loss": 4.2107, "step": 671500 }, { "epoch": 1.26, "learning_rate": 0.00017386156306546432, "loss": 4.1927, "step": 672000 }, { "epoch": 1.26, "learning_rate": 0.0001737677100617928, "loss": 4.1823, "step": 672500 }, { "epoch": 1.26, "learning_rate": 0.00017367385705812128, "loss": 4.1967, "step": 673000 }, { "epoch": 1.26, "learning_rate": 0.00017358000405444973, "loss": 4.1905, "step": 673500 }, { "epoch": 1.27, "learning_rate": 0.00017348615105077823, "loss": 4.2168, "step": 674000 }, { "epoch": 1.27, "learning_rate": 0.00017339229804710668, "loss": 4.1814, "step": 674500 }, { "epoch": 1.27, "learning_rate": 0.00017329844504343514, "loss": 4.2118, "step": 675000 }, { "epoch": 1.27, "learning_rate": 0.00017320459203976364, "loss": 4.2121, "step": 675500 }, { "epoch": 1.27, "learning_rate": 0.0001731107390360921, "loss": 4.192, "step": 676000 }, { "epoch": 1.27, "learning_rate": 0.00017301688603242057, "loss": 4.1885, "step": 676500 }, { "epoch": 1.27, "learning_rate": 0.00017292303302874904, "loss": 4.2103, "step": 677000 }, { "epoch": 1.27, "learning_rate": 0.0001728291800250775, "loss": 4.1964, "step": 677500 }, { "epoch": 1.27, "learning_rate": 0.00017273532702140597, "loss": 4.1836, "step": 678000 }, { "epoch": 1.27, "learning_rate": 0.00017264147401773445, "loss": 4.2167, "step": 678500 }, { "epoch": 1.27, "learning_rate": 0.00017254762101406293, "loss": 4.1908, "step": 679000 }, { "epoch": 1.28, "learning_rate": 0.00017245376801039138, "loss": 4.2083, "step": 679500 }, { "epoch": 1.28, "learning_rate": 0.00017235991500671988, "loss": 4.2022, "step": 680000 }, { "epoch": 1.28, "learning_rate": 0.00017226606200304833, "loss": 4.1904, "step": 680500 }, { "epoch": 1.28, "learning_rate": 0.00017217220899937678, "loss": 4.2128, "step": 681000 }, { "epoch": 1.28, "learning_rate": 0.0001720783559957053, "loss": 4.2046, "step": 681500 }, { "epoch": 1.28, "learning_rate": 0.00017198450299203374, "loss": 4.2045, "step": 682000 }, { "epoch": 1.28, "learning_rate": 0.00017189064998836222, "loss": 4.2121, "step": 682500 }, { "epoch": 1.28, "learning_rate": 0.0001717967969846907, "loss": 4.1857, "step": 683000 }, { "epoch": 1.28, "learning_rate": 0.00017170294398101917, "loss": 4.2036, "step": 683500 }, { "epoch": 1.28, "learning_rate": 0.00017160909097734762, "loss": 4.2028, "step": 684000 }, { "epoch": 1.28, "learning_rate": 0.00017151523797367607, "loss": 4.1933, "step": 684500 }, { "epoch": 1.29, "learning_rate": 0.00017142138497000458, "loss": 4.1952, "step": 685000 }, { "epoch": 1.29, "learning_rate": 0.00017132753196633303, "loss": 4.1993, "step": 685500 }, { "epoch": 1.29, "learning_rate": 0.0001712336789626615, "loss": 4.187, "step": 686000 }, { "epoch": 1.29, "learning_rate": 0.00017113982595898998, "loss": 4.2031, "step": 686500 }, { "epoch": 1.29, "learning_rate": 0.00017104597295531846, "loss": 4.198, "step": 687000 }, { "epoch": 1.29, "learning_rate": 0.0001709521199516469, "loss": 4.2016, "step": 687500 }, { "epoch": 1.29, "learning_rate": 0.00017085826694797539, "loss": 4.2017, "step": 688000 }, { "epoch": 1.29, "learning_rate": 0.00017076441394430386, "loss": 4.1881, "step": 688500 }, { "epoch": 1.29, "learning_rate": 0.0001706705609406323, "loss": 4.1924, "step": 689000 }, { "epoch": 1.29, "learning_rate": 0.00017057670793696082, "loss": 4.1897, "step": 689500 }, { "epoch": 1.3, "learning_rate": 0.00017048285493328927, "loss": 4.1903, "step": 690000 }, { "epoch": 1.3, "learning_rate": 0.00017038900192961772, "loss": 4.1983, "step": 690500 }, { "epoch": 1.3, "learning_rate": 0.00017029514892594622, "loss": 4.199, "step": 691000 }, { "epoch": 1.3, "learning_rate": 0.00017020129592227467, "loss": 4.216, "step": 691500 }, { "epoch": 1.3, "learning_rate": 0.00017010744291860315, "loss": 4.1864, "step": 692000 }, { "epoch": 1.3, "learning_rate": 0.00017001358991493163, "loss": 4.1839, "step": 692500 }, { "epoch": 1.3, "learning_rate": 0.0001699197369112601, "loss": 4.2082, "step": 693000 }, { "epoch": 1.3, "learning_rate": 0.00016982588390758856, "loss": 4.1988, "step": 693500 }, { "epoch": 1.3, "learning_rate": 0.00016973203090391706, "loss": 4.2104, "step": 694000 }, { "epoch": 1.3, "learning_rate": 0.0001696381779002455, "loss": 4.1891, "step": 694500 }, { "epoch": 1.3, "learning_rate": 0.00016954432489657396, "loss": 4.1901, "step": 695000 }, { "epoch": 1.31, "learning_rate": 0.00016945047189290247, "loss": 4.1977, "step": 695500 }, { "epoch": 1.31, "learning_rate": 0.00016935661888923092, "loss": 4.1946, "step": 696000 }, { "epoch": 1.31, "learning_rate": 0.0001692627658855594, "loss": 4.1983, "step": 696500 }, { "epoch": 1.31, "learning_rate": 0.00016916891288188784, "loss": 4.1952, "step": 697000 }, { "epoch": 1.31, "learning_rate": 0.00016907505987821635, "loss": 4.2005, "step": 697500 }, { "epoch": 1.31, "learning_rate": 0.0001689812068745448, "loss": 4.1933, "step": 698000 }, { "epoch": 1.31, "learning_rate": 0.00016888735387087325, "loss": 4.2106, "step": 698500 }, { "epoch": 1.31, "learning_rate": 0.00016879350086720175, "loss": 4.1811, "step": 699000 }, { "epoch": 1.31, "learning_rate": 0.0001686996478635302, "loss": 4.1948, "step": 699500 }, { "epoch": 1.31, "learning_rate": 0.00016860579485985865, "loss": 4.2029, "step": 700000 }, { "epoch": 1.31, "learning_rate": 0.00016851194185618716, "loss": 4.1943, "step": 700500 }, { "epoch": 1.32, "learning_rate": 0.0001684180888525156, "loss": 4.185, "step": 701000 }, { "epoch": 1.32, "learning_rate": 0.00016832423584884409, "loss": 4.1702, "step": 701500 }, { "epoch": 1.32, "learning_rate": 0.00016823038284517256, "loss": 4.1864, "step": 702000 }, { "epoch": 1.32, "learning_rate": 0.00016813652984150104, "loss": 4.1979, "step": 702500 }, { "epoch": 1.32, "learning_rate": 0.0001680426768378295, "loss": 4.1825, "step": 703000 }, { "epoch": 1.32, "learning_rate": 0.000167948823834158, "loss": 4.2068, "step": 703500 }, { "epoch": 1.32, "learning_rate": 0.00016785497083048645, "loss": 4.1958, "step": 704000 }, { "epoch": 1.32, "learning_rate": 0.0001677611178268149, "loss": 4.1965, "step": 704500 }, { "epoch": 1.32, "learning_rate": 0.0001676672648231434, "loss": 4.1962, "step": 705000 }, { "epoch": 1.32, "learning_rate": 0.00016757341181947185, "loss": 4.1855, "step": 705500 }, { "epoch": 1.33, "learning_rate": 0.00016747955881580033, "loss": 4.1735, "step": 706000 }, { "epoch": 1.33, "learning_rate": 0.0001673857058121288, "loss": 4.1977, "step": 706500 }, { "epoch": 1.33, "learning_rate": 0.00016729185280845728, "loss": 4.1997, "step": 707000 }, { "epoch": 1.33, "learning_rate": 0.00016719799980478573, "loss": 4.1971, "step": 707500 }, { "epoch": 1.33, "learning_rate": 0.00016710414680111424, "loss": 4.1821, "step": 708000 }, { "epoch": 1.33, "learning_rate": 0.0001670102937974427, "loss": 4.1996, "step": 708500 }, { "epoch": 1.33, "learning_rate": 0.00016691644079377114, "loss": 4.1816, "step": 709000 }, { "epoch": 1.33, "learning_rate": 0.00016682258779009962, "loss": 4.1945, "step": 709500 }, { "epoch": 1.33, "learning_rate": 0.0001667287347864281, "loss": 4.2, "step": 710000 }, { "epoch": 1.33, "learning_rate": 0.00016663488178275654, "loss": 4.1884, "step": 710500 }, { "epoch": 1.33, "learning_rate": 0.00016654102877908502, "loss": 4.2082, "step": 711000 }, { "epoch": 1.34, "learning_rate": 0.0001664471757754135, "loss": 4.1925, "step": 711500 }, { "epoch": 1.34, "learning_rate": 0.00016635332277174198, "loss": 4.1997, "step": 712000 }, { "epoch": 1.34, "learning_rate": 0.00016625946976807043, "loss": 4.178, "step": 712500 }, { "epoch": 1.34, "learning_rate": 0.00016616561676439893, "loss": 4.1777, "step": 713000 }, { "epoch": 1.34, "learning_rate": 0.00016607176376072738, "loss": 4.1983, "step": 713500 }, { "epoch": 1.34, "learning_rate": 0.00016597791075705583, "loss": 4.1841, "step": 714000 }, { "epoch": 1.34, "learning_rate": 0.00016588405775338434, "loss": 4.192, "step": 714500 }, { "epoch": 1.34, "learning_rate": 0.0001657902047497128, "loss": 4.1998, "step": 715000 }, { "epoch": 1.34, "learning_rate": 0.00016569635174604126, "loss": 4.1924, "step": 715500 }, { "epoch": 1.34, "learning_rate": 0.00016560249874236974, "loss": 4.1828, "step": 716000 }, { "epoch": 1.34, "learning_rate": 0.00016550864573869822, "loss": 4.1877, "step": 716500 }, { "epoch": 1.35, "learning_rate": 0.00016541479273502667, "loss": 4.182, "step": 717000 }, { "epoch": 1.35, "learning_rate": 0.00016532093973135517, "loss": 4.1934, "step": 717500 }, { "epoch": 1.35, "learning_rate": 0.00016522708672768362, "loss": 4.21, "step": 718000 }, { "epoch": 1.35, "learning_rate": 0.00016513323372401207, "loss": 4.1916, "step": 718500 }, { "epoch": 1.35, "learning_rate": 0.00016503938072034058, "loss": 4.1703, "step": 719000 }, { "epoch": 1.35, "learning_rate": 0.00016494552771666903, "loss": 4.1835, "step": 719500 }, { "epoch": 1.35, "learning_rate": 0.0001648516747129975, "loss": 4.1846, "step": 720000 }, { "epoch": 1.35, "learning_rate": 0.00016475782170932596, "loss": 4.1917, "step": 720500 }, { "epoch": 1.35, "learning_rate": 0.00016466396870565443, "loss": 4.1941, "step": 721000 }, { "epoch": 1.35, "learning_rate": 0.0001645701157019829, "loss": 4.1991, "step": 721500 }, { "epoch": 1.36, "learning_rate": 0.00016447626269831136, "loss": 4.1985, "step": 722000 }, { "epoch": 1.36, "learning_rate": 0.00016438240969463987, "loss": 4.193, "step": 722500 }, { "epoch": 1.36, "learning_rate": 0.00016428855669096832, "loss": 4.1767, "step": 723000 }, { "epoch": 1.36, "learning_rate": 0.00016419470368729677, "loss": 4.1779, "step": 723500 }, { "epoch": 1.36, "learning_rate": 0.00016410085068362527, "loss": 4.2041, "step": 724000 }, { "epoch": 1.36, "learning_rate": 0.00016400699767995372, "loss": 4.1821, "step": 724500 }, { "epoch": 1.36, "learning_rate": 0.0001639131446762822, "loss": 4.2033, "step": 725000 }, { "epoch": 1.36, "learning_rate": 0.00016381929167261068, "loss": 4.1786, "step": 725500 }, { "epoch": 1.36, "learning_rate": 0.00016372543866893915, "loss": 4.1741, "step": 726000 }, { "epoch": 1.36, "learning_rate": 0.0001636315856652676, "loss": 4.2011, "step": 726500 }, { "epoch": 1.36, "learning_rate": 0.0001635377326615961, "loss": 4.1889, "step": 727000 }, { "epoch": 1.37, "learning_rate": 0.00016344387965792456, "loss": 4.1788, "step": 727500 }, { "epoch": 1.37, "learning_rate": 0.000163350026654253, "loss": 4.1782, "step": 728000 }, { "epoch": 1.37, "learning_rate": 0.00016325617365058151, "loss": 4.1972, "step": 728500 }, { "epoch": 1.37, "learning_rate": 0.00016316232064690996, "loss": 4.1815, "step": 729000 }, { "epoch": 1.37, "learning_rate": 0.00016306846764323844, "loss": 4.1941, "step": 729500 }, { "epoch": 1.37, "learning_rate": 0.00016297461463956692, "loss": 4.2002, "step": 730000 }, { "epoch": 1.37, "learning_rate": 0.0001628807616358954, "loss": 4.1908, "step": 730500 }, { "epoch": 1.37, "learning_rate": 0.00016278690863222385, "loss": 4.1763, "step": 731000 }, { "epoch": 1.37, "learning_rate": 0.00016269305562855232, "loss": 4.1936, "step": 731500 }, { "epoch": 1.37, "learning_rate": 0.0001625992026248808, "loss": 4.1784, "step": 732000 }, { "epoch": 1.37, "learning_rate": 0.00016250534962120925, "loss": 4.1885, "step": 732500 }, { "epoch": 1.38, "learning_rate": 0.00016241149661753773, "loss": 4.1771, "step": 733000 }, { "epoch": 1.38, "learning_rate": 0.0001623176436138662, "loss": 4.191, "step": 733500 }, { "epoch": 1.38, "learning_rate": 0.00016222379061019466, "loss": 4.1733, "step": 734000 }, { "epoch": 1.38, "learning_rate": 0.00016212993760652313, "loss": 4.1905, "step": 734500 }, { "epoch": 1.38, "learning_rate": 0.0001620360846028516, "loss": 4.1791, "step": 735000 }, { "epoch": 1.38, "learning_rate": 0.0001619422315991801, "loss": 4.1753, "step": 735500 }, { "epoch": 1.38, "learning_rate": 0.00016184837859550854, "loss": 4.1822, "step": 736000 }, { "epoch": 1.38, "learning_rate": 0.00016175452559183704, "loss": 4.1787, "step": 736500 }, { "epoch": 1.38, "learning_rate": 0.0001616606725881655, "loss": 4.1932, "step": 737000 }, { "epoch": 1.38, "learning_rate": 0.00016156681958449394, "loss": 4.1827, "step": 737500 }, { "epoch": 1.39, "learning_rate": 0.00016147296658082245, "loss": 4.1915, "step": 738000 }, { "epoch": 1.39, "learning_rate": 0.0001613791135771509, "loss": 4.1811, "step": 738500 }, { "epoch": 1.39, "learning_rate": 0.00016128526057347938, "loss": 4.1798, "step": 739000 }, { "epoch": 1.39, "learning_rate": 0.00016119140756980785, "loss": 4.1899, "step": 739500 }, { "epoch": 1.39, "learning_rate": 0.00016109755456613633, "loss": 4.2125, "step": 740000 }, { "epoch": 1.39, "learning_rate": 0.00016100370156246478, "loss": 4.1874, "step": 740500 }, { "epoch": 1.39, "learning_rate": 0.0001609098485587933, "loss": 4.1687, "step": 741000 }, { "epoch": 1.39, "learning_rate": 0.00016081599555512174, "loss": 4.1913, "step": 741500 }, { "epoch": 1.39, "learning_rate": 0.0001607221425514502, "loss": 4.1759, "step": 742000 }, { "epoch": 1.39, "learning_rate": 0.0001606282895477787, "loss": 4.1903, "step": 742500 }, { "epoch": 1.39, "learning_rate": 0.00016053443654410714, "loss": 4.1879, "step": 743000 }, { "epoch": 1.4, "learning_rate": 0.00016044058354043562, "loss": 4.1911, "step": 743500 }, { "epoch": 1.4, "learning_rate": 0.0001603467305367641, "loss": 4.1772, "step": 744000 }, { "epoch": 1.4, "learning_rate": 0.00016025287753309255, "loss": 4.2081, "step": 744500 }, { "epoch": 1.4, "learning_rate": 0.00016015902452942102, "loss": 4.1775, "step": 745000 }, { "epoch": 1.4, "learning_rate": 0.00016006517152574948, "loss": 4.2105, "step": 745500 }, { "epoch": 1.4, "learning_rate": 0.00015997131852207798, "loss": 4.193, "step": 746000 }, { "epoch": 1.4, "learning_rate": 0.00015987746551840643, "loss": 4.1761, "step": 746500 }, { "epoch": 1.4, "learning_rate": 0.00015978361251473488, "loss": 4.1868, "step": 747000 }, { "epoch": 1.4, "learning_rate": 0.00015968975951106338, "loss": 4.171, "step": 747500 }, { "epoch": 1.4, "learning_rate": 0.00015959590650739184, "loss": 4.1779, "step": 748000 }, { "epoch": 1.4, "learning_rate": 0.0001595020535037203, "loss": 4.1982, "step": 748500 }, { "epoch": 1.41, "learning_rate": 0.0001594082005000488, "loss": 4.186, "step": 749000 }, { "epoch": 1.41, "learning_rate": 0.00015931434749637727, "loss": 4.1693, "step": 749500 }, { "epoch": 1.41, "learning_rate": 0.00015922049449270572, "loss": 4.1854, "step": 750000 }, { "epoch": 1.41, "learning_rate": 0.00015912664148903422, "loss": 4.1853, "step": 750500 }, { "epoch": 1.41, "learning_rate": 0.00015903278848536267, "loss": 4.1884, "step": 751000 }, { "epoch": 1.41, "learning_rate": 0.00015893893548169112, "loss": 4.1863, "step": 751500 }, { "epoch": 1.41, "learning_rate": 0.00015884508247801963, "loss": 4.192, "step": 752000 }, { "epoch": 1.41, "learning_rate": 0.00015875122947434808, "loss": 4.1714, "step": 752500 }, { "epoch": 1.41, "learning_rate": 0.00015865737647067656, "loss": 4.1813, "step": 753000 }, { "epoch": 1.41, "learning_rate": 0.00015856352346700503, "loss": 4.1751, "step": 753500 }, { "epoch": 1.42, "learning_rate": 0.0001584696704633335, "loss": 4.1841, "step": 754000 }, { "epoch": 1.42, "learning_rate": 0.00015837581745966196, "loss": 4.1959, "step": 754500 }, { "epoch": 1.42, "learning_rate": 0.00015828196445599044, "loss": 4.1777, "step": 755000 }, { "epoch": 1.42, "learning_rate": 0.00015818811145231892, "loss": 4.1776, "step": 755500 }, { "epoch": 1.42, "learning_rate": 0.00015809425844864737, "loss": 4.1805, "step": 756000 }, { "epoch": 1.42, "learning_rate": 0.00015800040544497584, "loss": 4.1792, "step": 756500 }, { "epoch": 1.42, "learning_rate": 0.00015790655244130432, "loss": 4.1975, "step": 757000 }, { "epoch": 1.42, "learning_rate": 0.00015781269943763277, "loss": 4.1828, "step": 757500 }, { "epoch": 1.42, "learning_rate": 0.00015771884643396125, "loss": 4.182, "step": 758000 }, { "epoch": 1.42, "learning_rate": 0.00015762499343028973, "loss": 4.1866, "step": 758500 }, { "epoch": 1.42, "learning_rate": 0.0001575311404266182, "loss": 4.1729, "step": 759000 }, { "epoch": 1.43, "learning_rate": 0.00015743728742294665, "loss": 4.1691, "step": 759500 }, { "epoch": 1.43, "learning_rate": 0.00015734343441927516, "loss": 4.1738, "step": 760000 }, { "epoch": 1.43, "learning_rate": 0.0001572495814156036, "loss": 4.186, "step": 760500 }, { "epoch": 1.43, "learning_rate": 0.00015715572841193206, "loss": 4.1774, "step": 761000 }, { "epoch": 1.43, "learning_rate": 0.00015706187540826056, "loss": 4.1862, "step": 761500 }, { "epoch": 1.43, "learning_rate": 0.000156968022404589, "loss": 4.1813, "step": 762000 }, { "epoch": 1.43, "learning_rate": 0.0001568741694009175, "loss": 4.1995, "step": 762500 }, { "epoch": 1.43, "learning_rate": 0.00015678031639724597, "loss": 4.1895, "step": 763000 }, { "epoch": 1.43, "learning_rate": 0.00015668646339357445, "loss": 4.1856, "step": 763500 }, { "epoch": 1.43, "learning_rate": 0.0001565926103899029, "loss": 4.1886, "step": 764000 }, { "epoch": 1.44, "learning_rate": 0.0001564987573862314, "loss": 4.185, "step": 764500 }, { "epoch": 1.44, "learning_rate": 0.00015640490438255985, "loss": 4.1698, "step": 765000 }, { "epoch": 1.44, "learning_rate": 0.0001563110513788883, "loss": 4.1734, "step": 765500 }, { "epoch": 1.44, "learning_rate": 0.0001562171983752168, "loss": 4.1807, "step": 766000 }, { "epoch": 1.44, "learning_rate": 0.00015612334537154526, "loss": 4.1649, "step": 766500 }, { "epoch": 1.44, "learning_rate": 0.00015602949236787373, "loss": 4.1836, "step": 767000 }, { "epoch": 1.44, "learning_rate": 0.0001559356393642022, "loss": 4.1665, "step": 767500 }, { "epoch": 1.44, "learning_rate": 0.00015584178636053066, "loss": 4.1743, "step": 768000 }, { "epoch": 1.44, "learning_rate": 0.00015574793335685914, "loss": 4.1779, "step": 768500 }, { "epoch": 1.44, "learning_rate": 0.0001556540803531876, "loss": 4.1824, "step": 769000 }, { "epoch": 1.44, "learning_rate": 0.0001555602273495161, "loss": 4.19, "step": 769500 }, { "epoch": 1.45, "learning_rate": 0.00015546637434584454, "loss": 4.1761, "step": 770000 }, { "epoch": 1.45, "learning_rate": 0.000155372521342173, "loss": 4.1614, "step": 770500 }, { "epoch": 1.45, "learning_rate": 0.0001552786683385015, "loss": 4.1856, "step": 771000 }, { "epoch": 1.45, "learning_rate": 0.00015518481533482995, "loss": 4.1783, "step": 771500 }, { "epoch": 1.45, "learning_rate": 0.00015509096233115843, "loss": 4.1799, "step": 772000 }, { "epoch": 1.45, "learning_rate": 0.0001549971093274869, "loss": 4.1716, "step": 772500 }, { "epoch": 1.45, "learning_rate": 0.00015490325632381538, "loss": 4.1699, "step": 773000 }, { "epoch": 1.45, "learning_rate": 0.00015480940332014383, "loss": 4.181, "step": 773500 }, { "epoch": 1.45, "learning_rate": 0.00015471555031647234, "loss": 4.1692, "step": 774000 }, { "epoch": 1.45, "learning_rate": 0.00015462169731280079, "loss": 4.1682, "step": 774500 }, { "epoch": 1.45, "learning_rate": 0.00015452784430912924, "loss": 4.1863, "step": 775000 }, { "epoch": 1.46, "learning_rate": 0.00015443399130545774, "loss": 4.1728, "step": 775500 }, { "epoch": 1.46, "learning_rate": 0.0001543401383017862, "loss": 4.1685, "step": 776000 }, { "epoch": 1.46, "learning_rate": 0.00015424628529811467, "loss": 4.1935, "step": 776500 }, { "epoch": 1.46, "learning_rate": 0.00015415243229444315, "loss": 4.1843, "step": 777000 }, { "epoch": 1.46, "learning_rate": 0.00015405857929077162, "loss": 4.1879, "step": 777500 }, { "epoch": 1.46, "learning_rate": 0.00015396472628710007, "loss": 4.1884, "step": 778000 }, { "epoch": 1.46, "learning_rate": 0.00015387087328342855, "loss": 4.1629, "step": 778500 }, { "epoch": 1.46, "learning_rate": 0.00015377702027975703, "loss": 4.1926, "step": 779000 }, { "epoch": 1.46, "learning_rate": 0.00015368316727608548, "loss": 4.174, "step": 779500 }, { "epoch": 1.46, "learning_rate": 0.00015358931427241398, "loss": 4.1655, "step": 780000 }, { "epoch": 1.47, "learning_rate": 0.00015349546126874243, "loss": 4.1828, "step": 780500 }, { "epoch": 1.47, "learning_rate": 0.00015340160826507088, "loss": 4.1699, "step": 781000 }, { "epoch": 1.47, "learning_rate": 0.00015330775526139936, "loss": 4.1737, "step": 781500 }, { "epoch": 1.47, "learning_rate": 0.00015321390225772784, "loss": 4.1799, "step": 782000 }, { "epoch": 1.47, "learning_rate": 0.00015312004925405632, "loss": 4.1621, "step": 782500 }, { "epoch": 1.47, "learning_rate": 0.00015302619625038477, "loss": 4.1555, "step": 783000 }, { "epoch": 1.47, "learning_rate": 0.00015293234324671327, "loss": 4.1955, "step": 783500 }, { "epoch": 1.47, "learning_rate": 0.00015283849024304172, "loss": 4.1794, "step": 784000 }, { "epoch": 1.47, "learning_rate": 0.00015274463723937017, "loss": 4.1778, "step": 784500 }, { "epoch": 1.47, "learning_rate": 0.00015265078423569868, "loss": 4.1704, "step": 785000 }, { "epoch": 1.47, "learning_rate": 0.00015255693123202713, "loss": 4.1734, "step": 785500 }, { "epoch": 1.48, "learning_rate": 0.0001524630782283556, "loss": 4.1817, "step": 786000 }, { "epoch": 1.48, "learning_rate": 0.00015236922522468408, "loss": 4.172, "step": 786500 }, { "epoch": 1.48, "learning_rate": 0.00015227537222101256, "loss": 4.168, "step": 787000 }, { "epoch": 1.48, "learning_rate": 0.000152181519217341, "loss": 4.1887, "step": 787500 }, { "epoch": 1.48, "learning_rate": 0.0001520876662136695, "loss": 4.1689, "step": 788000 }, { "epoch": 1.48, "learning_rate": 0.00015199381320999796, "loss": 4.1787, "step": 788500 }, { "epoch": 1.48, "learning_rate": 0.00015189996020632641, "loss": 4.1706, "step": 789000 }, { "epoch": 1.48, "learning_rate": 0.00015180610720265492, "loss": 4.1888, "step": 789500 }, { "epoch": 1.48, "learning_rate": 0.00015171225419898337, "loss": 4.1842, "step": 790000 }, { "epoch": 1.48, "learning_rate": 0.00015161840119531185, "loss": 4.1832, "step": 790500 }, { "epoch": 1.48, "learning_rate": 0.00015152454819164032, "loss": 4.165, "step": 791000 }, { "epoch": 1.49, "learning_rate": 0.00015143069518796877, "loss": 4.1736, "step": 791500 }, { "epoch": 1.49, "learning_rate": 0.00015133684218429725, "loss": 4.1654, "step": 792000 }, { "epoch": 1.49, "learning_rate": 0.00015124298918062573, "loss": 4.1936, "step": 792500 }, { "epoch": 1.49, "learning_rate": 0.0001511491361769542, "loss": 4.1598, "step": 793000 }, { "epoch": 1.49, "learning_rate": 0.00015105528317328266, "loss": 4.1692, "step": 793500 }, { "epoch": 1.49, "learning_rate": 0.0001509614301696111, "loss": 4.1788, "step": 794000 }, { "epoch": 1.49, "learning_rate": 0.0001508675771659396, "loss": 4.1794, "step": 794500 }, { "epoch": 1.49, "learning_rate": 0.00015077372416226806, "loss": 4.1873, "step": 795000 }, { "epoch": 1.49, "learning_rate": 0.00015067987115859654, "loss": 4.1755, "step": 795500 }, { "epoch": 1.49, "learning_rate": 0.00015058601815492502, "loss": 4.1757, "step": 796000 }, { "epoch": 1.5, "learning_rate": 0.0001504921651512535, "loss": 4.1948, "step": 796500 }, { "epoch": 1.5, "learning_rate": 0.00015039831214758194, "loss": 4.1691, "step": 797000 }, { "epoch": 1.5, "learning_rate": 0.00015030445914391045, "loss": 4.1865, "step": 797500 }, { "epoch": 1.5, "learning_rate": 0.0001502106061402389, "loss": 4.1511, "step": 798000 }, { "epoch": 1.5, "learning_rate": 0.00015011675313656735, "loss": 4.1769, "step": 798500 }, { "epoch": 1.5, "learning_rate": 0.00015002290013289585, "loss": 4.1835, "step": 799000 }, { "epoch": 1.5, "learning_rate": 0.0001499290471292243, "loss": 4.1644, "step": 799500 }, { "epoch": 1.5, "learning_rate": 0.00014983519412555278, "loss": 4.1547, "step": 800000 }, { "epoch": 1.5, "learning_rate": 0.00014974134112188126, "loss": 4.1744, "step": 800500 }, { "epoch": 1.5, "learning_rate": 0.00014964748811820974, "loss": 4.1791, "step": 801000 }, { "epoch": 1.5, "learning_rate": 0.00014955363511453821, "loss": 4.1763, "step": 801500 }, { "epoch": 1.51, "learning_rate": 0.00014945978211086666, "loss": 4.1831, "step": 802000 }, { "epoch": 1.51, "learning_rate": 0.00014936592910719514, "loss": 4.1795, "step": 802500 }, { "epoch": 1.51, "learning_rate": 0.0001492720761035236, "loss": 4.1889, "step": 803000 }, { "epoch": 1.51, "learning_rate": 0.00014917822309985207, "loss": 4.1814, "step": 803500 }, { "epoch": 1.51, "learning_rate": 0.00014908437009618055, "loss": 4.1669, "step": 804000 }, { "epoch": 1.51, "learning_rate": 0.000148990517092509, "loss": 4.1695, "step": 804500 }, { "epoch": 1.51, "learning_rate": 0.00014889666408883747, "loss": 4.1616, "step": 805000 }, { "epoch": 1.51, "learning_rate": 0.00014880281108516595, "loss": 4.1777, "step": 805500 }, { "epoch": 1.51, "learning_rate": 0.00014870895808149443, "loss": 4.1715, "step": 806000 }, { "epoch": 1.51, "learning_rate": 0.0001486151050778229, "loss": 4.181, "step": 806500 }, { "epoch": 1.51, "learning_rate": 0.00014852125207415138, "loss": 4.1551, "step": 807000 }, { "epoch": 1.52, "learning_rate": 0.00014842739907047983, "loss": 4.1772, "step": 807500 }, { "epoch": 1.52, "learning_rate": 0.0001483335460668083, "loss": 4.1738, "step": 808000 }, { "epoch": 1.52, "learning_rate": 0.00014823969306313676, "loss": 4.1522, "step": 808500 }, { "epoch": 1.52, "learning_rate": 0.00014814584005946524, "loss": 4.1626, "step": 809000 }, { "epoch": 1.52, "learning_rate": 0.00014805198705579372, "loss": 4.164, "step": 809500 }, { "epoch": 1.52, "learning_rate": 0.0001479581340521222, "loss": 4.1861, "step": 810000 }, { "epoch": 1.52, "learning_rate": 0.00014786428104845067, "loss": 4.161, "step": 810500 }, { "epoch": 1.52, "learning_rate": 0.00014777042804477915, "loss": 4.1602, "step": 811000 }, { "epoch": 1.52, "learning_rate": 0.0001476765750411076, "loss": 4.1881, "step": 811500 }, { "epoch": 1.52, "learning_rate": 0.00014758272203743608, "loss": 4.1601, "step": 812000 }, { "epoch": 1.53, "learning_rate": 0.00014748886903376455, "loss": 4.1681, "step": 812500 }, { "epoch": 1.53, "learning_rate": 0.000147395016030093, "loss": 4.1703, "step": 813000 }, { "epoch": 1.53, "learning_rate": 0.00014730116302642148, "loss": 4.1818, "step": 813500 }, { "epoch": 1.53, "learning_rate": 0.00014720731002274996, "loss": 4.154, "step": 814000 }, { "epoch": 1.53, "learning_rate": 0.0001471134570190784, "loss": 4.1879, "step": 814500 }, { "epoch": 1.53, "learning_rate": 0.0001470196040154069, "loss": 4.1641, "step": 815000 }, { "epoch": 1.53, "learning_rate": 0.00014692575101173536, "loss": 4.1616, "step": 815500 }, { "epoch": 1.53, "learning_rate": 0.00014683189800806384, "loss": 4.18, "step": 816000 }, { "epoch": 1.53, "learning_rate": 0.00014673804500439232, "loss": 4.1826, "step": 816500 }, { "epoch": 1.53, "learning_rate": 0.00014664419200072077, "loss": 4.164, "step": 817000 }, { "epoch": 1.53, "learning_rate": 0.00014655033899704925, "loss": 4.1907, "step": 817500 }, { "epoch": 1.54, "learning_rate": 0.00014645648599337772, "loss": 4.1632, "step": 818000 }, { "epoch": 1.54, "learning_rate": 0.00014636263298970618, "loss": 4.1794, "step": 818500 }, { "epoch": 1.54, "learning_rate": 0.00014626877998603465, "loss": 4.1945, "step": 819000 }, { "epoch": 1.54, "learning_rate": 0.00014617492698236313, "loss": 4.1681, "step": 819500 }, { "epoch": 1.54, "learning_rate": 0.0001460810739786916, "loss": 4.1524, "step": 820000 }, { "epoch": 1.54, "learning_rate": 0.00014598722097502008, "loss": 4.1832, "step": 820500 }, { "epoch": 1.54, "learning_rate": 0.00014589336797134854, "loss": 4.1577, "step": 821000 }, { "epoch": 1.54, "learning_rate": 0.000145799514967677, "loss": 4.1752, "step": 821500 }, { "epoch": 1.54, "learning_rate": 0.0001457056619640055, "loss": 4.1663, "step": 822000 }, { "epoch": 1.54, "learning_rate": 0.00014561180896033394, "loss": 4.1744, "step": 822500 }, { "epoch": 1.54, "learning_rate": 0.00014551795595666242, "loss": 4.1754, "step": 823000 }, { "epoch": 1.55, "learning_rate": 0.0001454241029529909, "loss": 4.1776, "step": 823500 }, { "epoch": 1.55, "learning_rate": 0.00014533024994931937, "loss": 4.195, "step": 824000 }, { "epoch": 1.55, "learning_rate": 0.00014523639694564785, "loss": 4.1709, "step": 824500 }, { "epoch": 1.55, "learning_rate": 0.0001451425439419763, "loss": 4.176, "step": 825000 }, { "epoch": 1.55, "learning_rate": 0.00014504869093830478, "loss": 4.1478, "step": 825500 }, { "epoch": 1.55, "learning_rate": 0.00014495483793463326, "loss": 4.1835, "step": 826000 }, { "epoch": 1.55, "learning_rate": 0.0001448609849309617, "loss": 4.1651, "step": 826500 }, { "epoch": 1.55, "learning_rate": 0.00014476713192729018, "loss": 4.1713, "step": 827000 }, { "epoch": 1.55, "learning_rate": 0.00014467327892361866, "loss": 4.1648, "step": 827500 }, { "epoch": 1.55, "learning_rate": 0.0001445794259199471, "loss": 4.151, "step": 828000 }, { "epoch": 1.56, "learning_rate": 0.0001444855729162756, "loss": 4.1574, "step": 828500 }, { "epoch": 1.56, "learning_rate": 0.00014439171991260407, "loss": 4.1701, "step": 829000 }, { "epoch": 1.56, "learning_rate": 0.00014429786690893254, "loss": 4.1571, "step": 829500 }, { "epoch": 1.56, "learning_rate": 0.00014420401390526102, "loss": 4.1607, "step": 830000 }, { "epoch": 1.56, "learning_rate": 0.0001441101609015895, "loss": 4.164, "step": 830500 }, { "epoch": 1.56, "learning_rate": 0.00014401630789791795, "loss": 4.1665, "step": 831000 }, { "epoch": 1.56, "learning_rate": 0.00014392245489424643, "loss": 4.1664, "step": 831500 }, { "epoch": 1.56, "learning_rate": 0.0001438286018905749, "loss": 4.167, "step": 832000 }, { "epoch": 1.56, "learning_rate": 0.00014373474888690335, "loss": 4.1545, "step": 832500 }, { "epoch": 1.56, "learning_rate": 0.00014364089588323183, "loss": 4.1721, "step": 833000 }, { "epoch": 1.56, "learning_rate": 0.0001435470428795603, "loss": 4.166, "step": 833500 }, { "epoch": 1.57, "learning_rate": 0.00014345318987588879, "loss": 4.1592, "step": 834000 }, { "epoch": 1.57, "learning_rate": 0.00014335933687221726, "loss": 4.1648, "step": 834500 }, { "epoch": 1.57, "learning_rate": 0.0001432654838685457, "loss": 4.1613, "step": 835000 }, { "epoch": 1.57, "learning_rate": 0.0001431716308648742, "loss": 4.1674, "step": 835500 }, { "epoch": 1.57, "learning_rate": 0.00014307777786120267, "loss": 4.1785, "step": 836000 }, { "epoch": 1.57, "learning_rate": 0.00014298392485753112, "loss": 4.1788, "step": 836500 }, { "epoch": 1.57, "learning_rate": 0.0001428900718538596, "loss": 4.1522, "step": 837000 }, { "epoch": 1.57, "learning_rate": 0.00014279621885018807, "loss": 4.1723, "step": 837500 }, { "epoch": 1.57, "learning_rate": 0.00014270236584651652, "loss": 4.1717, "step": 838000 }, { "epoch": 1.57, "learning_rate": 0.000142608512842845, "loss": 4.1589, "step": 838500 }, { "epoch": 1.57, "learning_rate": 0.00014251465983917348, "loss": 4.1685, "step": 839000 }, { "epoch": 1.58, "learning_rate": 0.00014242080683550196, "loss": 4.1608, "step": 839500 }, { "epoch": 1.58, "learning_rate": 0.00014232695383183043, "loss": 4.157, "step": 840000 }, { "epoch": 1.58, "learning_rate": 0.00014223310082815888, "loss": 4.1499, "step": 840500 }, { "epoch": 1.58, "learning_rate": 0.00014213924782448736, "loss": 4.1432, "step": 841000 }, { "epoch": 1.58, "learning_rate": 0.00014204539482081584, "loss": 4.1689, "step": 841500 }, { "epoch": 1.58, "learning_rate": 0.0001419515418171443, "loss": 4.1645, "step": 842000 }, { "epoch": 1.58, "learning_rate": 0.00014185768881347277, "loss": 4.1648, "step": 842500 }, { "epoch": 1.58, "learning_rate": 0.00014176383580980124, "loss": 4.1825, "step": 843000 }, { "epoch": 1.58, "learning_rate": 0.00014166998280612972, "loss": 4.1707, "step": 843500 }, { "epoch": 1.58, "learning_rate": 0.0001415761298024582, "loss": 4.1767, "step": 844000 }, { "epoch": 1.59, "learning_rate": 0.00014148227679878665, "loss": 4.1638, "step": 844500 }, { "epoch": 1.59, "learning_rate": 0.00014138842379511513, "loss": 4.1606, "step": 845000 }, { "epoch": 1.59, "learning_rate": 0.0001412945707914436, "loss": 4.1743, "step": 845500 }, { "epoch": 1.59, "learning_rate": 0.00014120071778777205, "loss": 4.1715, "step": 846000 }, { "epoch": 1.59, "learning_rate": 0.00014110686478410053, "loss": 4.16, "step": 846500 }, { "epoch": 1.59, "learning_rate": 0.000141013011780429, "loss": 4.1645, "step": 847000 }, { "epoch": 1.59, "learning_rate": 0.00014091915877675749, "loss": 4.1855, "step": 847500 }, { "epoch": 1.59, "learning_rate": 0.00014082530577308596, "loss": 4.1694, "step": 848000 }, { "epoch": 1.59, "learning_rate": 0.00014073145276941441, "loss": 4.1787, "step": 848500 }, { "epoch": 1.59, "learning_rate": 0.0001406375997657429, "loss": 4.153, "step": 849000 }, { "epoch": 1.59, "learning_rate": 0.00014054374676207137, "loss": 4.1593, "step": 849500 }, { "epoch": 1.6, "learning_rate": 0.00014044989375839985, "loss": 4.1566, "step": 850000 }, { "epoch": 1.6, "learning_rate": 0.0001403560407547283, "loss": 4.1693, "step": 850500 }, { "epoch": 1.6, "learning_rate": 0.00014026218775105677, "loss": 4.1543, "step": 851000 }, { "epoch": 1.6, "learning_rate": 0.00014016833474738522, "loss": 4.1569, "step": 851500 }, { "epoch": 1.6, "learning_rate": 0.0001400744817437137, "loss": 4.1569, "step": 852000 }, { "epoch": 1.6, "learning_rate": 0.00013998062874004218, "loss": 4.1807, "step": 852500 }, { "epoch": 1.6, "learning_rate": 0.00013988677573637066, "loss": 4.1469, "step": 853000 }, { "epoch": 1.6, "learning_rate": 0.00013979292273269913, "loss": 4.1802, "step": 853500 }, { "epoch": 1.6, "learning_rate": 0.0001396990697290276, "loss": 4.1752, "step": 854000 }, { "epoch": 1.6, "learning_rate": 0.00013960521672535606, "loss": 4.1669, "step": 854500 }, { "epoch": 1.6, "learning_rate": 0.00013951136372168454, "loss": 4.1645, "step": 855000 }, { "epoch": 1.61, "learning_rate": 0.00013941751071801302, "loss": 4.1575, "step": 855500 }, { "epoch": 1.61, "learning_rate": 0.00013932365771434147, "loss": 4.1702, "step": 856000 }, { "epoch": 1.61, "learning_rate": 0.00013922980471066994, "loss": 4.142, "step": 856500 }, { "epoch": 1.61, "learning_rate": 0.00013913595170699842, "loss": 4.149, "step": 857000 }, { "epoch": 1.61, "learning_rate": 0.0001390420987033269, "loss": 4.144, "step": 857500 }, { "epoch": 1.61, "learning_rate": 0.00013894824569965538, "loss": 4.1731, "step": 858000 }, { "epoch": 1.61, "learning_rate": 0.00013885439269598383, "loss": 4.1451, "step": 858500 }, { "epoch": 1.61, "learning_rate": 0.0001387605396923123, "loss": 4.1602, "step": 859000 }, { "epoch": 1.61, "learning_rate": 0.00013866668668864078, "loss": 4.1705, "step": 859500 }, { "epoch": 1.61, "learning_rate": 0.00013857283368496923, "loss": 4.1884, "step": 860000 }, { "epoch": 1.62, "learning_rate": 0.0001384789806812977, "loss": 4.1839, "step": 860500 }, { "epoch": 1.62, "learning_rate": 0.00013838512767762619, "loss": 4.1571, "step": 861000 }, { "epoch": 1.62, "learning_rate": 0.00013829127467395464, "loss": 4.1432, "step": 861500 }, { "epoch": 1.62, "learning_rate": 0.00013819742167028311, "loss": 4.1425, "step": 862000 }, { "epoch": 1.62, "learning_rate": 0.0001381035686666116, "loss": 4.1456, "step": 862500 }, { "epoch": 1.62, "learning_rate": 0.00013800971566294007, "loss": 4.1528, "step": 863000 }, { "epoch": 1.62, "learning_rate": 0.00013791586265926855, "loss": 4.1685, "step": 863500 }, { "epoch": 1.62, "learning_rate": 0.000137822009655597, "loss": 4.1539, "step": 864000 }, { "epoch": 1.62, "learning_rate": 0.00013772815665192547, "loss": 4.171, "step": 864500 }, { "epoch": 1.62, "learning_rate": 0.00013763430364825395, "loss": 4.1472, "step": 865000 }, { "epoch": 1.62, "learning_rate": 0.0001375404506445824, "loss": 4.1517, "step": 865500 }, { "epoch": 1.63, "learning_rate": 0.00013744659764091088, "loss": 4.1509, "step": 866000 }, { "epoch": 1.63, "learning_rate": 0.00013735274463723936, "loss": 4.1623, "step": 866500 }, { "epoch": 1.63, "learning_rate": 0.00013725889163356783, "loss": 4.1633, "step": 867000 }, { "epoch": 1.63, "learning_rate": 0.0001371650386298963, "loss": 4.1658, "step": 867500 }, { "epoch": 1.63, "learning_rate": 0.0001370711856262248, "loss": 4.1715, "step": 868000 }, { "epoch": 1.63, "learning_rate": 0.00013697733262255324, "loss": 4.1552, "step": 868500 }, { "epoch": 1.63, "learning_rate": 0.00013688347961888172, "loss": 4.1719, "step": 869000 }, { "epoch": 1.63, "learning_rate": 0.00013678962661521017, "loss": 4.1733, "step": 869500 }, { "epoch": 1.63, "learning_rate": 0.00013669577361153864, "loss": 4.1662, "step": 870000 }, { "epoch": 1.63, "learning_rate": 0.00013660192060786712, "loss": 4.1711, "step": 870500 }, { "epoch": 1.63, "learning_rate": 0.0001365080676041956, "loss": 4.1749, "step": 871000 }, { "epoch": 1.64, "learning_rate": 0.00013641421460052408, "loss": 4.1627, "step": 871500 }, { "epoch": 1.64, "learning_rate": 0.00013632036159685253, "loss": 4.1471, "step": 872000 }, { "epoch": 1.64, "learning_rate": 0.000136226508593181, "loss": 4.1572, "step": 872500 }, { "epoch": 1.64, "learning_rate": 0.00013613265558950948, "loss": 4.1741, "step": 873000 }, { "epoch": 1.64, "learning_rate": 0.00013603880258583796, "loss": 4.1635, "step": 873500 }, { "epoch": 1.64, "learning_rate": 0.0001359449495821664, "loss": 4.1609, "step": 874000 }, { "epoch": 1.64, "learning_rate": 0.0001358510965784949, "loss": 4.1593, "step": 874500 }, { "epoch": 1.64, "learning_rate": 0.00013575724357482334, "loss": 4.1554, "step": 875000 }, { "epoch": 1.64, "learning_rate": 0.00013566339057115181, "loss": 4.1531, "step": 875500 }, { "epoch": 1.64, "learning_rate": 0.0001355695375674803, "loss": 4.1634, "step": 876000 }, { "epoch": 1.65, "learning_rate": 0.00013547568456380877, "loss": 4.1664, "step": 876500 }, { "epoch": 1.65, "learning_rate": 0.00013538183156013725, "loss": 4.143, "step": 877000 }, { "epoch": 1.65, "learning_rate": 0.00013528797855646572, "loss": 4.1369, "step": 877500 }, { "epoch": 1.65, "learning_rate": 0.00013519412555279417, "loss": 4.149, "step": 878000 }, { "epoch": 1.65, "learning_rate": 0.00013510027254912265, "loss": 4.1502, "step": 878500 }, { "epoch": 1.65, "learning_rate": 0.00013500641954545113, "loss": 4.1547, "step": 879000 }, { "epoch": 1.65, "learning_rate": 0.00013491256654177958, "loss": 4.1675, "step": 879500 }, { "epoch": 1.65, "learning_rate": 0.00013481871353810806, "loss": 4.1688, "step": 880000 }, { "epoch": 1.65, "learning_rate": 0.00013472486053443653, "loss": 4.1762, "step": 880500 }, { "epoch": 1.65, "learning_rate": 0.000134631007530765, "loss": 4.1329, "step": 881000 }, { "epoch": 1.65, "learning_rate": 0.0001345371545270935, "loss": 4.1668, "step": 881500 }, { "epoch": 1.66, "learning_rate": 0.00013444330152342194, "loss": 4.1542, "step": 882000 }, { "epoch": 1.66, "learning_rate": 0.00013434944851975042, "loss": 4.1452, "step": 882500 }, { "epoch": 1.66, "learning_rate": 0.0001342555955160789, "loss": 4.1658, "step": 883000 }, { "epoch": 1.66, "learning_rate": 0.00013416174251240734, "loss": 4.1588, "step": 883500 }, { "epoch": 1.66, "learning_rate": 0.00013406788950873582, "loss": 4.1566, "step": 884000 }, { "epoch": 1.66, "learning_rate": 0.0001339740365050643, "loss": 4.1473, "step": 884500 }, { "epoch": 1.66, "learning_rate": 0.00013388018350139275, "loss": 4.1476, "step": 885000 }, { "epoch": 1.66, "learning_rate": 0.00013378633049772123, "loss": 4.1725, "step": 885500 }, { "epoch": 1.66, "learning_rate": 0.0001336924774940497, "loss": 4.1553, "step": 886000 }, { "epoch": 1.66, "learning_rate": 0.00013359862449037818, "loss": 4.161, "step": 886500 }, { "epoch": 1.66, "learning_rate": 0.00013350477148670666, "loss": 4.1336, "step": 887000 }, { "epoch": 1.67, "learning_rate": 0.0001334109184830351, "loss": 4.1471, "step": 887500 }, { "epoch": 1.67, "learning_rate": 0.0001333170654793636, "loss": 4.1533, "step": 888000 }, { "epoch": 1.67, "learning_rate": 0.00013322321247569206, "loss": 4.1537, "step": 888500 }, { "epoch": 1.67, "learning_rate": 0.00013312935947202052, "loss": 4.1666, "step": 889000 }, { "epoch": 1.67, "learning_rate": 0.000133035506468349, "loss": 4.1584, "step": 889500 }, { "epoch": 1.67, "learning_rate": 0.00013294165346467747, "loss": 4.1578, "step": 890000 }, { "epoch": 1.67, "learning_rate": 0.00013284780046100595, "loss": 4.1589, "step": 890500 }, { "epoch": 1.67, "learning_rate": 0.00013275394745733442, "loss": 4.1773, "step": 891000 }, { "epoch": 1.67, "learning_rate": 0.0001326600944536629, "loss": 4.1608, "step": 891500 }, { "epoch": 1.67, "learning_rate": 0.00013256624144999135, "loss": 4.1466, "step": 892000 }, { "epoch": 1.68, "learning_rate": 0.00013247238844631983, "loss": 4.1565, "step": 892500 }, { "epoch": 1.68, "learning_rate": 0.00013237853544264828, "loss": 4.1605, "step": 893000 }, { "epoch": 1.68, "learning_rate": 0.00013228468243897676, "loss": 4.1482, "step": 893500 }, { "epoch": 1.68, "learning_rate": 0.00013219082943530524, "loss": 4.1617, "step": 894000 }, { "epoch": 1.68, "learning_rate": 0.0001320969764316337, "loss": 4.1526, "step": 894500 }, { "epoch": 1.68, "learning_rate": 0.00013200312342796216, "loss": 4.1671, "step": 895000 }, { "epoch": 1.68, "learning_rate": 0.00013190927042429064, "loss": 4.1766, "step": 895500 }, { "epoch": 1.68, "learning_rate": 0.00013181541742061912, "loss": 4.1628, "step": 896000 }, { "epoch": 1.68, "learning_rate": 0.0001317215644169476, "loss": 4.1754, "step": 896500 }, { "epoch": 1.68, "learning_rate": 0.00013162771141327607, "loss": 4.1571, "step": 897000 }, { "epoch": 1.68, "learning_rate": 0.00013153385840960452, "loss": 4.1459, "step": 897500 }, { "epoch": 1.69, "learning_rate": 0.000131440005405933, "loss": 4.1665, "step": 898000 }, { "epoch": 1.69, "learning_rate": 0.00013134615240226148, "loss": 4.1266, "step": 898500 }, { "epoch": 1.69, "learning_rate": 0.00013125229939858993, "loss": 4.1453, "step": 899000 }, { "epoch": 1.69, "learning_rate": 0.0001311584463949184, "loss": 4.1517, "step": 899500 }, { "epoch": 1.69, "learning_rate": 0.00013106459339124688, "loss": 4.1438, "step": 900000 }, { "epoch": 1.69, "learning_rate": 0.00013097074038757536, "loss": 4.1525, "step": 900500 }, { "epoch": 1.69, "learning_rate": 0.00013087688738390384, "loss": 4.1584, "step": 901000 }, { "epoch": 1.69, "learning_rate": 0.0001307830343802323, "loss": 4.1575, "step": 901500 }, { "epoch": 1.69, "learning_rate": 0.00013068918137656077, "loss": 4.146, "step": 902000 }, { "epoch": 1.69, "learning_rate": 0.00013059532837288924, "loss": 4.1607, "step": 902500 }, { "epoch": 1.69, "learning_rate": 0.0001305014753692177, "loss": 4.1482, "step": 903000 }, { "epoch": 1.7, "learning_rate": 0.00013040762236554617, "loss": 4.1457, "step": 903500 }, { "epoch": 1.7, "learning_rate": 0.00013031376936187465, "loss": 4.1477, "step": 904000 }, { "epoch": 1.7, "learning_rate": 0.00013021991635820313, "loss": 4.1523, "step": 904500 }, { "epoch": 1.7, "learning_rate": 0.0001301260633545316, "loss": 4.1349, "step": 905000 }, { "epoch": 1.7, "learning_rate": 0.00013003221035086005, "loss": 4.147, "step": 905500 }, { "epoch": 1.7, "learning_rate": 0.00012993835734718853, "loss": 4.1542, "step": 906000 }, { "epoch": 1.7, "learning_rate": 0.000129844504343517, "loss": 4.1514, "step": 906500 }, { "epoch": 1.7, "learning_rate": 0.00012975065133984546, "loss": 4.1542, "step": 907000 }, { "epoch": 1.7, "learning_rate": 0.00012965679833617394, "loss": 4.1556, "step": 907500 }, { "epoch": 1.7, "learning_rate": 0.0001295629453325024, "loss": 4.1406, "step": 908000 }, { "epoch": 1.71, "learning_rate": 0.00012946909232883086, "loss": 4.1554, "step": 908500 }, { "epoch": 1.71, "learning_rate": 0.00012937523932515934, "loss": 4.1343, "step": 909000 }, { "epoch": 1.71, "learning_rate": 0.00012928138632148782, "loss": 4.1464, "step": 909500 }, { "epoch": 1.71, "learning_rate": 0.0001291875333178163, "loss": 4.1572, "step": 910000 }, { "epoch": 1.71, "learning_rate": 0.00012909368031414477, "loss": 4.1512, "step": 910500 }, { "epoch": 1.71, "learning_rate": 0.00012899982731047322, "loss": 4.1333, "step": 911000 }, { "epoch": 1.71, "learning_rate": 0.0001289059743068017, "loss": 4.1432, "step": 911500 }, { "epoch": 1.71, "learning_rate": 0.00012881212130313018, "loss": 4.152, "step": 912000 }, { "epoch": 1.71, "learning_rate": 0.00012871826829945863, "loss": 4.1375, "step": 912500 }, { "epoch": 1.71, "learning_rate": 0.0001286244152957871, "loss": 4.1629, "step": 913000 }, { "epoch": 1.71, "learning_rate": 0.00012853056229211558, "loss": 4.1557, "step": 913500 }, { "epoch": 1.72, "learning_rate": 0.00012843670928844406, "loss": 4.149, "step": 914000 }, { "epoch": 1.72, "learning_rate": 0.00012834285628477254, "loss": 4.1455, "step": 914500 }, { "epoch": 1.72, "learning_rate": 0.00012824900328110102, "loss": 4.1546, "step": 915000 }, { "epoch": 1.72, "learning_rate": 0.00012815515027742947, "loss": 4.1494, "step": 915500 }, { "epoch": 1.72, "learning_rate": 0.00012806129727375794, "loss": 4.1478, "step": 916000 }, { "epoch": 1.72, "learning_rate": 0.00012796744427008642, "loss": 4.1756, "step": 916500 }, { "epoch": 1.72, "learning_rate": 0.00012787359126641487, "loss": 4.1373, "step": 917000 }, { "epoch": 1.72, "learning_rate": 0.00012777973826274335, "loss": 4.1653, "step": 917500 }, { "epoch": 1.72, "learning_rate": 0.00012768588525907183, "loss": 4.1449, "step": 918000 }, { "epoch": 1.72, "learning_rate": 0.00012759203225540028, "loss": 4.1569, "step": 918500 }, { "epoch": 1.73, "learning_rate": 0.00012749817925172875, "loss": 4.1697, "step": 919000 }, { "epoch": 1.73, "learning_rate": 0.00012740432624805723, "loss": 4.1371, "step": 919500 }, { "epoch": 1.73, "learning_rate": 0.0001273104732443857, "loss": 4.1391, "step": 920000 }, { "epoch": 1.73, "learning_rate": 0.00012721662024071419, "loss": 4.1325, "step": 920500 }, { "epoch": 1.73, "learning_rate": 0.00012712276723704264, "loss": 4.133, "step": 921000 }, { "epoch": 1.73, "learning_rate": 0.00012702891423337111, "loss": 4.1564, "step": 921500 }, { "epoch": 1.73, "learning_rate": 0.0001269350612296996, "loss": 4.1415, "step": 922000 }, { "epoch": 1.73, "learning_rate": 0.00012684120822602804, "loss": 4.1426, "step": 922500 }, { "epoch": 1.73, "learning_rate": 0.00012674735522235652, "loss": 4.1483, "step": 923000 }, { "epoch": 1.73, "learning_rate": 0.000126653502218685, "loss": 4.1296, "step": 923500 }, { "epoch": 1.73, "learning_rate": 0.00012655964921501347, "loss": 4.1324, "step": 924000 }, { "epoch": 1.74, "learning_rate": 0.00012646579621134195, "loss": 4.164, "step": 924500 }, { "epoch": 1.74, "learning_rate": 0.0001263719432076704, "loss": 4.1306, "step": 925000 }, { "epoch": 1.74, "learning_rate": 0.00012627809020399888, "loss": 4.1526, "step": 925500 }, { "epoch": 1.74, "learning_rate": 0.00012618423720032736, "loss": 4.1502, "step": 926000 }, { "epoch": 1.74, "learning_rate": 0.0001260903841966558, "loss": 4.1489, "step": 926500 }, { "epoch": 1.74, "learning_rate": 0.00012599653119298428, "loss": 4.1272, "step": 927000 }, { "epoch": 1.74, "learning_rate": 0.00012590267818931276, "loss": 4.1326, "step": 927500 }, { "epoch": 1.74, "learning_rate": 0.00012580882518564124, "loss": 4.1592, "step": 928000 }, { "epoch": 1.74, "learning_rate": 0.00012571497218196972, "loss": 4.1425, "step": 928500 }, { "epoch": 1.74, "learning_rate": 0.00012562111917829817, "loss": 4.1498, "step": 929000 }, { "epoch": 1.74, "learning_rate": 0.00012552726617462664, "loss": 4.145, "step": 929500 }, { "epoch": 1.75, "learning_rate": 0.00012543341317095512, "loss": 4.1473, "step": 930000 }, { "epoch": 1.75, "learning_rate": 0.00012533956016728357, "loss": 4.1499, "step": 930500 }, { "epoch": 1.75, "learning_rate": 0.00012524570716361205, "loss": 4.1531, "step": 931000 }, { "epoch": 1.75, "learning_rate": 0.00012515185415994053, "loss": 4.1434, "step": 931500 }, { "epoch": 1.75, "learning_rate": 0.00012505800115626898, "loss": 4.1528, "step": 932000 }, { "epoch": 1.75, "learning_rate": 0.00012496414815259745, "loss": 4.1428, "step": 932500 }, { "epoch": 1.75, "learning_rate": 0.00012487029514892593, "loss": 4.1576, "step": 933000 }, { "epoch": 1.75, "learning_rate": 0.0001247764421452544, "loss": 4.1558, "step": 933500 }, { "epoch": 1.75, "learning_rate": 0.00012468258914158289, "loss": 4.1521, "step": 934000 }, { "epoch": 1.75, "learning_rate": 0.00012458873613791136, "loss": 4.1473, "step": 934500 }, { "epoch": 1.76, "learning_rate": 0.00012449488313423981, "loss": 4.1403, "step": 935000 }, { "epoch": 1.76, "learning_rate": 0.0001244010301305683, "loss": 4.1466, "step": 935500 }, { "epoch": 1.76, "learning_rate": 0.00012430717712689674, "loss": 4.1479, "step": 936000 }, { "epoch": 1.76, "learning_rate": 0.00012421332412322522, "loss": 4.154, "step": 936500 }, { "epoch": 1.76, "learning_rate": 0.0001241194711195537, "loss": 4.1392, "step": 937000 }, { "epoch": 1.76, "learning_rate": 0.00012402561811588217, "loss": 4.1386, "step": 937500 }, { "epoch": 1.76, "learning_rate": 0.00012393176511221065, "loss": 4.1508, "step": 938000 }, { "epoch": 1.76, "learning_rate": 0.00012383791210853913, "loss": 4.1375, "step": 938500 }, { "epoch": 1.76, "learning_rate": 0.00012374405910486758, "loss": 4.157, "step": 939000 }, { "epoch": 1.76, "learning_rate": 0.00012365020610119606, "loss": 4.1461, "step": 939500 }, { "epoch": 1.76, "learning_rate": 0.00012355635309752453, "loss": 4.156, "step": 940000 }, { "epoch": 1.77, "learning_rate": 0.00012346250009385298, "loss": 4.1311, "step": 940500 }, { "epoch": 1.77, "learning_rate": 0.00012336864709018146, "loss": 4.1446, "step": 941000 }, { "epoch": 1.77, "learning_rate": 0.00012327479408650994, "loss": 4.1588, "step": 941500 }, { "epoch": 1.77, "learning_rate": 0.0001231809410828384, "loss": 4.1286, "step": 942000 }, { "epoch": 1.77, "learning_rate": 0.00012308708807916687, "loss": 4.1618, "step": 942500 }, { "epoch": 1.77, "learning_rate": 0.00012299323507549534, "loss": 4.1153, "step": 943000 }, { "epoch": 1.77, "learning_rate": 0.00012289938207182382, "loss": 4.1735, "step": 943500 }, { "epoch": 1.77, "learning_rate": 0.0001228055290681523, "loss": 4.1568, "step": 944000 }, { "epoch": 1.77, "learning_rate": 0.00012271167606448075, "loss": 4.1195, "step": 944500 }, { "epoch": 1.77, "learning_rate": 0.00012261782306080923, "loss": 4.1557, "step": 945000 }, { "epoch": 1.77, "learning_rate": 0.0001225239700571377, "loss": 4.1538, "step": 945500 }, { "epoch": 1.78, "learning_rate": 0.00012243011705346615, "loss": 4.1532, "step": 946000 }, { "epoch": 1.78, "learning_rate": 0.00012233626404979463, "loss": 4.1455, "step": 946500 }, { "epoch": 1.78, "learning_rate": 0.0001222424110461231, "loss": 4.1525, "step": 947000 }, { "epoch": 1.78, "learning_rate": 0.0001221485580424516, "loss": 4.1732, "step": 947500 }, { "epoch": 1.78, "learning_rate": 0.00012205470503878005, "loss": 4.1564, "step": 948000 }, { "epoch": 1.78, "learning_rate": 0.00012196085203510851, "loss": 4.1306, "step": 948500 }, { "epoch": 1.78, "learning_rate": 0.00012186699903143699, "loss": 4.1487, "step": 949000 }, { "epoch": 1.78, "learning_rate": 0.00012177314602776547, "loss": 4.1409, "step": 949500 }, { "epoch": 1.78, "learning_rate": 0.00012167929302409393, "loss": 4.1265, "step": 950000 }, { "epoch": 1.78, "learning_rate": 0.0001215854400204224, "loss": 4.1365, "step": 950500 }, { "epoch": 1.79, "learning_rate": 0.00012149158701675087, "loss": 4.1518, "step": 951000 }, { "epoch": 1.79, "learning_rate": 0.00012139773401307934, "loss": 4.1391, "step": 951500 }, { "epoch": 1.79, "learning_rate": 0.00012130388100940782, "loss": 4.1373, "step": 952000 }, { "epoch": 1.79, "learning_rate": 0.0001212100280057363, "loss": 4.1258, "step": 952500 }, { "epoch": 1.79, "learning_rate": 0.00012111617500206476, "loss": 4.1463, "step": 953000 }, { "epoch": 1.79, "learning_rate": 0.00012102232199839323, "loss": 4.1345, "step": 953500 }, { "epoch": 1.79, "learning_rate": 0.00012092846899472168, "loss": 4.1452, "step": 954000 }, { "epoch": 1.79, "learning_rate": 0.00012083461599105016, "loss": 4.1288, "step": 954500 }, { "epoch": 1.79, "learning_rate": 0.00012074076298737864, "loss": 4.1593, "step": 955000 }, { "epoch": 1.79, "learning_rate": 0.0001206469099837071, "loss": 4.1404, "step": 955500 }, { "epoch": 1.79, "learning_rate": 0.00012055305698003558, "loss": 4.1375, "step": 956000 }, { "epoch": 1.8, "learning_rate": 0.00012045920397636406, "loss": 4.1524, "step": 956500 }, { "epoch": 1.8, "learning_rate": 0.00012036535097269251, "loss": 4.1575, "step": 957000 }, { "epoch": 1.8, "learning_rate": 0.00012027149796902099, "loss": 4.1392, "step": 957500 }, { "epoch": 1.8, "learning_rate": 0.00012017764496534946, "loss": 4.1581, "step": 958000 }, { "epoch": 1.8, "learning_rate": 0.00012008379196167793, "loss": 4.136, "step": 958500 }, { "epoch": 1.8, "learning_rate": 0.0001199899389580064, "loss": 4.137, "step": 959000 }, { "epoch": 1.8, "learning_rate": 0.00011989608595433487, "loss": 4.1558, "step": 959500 }, { "epoch": 1.8, "learning_rate": 0.00011980223295066335, "loss": 4.154, "step": 960000 }, { "epoch": 1.8, "learning_rate": 0.00011970837994699182, "loss": 4.1409, "step": 960500 }, { "epoch": 1.8, "learning_rate": 0.00011961452694332027, "loss": 4.1322, "step": 961000 }, { "epoch": 1.8, "learning_rate": 0.00011952067393964875, "loss": 4.1709, "step": 961500 }, { "epoch": 1.81, "learning_rate": 0.00011942682093597723, "loss": 4.1564, "step": 962000 }, { "epoch": 1.81, "learning_rate": 0.00011933296793230569, "loss": 4.1596, "step": 962500 }, { "epoch": 1.81, "learning_rate": 0.00011923911492863417, "loss": 4.1356, "step": 963000 }, { "epoch": 1.81, "learning_rate": 0.00011914526192496265, "loss": 4.1275, "step": 963500 }, { "epoch": 1.81, "learning_rate": 0.0001190514089212911, "loss": 4.1554, "step": 964000 }, { "epoch": 1.81, "learning_rate": 0.00011895755591761958, "loss": 4.1327, "step": 964500 }, { "epoch": 1.81, "learning_rate": 0.00011886370291394805, "loss": 4.1317, "step": 965000 }, { "epoch": 1.81, "learning_rate": 0.00011876984991027652, "loss": 4.1325, "step": 965500 }, { "epoch": 1.81, "learning_rate": 0.000118675996906605, "loss": 4.1607, "step": 966000 }, { "epoch": 1.81, "learning_rate": 0.00011858214390293346, "loss": 4.1461, "step": 966500 }, { "epoch": 1.82, "learning_rate": 0.00011848829089926194, "loss": 4.1407, "step": 967000 }, { "epoch": 1.82, "learning_rate": 0.0001183944378955904, "loss": 4.1523, "step": 967500 }, { "epoch": 1.82, "learning_rate": 0.00011830058489191886, "loss": 4.1324, "step": 968000 }, { "epoch": 1.82, "learning_rate": 0.00011820673188824734, "loss": 4.1391, "step": 968500 }, { "epoch": 1.82, "learning_rate": 0.00011811287888457582, "loss": 4.1511, "step": 969000 }, { "epoch": 1.82, "learning_rate": 0.00011801902588090428, "loss": 4.1539, "step": 969500 }, { "epoch": 1.82, "learning_rate": 0.00011792517287723276, "loss": 4.1431, "step": 970000 }, { "epoch": 1.82, "learning_rate": 0.00011783131987356124, "loss": 4.1339, "step": 970500 }, { "epoch": 1.82, "learning_rate": 0.00011773746686988969, "loss": 4.145, "step": 971000 }, { "epoch": 1.82, "learning_rate": 0.00011764361386621816, "loss": 4.1357, "step": 971500 }, { "epoch": 1.82, "learning_rate": 0.00011754976086254663, "loss": 4.1461, "step": 972000 }, { "epoch": 1.83, "learning_rate": 0.0001174559078588751, "loss": 4.1355, "step": 972500 }, { "epoch": 1.83, "learning_rate": 0.00011736205485520358, "loss": 4.1309, "step": 973000 }, { "epoch": 1.83, "learning_rate": 0.00011726820185153203, "loss": 4.1298, "step": 973500 }, { "epoch": 1.83, "learning_rate": 0.00011717434884786051, "loss": 4.1269, "step": 974000 }, { "epoch": 1.83, "learning_rate": 0.00011708049584418899, "loss": 4.1468, "step": 974500 }, { "epoch": 1.83, "learning_rate": 0.00011698664284051745, "loss": 4.1476, "step": 975000 }, { "epoch": 1.83, "learning_rate": 0.00011689278983684593, "loss": 4.1415, "step": 975500 }, { "epoch": 1.83, "learning_rate": 0.0001167989368331744, "loss": 4.1372, "step": 976000 }, { "epoch": 1.83, "learning_rate": 0.00011670508382950287, "loss": 4.1227, "step": 976500 }, { "epoch": 1.83, "learning_rate": 0.00011661123082583135, "loss": 4.1526, "step": 977000 }, { "epoch": 1.83, "learning_rate": 0.0001165173778221598, "loss": 4.1404, "step": 977500 }, { "epoch": 1.84, "learning_rate": 0.00011642352481848828, "loss": 4.1542, "step": 978000 }, { "epoch": 1.84, "learning_rate": 0.00011632967181481675, "loss": 4.1278, "step": 978500 }, { "epoch": 1.84, "learning_rate": 0.00011623581881114522, "loss": 4.121, "step": 979000 }, { "epoch": 1.84, "learning_rate": 0.0001161419658074737, "loss": 4.1483, "step": 979500 }, { "epoch": 1.84, "learning_rate": 0.00011604811280380217, "loss": 4.135, "step": 980000 }, { "epoch": 1.84, "learning_rate": 0.00011595425980013062, "loss": 4.1195, "step": 980500 }, { "epoch": 1.84, "learning_rate": 0.0001158604067964591, "loss": 4.1515, "step": 981000 }, { "epoch": 1.84, "learning_rate": 0.00011576655379278758, "loss": 4.1385, "step": 981500 }, { "epoch": 1.84, "learning_rate": 0.00011567270078911604, "loss": 4.1471, "step": 982000 }, { "epoch": 1.84, "learning_rate": 0.00011557884778544452, "loss": 4.1415, "step": 982500 }, { "epoch": 1.85, "learning_rate": 0.000115484994781773, "loss": 4.1453, "step": 983000 }, { "epoch": 1.85, "learning_rate": 0.00011539114177810146, "loss": 4.1373, "step": 983500 }, { "epoch": 1.85, "learning_rate": 0.00011529728877442992, "loss": 4.1174, "step": 984000 }, { "epoch": 1.85, "learning_rate": 0.00011520343577075839, "loss": 4.1484, "step": 984500 }, { "epoch": 1.85, "learning_rate": 0.00011510958276708686, "loss": 4.1352, "step": 985000 }, { "epoch": 1.85, "learning_rate": 0.00011501572976341534, "loss": 4.1491, "step": 985500 }, { "epoch": 1.85, "learning_rate": 0.0001149218767597438, "loss": 4.1569, "step": 986000 }, { "epoch": 1.85, "learning_rate": 0.00011482802375607228, "loss": 4.1185, "step": 986500 }, { "epoch": 1.85, "learning_rate": 0.00011473417075240076, "loss": 4.1379, "step": 987000 }, { "epoch": 1.85, "learning_rate": 0.00011464031774872921, "loss": 4.1233, "step": 987500 }, { "epoch": 1.85, "learning_rate": 0.00011454646474505769, "loss": 4.124, "step": 988000 }, { "epoch": 1.86, "learning_rate": 0.00011445261174138617, "loss": 4.1467, "step": 988500 }, { "epoch": 1.86, "learning_rate": 0.00011435875873771463, "loss": 4.159, "step": 989000 }, { "epoch": 1.86, "learning_rate": 0.00011426490573404311, "loss": 4.138, "step": 989500 }, { "epoch": 1.86, "learning_rate": 0.00011417105273037157, "loss": 4.1624, "step": 990000 }, { "epoch": 1.86, "learning_rate": 0.00011407719972670003, "loss": 4.1434, "step": 990500 }, { "epoch": 1.86, "learning_rate": 0.00011398334672302851, "loss": 4.1239, "step": 991000 }, { "epoch": 1.86, "learning_rate": 0.00011388949371935698, "loss": 4.1452, "step": 991500 }, { "epoch": 1.86, "learning_rate": 0.00011379564071568545, "loss": 4.1212, "step": 992000 }, { "epoch": 1.86, "learning_rate": 0.00011370178771201393, "loss": 4.1346, "step": 992500 }, { "epoch": 1.86, "learning_rate": 0.0001136079347083424, "loss": 4.1433, "step": 993000 }, { "epoch": 1.86, "learning_rate": 0.00011351408170467087, "loss": 4.1496, "step": 993500 }, { "epoch": 1.87, "learning_rate": 0.00011342022870099935, "loss": 4.1362, "step": 994000 }, { "epoch": 1.87, "learning_rate": 0.0001133263756973278, "loss": 4.1491, "step": 994500 }, { "epoch": 1.87, "learning_rate": 0.00011323252269365628, "loss": 4.1326, "step": 995000 }, { "epoch": 1.87, "learning_rate": 0.00011313866968998475, "loss": 4.1242, "step": 995500 }, { "epoch": 1.87, "learning_rate": 0.00011304481668631322, "loss": 4.1487, "step": 996000 }, { "epoch": 1.87, "learning_rate": 0.0001129509636826417, "loss": 4.1224, "step": 996500 }, { "epoch": 1.87, "learning_rate": 0.00011285711067897015, "loss": 4.1172, "step": 997000 }, { "epoch": 1.87, "learning_rate": 0.00011276325767529862, "loss": 4.132, "step": 997500 }, { "epoch": 1.87, "learning_rate": 0.0001126694046716271, "loss": 4.1388, "step": 998000 }, { "epoch": 1.87, "learning_rate": 0.00011257555166795557, "loss": 4.1333, "step": 998500 }, { "epoch": 1.88, "learning_rate": 0.00011248169866428404, "loss": 4.142, "step": 999000 }, { "epoch": 1.88, "learning_rate": 0.00011238784566061252, "loss": 4.1385, "step": 999500 }, { "epoch": 1.88, "learning_rate": 0.00011229399265694098, "loss": 4.1118, "step": 1000000 }, { "epoch": 1.88, "learning_rate": 0.00011220013965326946, "loss": 4.1553, "step": 1000500 }, { "epoch": 1.88, "learning_rate": 0.00011210628664959793, "loss": 4.1319, "step": 1001000 }, { "epoch": 1.88, "learning_rate": 0.00011201243364592639, "loss": 4.1237, "step": 1001500 }, { "epoch": 1.88, "learning_rate": 0.00011191858064225487, "loss": 4.1383, "step": 1002000 }, { "epoch": 1.88, "learning_rate": 0.00011182472763858333, "loss": 4.1455, "step": 1002500 }, { "epoch": 1.88, "learning_rate": 0.00011173087463491181, "loss": 4.1282, "step": 1003000 }, { "epoch": 1.88, "learning_rate": 0.00011163702163124029, "loss": 4.1575, "step": 1003500 }, { "epoch": 1.88, "learning_rate": 0.00011154316862756874, "loss": 4.1647, "step": 1004000 }, { "epoch": 1.89, "learning_rate": 0.00011144931562389721, "loss": 4.1119, "step": 1004500 }, { "epoch": 1.89, "learning_rate": 0.00011135546262022569, "loss": 4.1236, "step": 1005000 }, { "epoch": 1.89, "learning_rate": 0.00011126160961655415, "loss": 4.1447, "step": 1005500 }, { "epoch": 1.89, "learning_rate": 0.00011116775661288263, "loss": 4.1236, "step": 1006000 }, { "epoch": 1.89, "learning_rate": 0.00011107390360921111, "loss": 4.1291, "step": 1006500 }, { "epoch": 1.89, "learning_rate": 0.00011098005060553957, "loss": 4.1438, "step": 1007000 }, { "epoch": 1.89, "learning_rate": 0.00011088619760186804, "loss": 4.1411, "step": 1007500 }, { "epoch": 1.89, "learning_rate": 0.0001107923445981965, "loss": 4.1428, "step": 1008000 }, { "epoch": 1.89, "learning_rate": 0.00011069849159452498, "loss": 4.1509, "step": 1008500 }, { "epoch": 1.89, "learning_rate": 0.00011060463859085346, "loss": 4.1412, "step": 1009000 }, { "epoch": 1.89, "learning_rate": 0.00011051078558718192, "loss": 4.1482, "step": 1009500 }, { "epoch": 1.9, "learning_rate": 0.0001104169325835104, "loss": 4.1383, "step": 1010000 }, { "epoch": 1.9, "learning_rate": 0.00011032307957983887, "loss": 4.1465, "step": 1010500 }, { "epoch": 1.9, "learning_rate": 0.00011022922657616732, "loss": 4.1444, "step": 1011000 }, { "epoch": 1.9, "learning_rate": 0.0001101353735724958, "loss": 4.1404, "step": 1011500 }, { "epoch": 1.9, "learning_rate": 0.00011004152056882428, "loss": 4.1512, "step": 1012000 }, { "epoch": 1.9, "learning_rate": 0.00010994766756515274, "loss": 4.1348, "step": 1012500 }, { "epoch": 1.9, "learning_rate": 0.00010985381456148122, "loss": 4.1288, "step": 1013000 }, { "epoch": 1.9, "learning_rate": 0.0001097599615578097, "loss": 4.1211, "step": 1013500 }, { "epoch": 1.9, "learning_rate": 0.00010966610855413815, "loss": 4.1403, "step": 1014000 }, { "epoch": 1.9, "learning_rate": 0.00010957225555046663, "loss": 4.1321, "step": 1014500 }, { "epoch": 1.91, "learning_rate": 0.00010947840254679509, "loss": 4.1273, "step": 1015000 }, { "epoch": 1.91, "learning_rate": 0.00010938454954312357, "loss": 4.1445, "step": 1015500 }, { "epoch": 1.91, "learning_rate": 0.00010929069653945204, "loss": 4.15, "step": 1016000 }, { "epoch": 1.91, "learning_rate": 0.00010919684353578051, "loss": 4.128, "step": 1016500 }, { "epoch": 1.91, "learning_rate": 0.00010910299053210899, "loss": 4.1393, "step": 1017000 }, { "epoch": 1.91, "learning_rate": 0.00010900913752843746, "loss": 4.1423, "step": 1017500 }, { "epoch": 1.91, "learning_rate": 0.00010891528452476591, "loss": 4.1301, "step": 1018000 }, { "epoch": 1.91, "learning_rate": 0.00010882143152109439, "loss": 4.1201, "step": 1018500 }, { "epoch": 1.91, "learning_rate": 0.00010872757851742287, "loss": 4.1287, "step": 1019000 }, { "epoch": 1.91, "learning_rate": 0.00010863372551375133, "loss": 4.1387, "step": 1019500 }, { "epoch": 1.91, "learning_rate": 0.00010853987251007981, "loss": 4.1147, "step": 1020000 }, { "epoch": 1.92, "learning_rate": 0.00010844601950640826, "loss": 4.1294, "step": 1020500 }, { "epoch": 1.92, "learning_rate": 0.00010835216650273674, "loss": 4.13, "step": 1021000 }, { "epoch": 1.92, "learning_rate": 0.00010825831349906521, "loss": 4.1342, "step": 1021500 }, { "epoch": 1.92, "learning_rate": 0.00010816446049539368, "loss": 4.1351, "step": 1022000 }, { "epoch": 1.92, "learning_rate": 0.00010807060749172216, "loss": 4.1385, "step": 1022500 }, { "epoch": 1.92, "learning_rate": 0.00010797675448805063, "loss": 4.1422, "step": 1023000 }, { "epoch": 1.92, "learning_rate": 0.0001078829014843791, "loss": 4.1245, "step": 1023500 }, { "epoch": 1.92, "learning_rate": 0.00010778904848070757, "loss": 4.1172, "step": 1024000 }, { "epoch": 1.92, "learning_rate": 0.00010769519547703604, "loss": 4.1479, "step": 1024500 }, { "epoch": 1.92, "learning_rate": 0.0001076013424733645, "loss": 4.1247, "step": 1025000 }, { "epoch": 1.92, "learning_rate": 0.00010750748946969298, "loss": 4.1183, "step": 1025500 }, { "epoch": 1.93, "learning_rate": 0.00010741363646602144, "loss": 4.1272, "step": 1026000 }, { "epoch": 1.93, "learning_rate": 0.00010731978346234992, "loss": 4.1143, "step": 1026500 }, { "epoch": 1.93, "learning_rate": 0.0001072259304586784, "loss": 4.1418, "step": 1027000 }, { "epoch": 1.93, "learning_rate": 0.00010713207745500685, "loss": 4.1353, "step": 1027500 }, { "epoch": 1.93, "learning_rate": 0.00010703822445133533, "loss": 4.1316, "step": 1028000 }, { "epoch": 1.93, "learning_rate": 0.0001069443714476638, "loss": 4.1255, "step": 1028500 }, { "epoch": 1.93, "learning_rate": 0.00010685051844399227, "loss": 4.119, "step": 1029000 }, { "epoch": 1.93, "learning_rate": 0.00010675666544032074, "loss": 4.1291, "step": 1029500 }, { "epoch": 1.93, "learning_rate": 0.00010666281243664922, "loss": 4.1499, "step": 1030000 }, { "epoch": 1.93, "learning_rate": 0.00010656895943297769, "loss": 4.1192, "step": 1030500 }, { "epoch": 1.94, "learning_rate": 0.00010647510642930615, "loss": 4.1316, "step": 1031000 }, { "epoch": 1.94, "learning_rate": 0.00010638125342563463, "loss": 4.1289, "step": 1031500 }, { "epoch": 1.94, "learning_rate": 0.00010628740042196309, "loss": 4.1488, "step": 1032000 }, { "epoch": 1.94, "learning_rate": 0.00010619354741829157, "loss": 4.1376, "step": 1032500 }, { "epoch": 1.94, "learning_rate": 0.00010609969441462003, "loss": 4.1437, "step": 1033000 }, { "epoch": 1.94, "learning_rate": 0.00010600584141094851, "loss": 4.134, "step": 1033500 }, { "epoch": 1.94, "learning_rate": 0.00010591198840727699, "loss": 4.1261, "step": 1034000 }, { "epoch": 1.94, "learning_rate": 0.00010581813540360544, "loss": 4.1293, "step": 1034500 }, { "epoch": 1.94, "learning_rate": 0.00010572428239993392, "loss": 4.1303, "step": 1035000 }, { "epoch": 1.94, "learning_rate": 0.00010563042939626239, "loss": 4.1335, "step": 1035500 }, { "epoch": 1.94, "learning_rate": 0.00010553657639259086, "loss": 4.1341, "step": 1036000 }, { "epoch": 1.95, "learning_rate": 0.00010544272338891933, "loss": 4.1472, "step": 1036500 }, { "epoch": 1.95, "learning_rate": 0.00010534887038524781, "loss": 4.1489, "step": 1037000 }, { "epoch": 1.95, "learning_rate": 0.00010525501738157626, "loss": 4.1246, "step": 1037500 }, { "epoch": 1.95, "learning_rate": 0.00010516116437790474, "loss": 4.1247, "step": 1038000 }, { "epoch": 1.95, "learning_rate": 0.0001050673113742332, "loss": 4.133, "step": 1038500 }, { "epoch": 1.95, "learning_rate": 0.00010497345837056168, "loss": 4.1452, "step": 1039000 }, { "epoch": 1.95, "learning_rate": 0.00010487960536689016, "loss": 4.1345, "step": 1039500 }, { "epoch": 1.95, "learning_rate": 0.00010478575236321862, "loss": 4.1238, "step": 1040000 }, { "epoch": 1.95, "learning_rate": 0.0001046918993595471, "loss": 4.1177, "step": 1040500 }, { "epoch": 1.95, "learning_rate": 0.00010459804635587558, "loss": 4.1273, "step": 1041000 }, { "epoch": 1.95, "learning_rate": 0.00010450419335220403, "loss": 4.1347, "step": 1041500 }, { "epoch": 1.96, "learning_rate": 0.0001044103403485325, "loss": 4.1266, "step": 1042000 }, { "epoch": 1.96, "learning_rate": 0.00010431648734486098, "loss": 4.1178, "step": 1042500 }, { "epoch": 1.96, "learning_rate": 0.00010422263434118945, "loss": 4.1322, "step": 1043000 }, { "epoch": 1.96, "learning_rate": 0.00010412878133751792, "loss": 4.147, "step": 1043500 }, { "epoch": 1.96, "learning_rate": 0.00010403492833384637, "loss": 4.1121, "step": 1044000 }, { "epoch": 1.96, "learning_rate": 0.00010394107533017485, "loss": 4.1315, "step": 1044500 }, { "epoch": 1.96, "learning_rate": 0.00010384722232650333, "loss": 4.1236, "step": 1045000 }, { "epoch": 1.96, "learning_rate": 0.00010375336932283179, "loss": 4.1159, "step": 1045500 }, { "epoch": 1.96, "learning_rate": 0.00010365951631916027, "loss": 4.1236, "step": 1046000 }, { "epoch": 1.96, "learning_rate": 0.00010356566331548875, "loss": 4.1284, "step": 1046500 }, { "epoch": 1.97, "learning_rate": 0.00010347181031181721, "loss": 4.135, "step": 1047000 }, { "epoch": 1.97, "learning_rate": 0.00010337795730814569, "loss": 4.1498, "step": 1047500 }, { "epoch": 1.97, "learning_rate": 0.00010328410430447415, "loss": 4.1342, "step": 1048000 }, { "epoch": 1.97, "learning_rate": 0.00010319025130080262, "loss": 4.1236, "step": 1048500 }, { "epoch": 1.97, "learning_rate": 0.00010309639829713109, "loss": 4.1221, "step": 1049000 }, { "epoch": 1.97, "learning_rate": 0.00010300254529345957, "loss": 4.1553, "step": 1049500 }, { "epoch": 1.97, "learning_rate": 0.00010290869228978803, "loss": 4.1391, "step": 1050000 }, { "epoch": 1.97, "learning_rate": 0.00010281483928611651, "loss": 4.1206, "step": 1050500 }, { "epoch": 1.97, "learning_rate": 0.00010272098628244496, "loss": 4.1113, "step": 1051000 }, { "epoch": 1.97, "learning_rate": 0.00010262713327877344, "loss": 4.1332, "step": 1051500 }, { "epoch": 1.97, "learning_rate": 0.00010253328027510192, "loss": 4.1186, "step": 1052000 }, { "epoch": 1.98, "learning_rate": 0.00010243942727143038, "loss": 4.1325, "step": 1052500 }, { "epoch": 1.98, "learning_rate": 0.00010234557426775886, "loss": 4.1181, "step": 1053000 }, { "epoch": 1.98, "learning_rate": 0.00010225172126408734, "loss": 4.1301, "step": 1053500 }, { "epoch": 1.98, "learning_rate": 0.0001021578682604158, "loss": 4.1245, "step": 1054000 }, { "epoch": 1.98, "learning_rate": 0.00010206401525674426, "loss": 4.128, "step": 1054500 }, { "epoch": 1.98, "learning_rate": 0.00010197016225307274, "loss": 4.1392, "step": 1055000 }, { "epoch": 1.98, "learning_rate": 0.0001018763092494012, "loss": 4.1226, "step": 1055500 }, { "epoch": 1.98, "learning_rate": 0.00010178245624572968, "loss": 4.1193, "step": 1056000 }, { "epoch": 1.98, "learning_rate": 0.00010168860324205815, "loss": 4.126, "step": 1056500 }, { "epoch": 1.98, "learning_rate": 0.00010159475023838662, "loss": 4.1291, "step": 1057000 }, { "epoch": 1.98, "learning_rate": 0.0001015008972347151, "loss": 4.1259, "step": 1057500 }, { "epoch": 1.99, "learning_rate": 0.00010140704423104355, "loss": 4.1266, "step": 1058000 }, { "epoch": 1.99, "learning_rate": 0.00010131319122737203, "loss": 4.1414, "step": 1058500 }, { "epoch": 1.99, "learning_rate": 0.0001012193382237005, "loss": 4.1284, "step": 1059000 }, { "epoch": 1.99, "learning_rate": 0.00010112548522002897, "loss": 4.1388, "step": 1059500 }, { "epoch": 1.99, "learning_rate": 0.00010103163221635745, "loss": 4.1371, "step": 1060000 }, { "epoch": 1.99, "learning_rate": 0.00010093777921268592, "loss": 4.1161, "step": 1060500 }, { "epoch": 1.99, "learning_rate": 0.00010084392620901437, "loss": 4.128, "step": 1061000 }, { "epoch": 1.99, "learning_rate": 0.00010075007320534285, "loss": 4.1318, "step": 1061500 }, { "epoch": 1.99, "learning_rate": 0.00010065622020167133, "loss": 4.113, "step": 1062000 }, { "epoch": 1.99, "learning_rate": 0.0001005623671979998, "loss": 4.1257, "step": 1062500 }, { "epoch": 2.0, "learning_rate": 0.00010046851419432827, "loss": 4.1213, "step": 1063000 }, { "epoch": 2.0, "learning_rate": 0.00010037466119065673, "loss": 4.1073, "step": 1063500 }, { "epoch": 2.0, "learning_rate": 0.00010028080818698521, "loss": 4.1265, "step": 1064000 }, { "epoch": 2.0, "learning_rate": 0.00010018695518331369, "loss": 4.115, "step": 1064500 }, { "epoch": 2.0, "learning_rate": 0.00010009310217964214, "loss": 4.1257, "step": 1065000 }, { "epoch": 2.0, "learning_rate": 9.999924917597062e-05, "loss": 4.1176, "step": 1065500 }, { "epoch": 2.0, "learning_rate": 9.99053961722991e-05, "loss": 4.1204, "step": 1066000 }, { "epoch": 2.0, "learning_rate": 9.981154316862756e-05, "loss": 4.1265, "step": 1066500 }, { "epoch": 2.0, "learning_rate": 9.971769016495604e-05, "loss": 4.1058, "step": 1067000 }, { "epoch": 2.0, "learning_rate": 9.962383716128451e-05, "loss": 4.105, "step": 1067500 }, { "epoch": 2.0, "learning_rate": 9.952998415761296e-05, "loss": 4.1223, "step": 1068000 }, { "epoch": 2.01, "learning_rate": 9.943613115394144e-05, "loss": 4.086, "step": 1068500 }, { "epoch": 2.01, "learning_rate": 9.93422781502699e-05, "loss": 4.1134, "step": 1069000 }, { "epoch": 2.01, "learning_rate": 9.924842514659838e-05, "loss": 4.1087, "step": 1069500 }, { "epoch": 2.01, "learning_rate": 9.915457214292686e-05, "loss": 4.1157, "step": 1070000 }, { "epoch": 2.01, "learning_rate": 9.906071913925532e-05, "loss": 4.1426, "step": 1070500 }, { "epoch": 2.01, "learning_rate": 9.896686613558379e-05, "loss": 4.1268, "step": 1071000 }, { "epoch": 2.01, "learning_rate": 9.887301313191227e-05, "loss": 4.1173, "step": 1071500 }, { "epoch": 2.01, "learning_rate": 9.877916012824073e-05, "loss": 4.1073, "step": 1072000 }, { "epoch": 2.01, "learning_rate": 9.86853071245692e-05, "loss": 4.1115, "step": 1072500 }, { "epoch": 2.01, "learning_rate": 9.859145412089768e-05, "loss": 4.1194, "step": 1073000 }, { "epoch": 2.02, "learning_rate": 9.849760111722615e-05, "loss": 4.1053, "step": 1073500 }, { "epoch": 2.02, "learning_rate": 9.840374811355463e-05, "loss": 4.0859, "step": 1074000 }, { "epoch": 2.02, "learning_rate": 9.830989510988308e-05, "loss": 4.1224, "step": 1074500 }, { "epoch": 2.02, "learning_rate": 9.821604210621155e-05, "loss": 4.123, "step": 1075000 }, { "epoch": 2.02, "learning_rate": 9.812218910254003e-05, "loss": 4.1193, "step": 1075500 }, { "epoch": 2.02, "learning_rate": 9.80283360988685e-05, "loss": 4.1113, "step": 1076000 }, { "epoch": 2.02, "learning_rate": 9.793448309519697e-05, "loss": 4.1098, "step": 1076500 }, { "epoch": 2.02, "learning_rate": 9.784063009152545e-05, "loss": 4.101, "step": 1077000 }, { "epoch": 2.02, "learning_rate": 9.77467770878539e-05, "loss": 4.1108, "step": 1077500 }, { "epoch": 2.02, "learning_rate": 9.765292408418238e-05, "loss": 4.1113, "step": 1078000 }, { "epoch": 2.02, "learning_rate": 9.755907108051085e-05, "loss": 4.1069, "step": 1078500 }, { "epoch": 2.03, "learning_rate": 9.746521807683932e-05, "loss": 4.107, "step": 1079000 }, { "epoch": 2.03, "learning_rate": 9.73713650731678e-05, "loss": 4.1067, "step": 1079500 }, { "epoch": 2.03, "learning_rate": 9.727751206949627e-05, "loss": 4.1144, "step": 1080000 }, { "epoch": 2.03, "learning_rate": 9.718365906582474e-05, "loss": 4.1251, "step": 1080500 }, { "epoch": 2.03, "learning_rate": 9.708980606215321e-05, "loss": 4.1126, "step": 1081000 }, { "epoch": 2.03, "learning_rate": 9.699595305848166e-05, "loss": 4.124, "step": 1081500 }, { "epoch": 2.03, "learning_rate": 9.690210005481014e-05, "loss": 4.1106, "step": 1082000 }, { "epoch": 2.03, "learning_rate": 9.680824705113862e-05, "loss": 4.1016, "step": 1082500 }, { "epoch": 2.03, "learning_rate": 9.671439404746708e-05, "loss": 4.085, "step": 1083000 }, { "epoch": 2.03, "learning_rate": 9.662054104379556e-05, "loss": 4.0947, "step": 1083500 }, { "epoch": 2.03, "learning_rate": 9.652668804012404e-05, "loss": 4.1078, "step": 1084000 }, { "epoch": 2.04, "learning_rate": 9.643283503645249e-05, "loss": 4.1102, "step": 1084500 }, { "epoch": 2.04, "learning_rate": 9.633898203278097e-05, "loss": 4.1315, "step": 1085000 }, { "epoch": 2.04, "learning_rate": 9.624512902910944e-05, "loss": 4.1096, "step": 1085500 }, { "epoch": 2.04, "learning_rate": 9.615127602543791e-05, "loss": 4.1068, "step": 1086000 }, { "epoch": 2.04, "learning_rate": 9.605742302176638e-05, "loss": 4.1076, "step": 1086500 }, { "epoch": 2.04, "learning_rate": 9.596357001809485e-05, "loss": 4.0988, "step": 1087000 }, { "epoch": 2.04, "learning_rate": 9.586971701442333e-05, "loss": 4.1168, "step": 1087500 }, { "epoch": 2.04, "learning_rate": 9.577586401075179e-05, "loss": 4.1267, "step": 1088000 }, { "epoch": 2.04, "learning_rate": 9.568201100708025e-05, "loss": 4.105, "step": 1088500 }, { "epoch": 2.04, "learning_rate": 9.558815800340873e-05, "loss": 4.1078, "step": 1089000 }, { "epoch": 2.05, "learning_rate": 9.549430499973721e-05, "loss": 4.1318, "step": 1089500 }, { "epoch": 2.05, "learning_rate": 9.540045199606567e-05, "loss": 4.1029, "step": 1090000 }, { "epoch": 2.05, "learning_rate": 9.530659899239415e-05, "loss": 4.1197, "step": 1090500 }, { "epoch": 2.05, "learning_rate": 9.521274598872263e-05, "loss": 4.1039, "step": 1091000 }, { "epoch": 2.05, "learning_rate": 9.511889298505108e-05, "loss": 4.1121, "step": 1091500 }, { "epoch": 2.05, "learning_rate": 9.502503998137955e-05, "loss": 4.1012, "step": 1092000 }, { "epoch": 2.05, "learning_rate": 9.493118697770802e-05, "loss": 4.1103, "step": 1092500 }, { "epoch": 2.05, "learning_rate": 9.48373339740365e-05, "loss": 4.0982, "step": 1093000 }, { "epoch": 2.05, "learning_rate": 9.474348097036497e-05, "loss": 4.1139, "step": 1093500 }, { "epoch": 2.05, "learning_rate": 9.464962796669344e-05, "loss": 4.1062, "step": 1094000 }, { "epoch": 2.05, "learning_rate": 9.45557749630219e-05, "loss": 4.1152, "step": 1094500 }, { "epoch": 2.06, "learning_rate": 9.446192195935038e-05, "loss": 4.11, "step": 1095000 }, { "epoch": 2.06, "learning_rate": 9.436806895567884e-05, "loss": 4.1151, "step": 1095500 }, { "epoch": 2.06, "learning_rate": 9.427421595200732e-05, "loss": 4.1043, "step": 1096000 }, { "epoch": 2.06, "learning_rate": 9.41803629483358e-05, "loss": 4.1052, "step": 1096500 }, { "epoch": 2.06, "learning_rate": 9.408650994466426e-05, "loss": 4.1109, "step": 1097000 }, { "epoch": 2.06, "learning_rate": 9.399265694099274e-05, "loss": 4.0984, "step": 1097500 }, { "epoch": 2.06, "learning_rate": 9.389880393732122e-05, "loss": 4.1029, "step": 1098000 }, { "epoch": 2.06, "learning_rate": 9.380495093364967e-05, "loss": 4.1077, "step": 1098500 }, { "epoch": 2.06, "learning_rate": 9.371109792997814e-05, "loss": 4.1014, "step": 1099000 }, { "epoch": 2.06, "learning_rate": 9.361724492630661e-05, "loss": 4.1199, "step": 1099500 }, { "epoch": 2.06, "learning_rate": 9.352339192263508e-05, "loss": 4.1035, "step": 1100000 }, { "epoch": 2.07, "learning_rate": 9.342953891896356e-05, "loss": 4.0974, "step": 1100500 }, { "epoch": 2.07, "learning_rate": 9.333568591529201e-05, "loss": 4.1303, "step": 1101000 }, { "epoch": 2.07, "learning_rate": 9.324183291162049e-05, "loss": 4.1143, "step": 1101500 }, { "epoch": 2.07, "learning_rate": 9.314797990794897e-05, "loss": 4.1117, "step": 1102000 }, { "epoch": 2.07, "learning_rate": 9.305412690427743e-05, "loss": 4.0956, "step": 1102500 }, { "epoch": 2.07, "learning_rate": 9.296027390060591e-05, "loss": 4.1063, "step": 1103000 }, { "epoch": 2.07, "learning_rate": 9.286642089693439e-05, "loss": 4.1058, "step": 1103500 }, { "epoch": 2.07, "learning_rate": 9.277256789326285e-05, "loss": 4.1128, "step": 1104000 }, { "epoch": 2.07, "learning_rate": 9.267871488959133e-05, "loss": 4.1244, "step": 1104500 }, { "epoch": 2.07, "learning_rate": 9.258486188591978e-05, "loss": 4.12, "step": 1105000 }, { "epoch": 2.08, "learning_rate": 9.249100888224826e-05, "loss": 4.1167, "step": 1105500 }, { "epoch": 2.08, "learning_rate": 9.239715587857673e-05, "loss": 4.1086, "step": 1106000 }, { "epoch": 2.08, "learning_rate": 9.23033028749052e-05, "loss": 4.1212, "step": 1106500 }, { "epoch": 2.08, "learning_rate": 9.220944987123367e-05, "loss": 4.1176, "step": 1107000 }, { "epoch": 2.08, "learning_rate": 9.211559686756215e-05, "loss": 4.1025, "step": 1107500 }, { "epoch": 2.08, "learning_rate": 9.20217438638906e-05, "loss": 4.104, "step": 1108000 }, { "epoch": 2.08, "learning_rate": 9.192789086021908e-05, "loss": 4.1013, "step": 1108500 }, { "epoch": 2.08, "learning_rate": 9.183403785654756e-05, "loss": 4.1294, "step": 1109000 }, { "epoch": 2.08, "learning_rate": 9.174018485287602e-05, "loss": 4.1124, "step": 1109500 }, { "epoch": 2.08, "learning_rate": 9.16463318492045e-05, "loss": 4.1205, "step": 1110000 }, { "epoch": 2.08, "learning_rate": 9.155247884553296e-05, "loss": 4.1046, "step": 1110500 }, { "epoch": 2.09, "learning_rate": 9.145862584186144e-05, "loss": 4.1032, "step": 1111000 }, { "epoch": 2.09, "learning_rate": 9.13647728381899e-05, "loss": 4.0988, "step": 1111500 }, { "epoch": 2.09, "learning_rate": 9.127091983451837e-05, "loss": 4.129, "step": 1112000 }, { "epoch": 2.09, "learning_rate": 9.117706683084684e-05, "loss": 4.1121, "step": 1112500 }, { "epoch": 2.09, "learning_rate": 9.108321382717532e-05, "loss": 4.1229, "step": 1113000 }, { "epoch": 2.09, "learning_rate": 9.098936082350379e-05, "loss": 4.1127, "step": 1113500 }, { "epoch": 2.09, "learning_rate": 9.089550781983226e-05, "loss": 4.1041, "step": 1114000 }, { "epoch": 2.09, "learning_rate": 9.080165481616074e-05, "loss": 4.1169, "step": 1114500 }, { "epoch": 2.09, "learning_rate": 9.070780181248919e-05, "loss": 4.1025, "step": 1115000 }, { "epoch": 2.09, "learning_rate": 9.061394880881767e-05, "loss": 4.0978, "step": 1115500 }, { "epoch": 2.09, "learning_rate": 9.052009580514615e-05, "loss": 4.0989, "step": 1116000 }, { "epoch": 2.1, "learning_rate": 9.042624280147461e-05, "loss": 4.1119, "step": 1116500 }, { "epoch": 2.1, "learning_rate": 9.033238979780309e-05, "loss": 4.1034, "step": 1117000 }, { "epoch": 2.1, "learning_rate": 9.023853679413155e-05, "loss": 4.0963, "step": 1117500 }, { "epoch": 2.1, "learning_rate": 9.014468379046001e-05, "loss": 4.1093, "step": 1118000 }, { "epoch": 2.1, "learning_rate": 9.005083078678849e-05, "loss": 4.0885, "step": 1118500 }, { "epoch": 2.1, "learning_rate": 8.995697778311696e-05, "loss": 4.0972, "step": 1119000 }, { "epoch": 2.1, "learning_rate": 8.986312477944543e-05, "loss": 4.0909, "step": 1119500 }, { "epoch": 2.1, "learning_rate": 8.976927177577391e-05, "loss": 4.1121, "step": 1120000 }, { "epoch": 2.1, "learning_rate": 8.967541877210237e-05, "loss": 4.0916, "step": 1120500 }, { "epoch": 2.1, "learning_rate": 8.958156576843085e-05, "loss": 4.0999, "step": 1121000 }, { "epoch": 2.11, "learning_rate": 8.948771276475933e-05, "loss": 4.1013, "step": 1121500 }, { "epoch": 2.11, "learning_rate": 8.939385976108778e-05, "loss": 4.1082, "step": 1122000 }, { "epoch": 2.11, "learning_rate": 8.930000675741626e-05, "loss": 4.1003, "step": 1122500 }, { "epoch": 2.11, "learning_rate": 8.920615375374472e-05, "loss": 4.1027, "step": 1123000 }, { "epoch": 2.11, "learning_rate": 8.91123007500732e-05, "loss": 4.1203, "step": 1123500 }, { "epoch": 2.11, "learning_rate": 8.901844774640168e-05, "loss": 4.1085, "step": 1124000 }, { "epoch": 2.11, "learning_rate": 8.892459474273013e-05, "loss": 4.1124, "step": 1124500 }, { "epoch": 2.11, "learning_rate": 8.88307417390586e-05, "loss": 4.1267, "step": 1125000 }, { "epoch": 2.11, "learning_rate": 8.873688873538708e-05, "loss": 4.1079, "step": 1125500 }, { "epoch": 2.11, "learning_rate": 8.864303573171554e-05, "loss": 4.1296, "step": 1126000 }, { "epoch": 2.11, "learning_rate": 8.854918272804402e-05, "loss": 4.1093, "step": 1126500 }, { "epoch": 2.12, "learning_rate": 8.84553297243725e-05, "loss": 4.1036, "step": 1127000 }, { "epoch": 2.12, "learning_rate": 8.836147672070096e-05, "loss": 4.1202, "step": 1127500 }, { "epoch": 2.12, "learning_rate": 8.826762371702944e-05, "loss": 4.1145, "step": 1128000 }, { "epoch": 2.12, "learning_rate": 8.81737707133579e-05, "loss": 4.1051, "step": 1128500 }, { "epoch": 2.12, "learning_rate": 8.807991770968637e-05, "loss": 4.0903, "step": 1129000 }, { "epoch": 2.12, "learning_rate": 8.798606470601485e-05, "loss": 4.1086, "step": 1129500 }, { "epoch": 2.12, "learning_rate": 8.789221170234331e-05, "loss": 4.1134, "step": 1130000 }, { "epoch": 2.12, "learning_rate": 8.779835869867179e-05, "loss": 4.1093, "step": 1130500 }, { "epoch": 2.12, "learning_rate": 8.770450569500026e-05, "loss": 4.0988, "step": 1131000 }, { "epoch": 2.12, "learning_rate": 8.761065269132871e-05, "loss": 4.1104, "step": 1131500 }, { "epoch": 2.12, "learning_rate": 8.751679968765719e-05, "loss": 4.1203, "step": 1132000 }, { "epoch": 2.13, "learning_rate": 8.742294668398567e-05, "loss": 4.0959, "step": 1132500 }, { "epoch": 2.13, "learning_rate": 8.732909368031413e-05, "loss": 4.1168, "step": 1133000 }, { "epoch": 2.13, "learning_rate": 8.723524067664261e-05, "loss": 4.1101, "step": 1133500 }, { "epoch": 2.13, "learning_rate": 8.714138767297109e-05, "loss": 4.1177, "step": 1134000 }, { "epoch": 2.13, "learning_rate": 8.704753466929955e-05, "loss": 4.0992, "step": 1134500 }, { "epoch": 2.13, "learning_rate": 8.695368166562802e-05, "loss": 4.1014, "step": 1135000 }, { "epoch": 2.13, "learning_rate": 8.685982866195648e-05, "loss": 4.096, "step": 1135500 }, { "epoch": 2.13, "learning_rate": 8.676597565828496e-05, "loss": 4.0882, "step": 1136000 }, { "epoch": 2.13, "learning_rate": 8.667212265461343e-05, "loss": 4.1013, "step": 1136500 }, { "epoch": 2.13, "learning_rate": 8.65782696509419e-05, "loss": 4.1161, "step": 1137000 }, { "epoch": 2.14, "learning_rate": 8.648441664727038e-05, "loss": 4.0952, "step": 1137500 }, { "epoch": 2.14, "learning_rate": 8.639056364359885e-05, "loss": 4.1192, "step": 1138000 }, { "epoch": 2.14, "learning_rate": 8.62967106399273e-05, "loss": 4.1057, "step": 1138500 }, { "epoch": 2.14, "learning_rate": 8.620285763625578e-05, "loss": 4.0951, "step": 1139000 }, { "epoch": 2.14, "learning_rate": 8.610900463258426e-05, "loss": 4.1038, "step": 1139500 }, { "epoch": 2.14, "learning_rate": 8.601515162891272e-05, "loss": 4.0837, "step": 1140000 }, { "epoch": 2.14, "learning_rate": 8.59212986252412e-05, "loss": 4.1054, "step": 1140500 }, { "epoch": 2.14, "learning_rate": 8.582744562156966e-05, "loss": 4.1175, "step": 1141000 }, { "epoch": 2.14, "learning_rate": 8.573359261789813e-05, "loss": 4.0941, "step": 1141500 }, { "epoch": 2.14, "learning_rate": 8.56397396142266e-05, "loss": 4.0994, "step": 1142000 }, { "epoch": 2.14, "learning_rate": 8.554588661055507e-05, "loss": 4.1103, "step": 1142500 }, { "epoch": 2.15, "learning_rate": 8.545203360688355e-05, "loss": 4.1026, "step": 1143000 }, { "epoch": 2.15, "learning_rate": 8.535818060321202e-05, "loss": 4.1127, "step": 1143500 }, { "epoch": 2.15, "learning_rate": 8.526432759954049e-05, "loss": 4.1059, "step": 1144000 }, { "epoch": 2.15, "learning_rate": 8.517047459586897e-05, "loss": 4.1182, "step": 1144500 }, { "epoch": 2.15, "learning_rate": 8.507662159219744e-05, "loss": 4.1071, "step": 1145000 }, { "epoch": 2.15, "learning_rate": 8.498276858852589e-05, "loss": 4.1002, "step": 1145500 }, { "epoch": 2.15, "learning_rate": 8.488891558485437e-05, "loss": 4.1182, "step": 1146000 }, { "epoch": 2.15, "learning_rate": 8.479506258118285e-05, "loss": 4.1034, "step": 1146500 }, { "epoch": 2.15, "learning_rate": 8.470120957751131e-05, "loss": 4.1135, "step": 1147000 }, { "epoch": 2.15, "learning_rate": 8.460735657383979e-05, "loss": 4.1225, "step": 1147500 }, { "epoch": 2.15, "learning_rate": 8.451350357016824e-05, "loss": 4.1207, "step": 1148000 }, { "epoch": 2.16, "learning_rate": 8.441965056649672e-05, "loss": 4.1078, "step": 1148500 }, { "epoch": 2.16, "learning_rate": 8.43257975628252e-05, "loss": 4.0904, "step": 1149000 }, { "epoch": 2.16, "learning_rate": 8.423194455915366e-05, "loss": 4.1124, "step": 1149500 }, { "epoch": 2.16, "learning_rate": 8.413809155548214e-05, "loss": 4.1007, "step": 1150000 }, { "epoch": 2.16, "learning_rate": 8.404423855181061e-05, "loss": 4.0937, "step": 1150500 }, { "epoch": 2.16, "learning_rate": 8.395038554813908e-05, "loss": 4.1043, "step": 1151000 }, { "epoch": 2.16, "learning_rate": 8.385653254446755e-05, "loss": 4.1088, "step": 1151500 }, { "epoch": 2.16, "learning_rate": 8.376267954079602e-05, "loss": 4.1029, "step": 1152000 }, { "epoch": 2.16, "learning_rate": 8.366882653712448e-05, "loss": 4.1002, "step": 1152500 }, { "epoch": 2.16, "learning_rate": 8.357497353345296e-05, "loss": 4.111, "step": 1153000 }, { "epoch": 2.17, "learning_rate": 8.348112052978142e-05, "loss": 4.1253, "step": 1153500 }, { "epoch": 2.17, "learning_rate": 8.33872675261099e-05, "loss": 4.1056, "step": 1154000 }, { "epoch": 2.17, "learning_rate": 8.329341452243838e-05, "loss": 4.0936, "step": 1154500 }, { "epoch": 2.17, "learning_rate": 8.319956151876683e-05, "loss": 4.1043, "step": 1155000 }, { "epoch": 2.17, "learning_rate": 8.31057085150953e-05, "loss": 4.0881, "step": 1155500 }, { "epoch": 2.17, "learning_rate": 8.301185551142378e-05, "loss": 4.0945, "step": 1156000 }, { "epoch": 2.17, "learning_rate": 8.291800250775225e-05, "loss": 4.1038, "step": 1156500 }, { "epoch": 2.17, "learning_rate": 8.282414950408072e-05, "loss": 4.1109, "step": 1157000 }, { "epoch": 2.17, "learning_rate": 8.27302965004092e-05, "loss": 4.1039, "step": 1157500 }, { "epoch": 2.17, "learning_rate": 8.263644349673765e-05, "loss": 4.1109, "step": 1158000 }, { "epoch": 2.17, "learning_rate": 8.254259049306613e-05, "loss": 4.104, "step": 1158500 }, { "epoch": 2.18, "learning_rate": 8.24487374893946e-05, "loss": 4.1112, "step": 1159000 }, { "epoch": 2.18, "learning_rate": 8.235488448572307e-05, "loss": 4.1087, "step": 1159500 }, { "epoch": 2.18, "learning_rate": 8.226103148205155e-05, "loss": 4.123, "step": 1160000 }, { "epoch": 2.18, "learning_rate": 8.216717847838001e-05, "loss": 4.1012, "step": 1160500 }, { "epoch": 2.18, "learning_rate": 8.207332547470849e-05, "loss": 4.119, "step": 1161000 }, { "epoch": 2.18, "learning_rate": 8.197947247103697e-05, "loss": 4.1136, "step": 1161500 }, { "epoch": 2.18, "learning_rate": 8.188561946736542e-05, "loss": 4.1048, "step": 1162000 }, { "epoch": 2.18, "learning_rate": 8.17917664636939e-05, "loss": 4.0936, "step": 1162500 }, { "epoch": 2.18, "learning_rate": 8.169791346002237e-05, "loss": 4.1135, "step": 1163000 }, { "epoch": 2.18, "learning_rate": 8.160406045635084e-05, "loss": 4.1141, "step": 1163500 }, { "epoch": 2.18, "learning_rate": 8.151020745267931e-05, "loss": 4.1368, "step": 1164000 }, { "epoch": 2.19, "learning_rate": 8.141635444900779e-05, "loss": 4.0932, "step": 1164500 }, { "epoch": 2.19, "learning_rate": 8.132250144533624e-05, "loss": 4.1007, "step": 1165000 }, { "epoch": 2.19, "learning_rate": 8.122864844166472e-05, "loss": 4.0984, "step": 1165500 }, { "epoch": 2.19, "learning_rate": 8.113479543799318e-05, "loss": 4.1073, "step": 1166000 }, { "epoch": 2.19, "learning_rate": 8.104094243432166e-05, "loss": 4.1027, "step": 1166500 }, { "epoch": 2.19, "learning_rate": 8.094708943065014e-05, "loss": 4.0946, "step": 1167000 }, { "epoch": 2.19, "learning_rate": 8.08532364269786e-05, "loss": 4.108, "step": 1167500 }, { "epoch": 2.19, "learning_rate": 8.075938342330708e-05, "loss": 4.1134, "step": 1168000 }, { "epoch": 2.19, "learning_rate": 8.066553041963554e-05, "loss": 4.1216, "step": 1168500 }, { "epoch": 2.19, "learning_rate": 8.0571677415964e-05, "loss": 4.0972, "step": 1169000 }, { "epoch": 2.2, "learning_rate": 8.047782441229248e-05, "loss": 4.1026, "step": 1169500 }, { "epoch": 2.2, "learning_rate": 8.038397140862096e-05, "loss": 4.0985, "step": 1170000 }, { "epoch": 2.2, "learning_rate": 8.029011840494942e-05, "loss": 4.0966, "step": 1170500 }, { "epoch": 2.2, "learning_rate": 8.01962654012779e-05, "loss": 4.1163, "step": 1171000 }, { "epoch": 2.2, "learning_rate": 8.010241239760635e-05, "loss": 4.0955, "step": 1171500 }, { "epoch": 2.2, "learning_rate": 8.000855939393483e-05, "loss": 4.1089, "step": 1172000 }, { "epoch": 2.2, "learning_rate": 7.991470639026331e-05, "loss": 4.1012, "step": 1172500 }, { "epoch": 2.2, "learning_rate": 7.982085338659177e-05, "loss": 4.102, "step": 1173000 }, { "epoch": 2.2, "learning_rate": 7.972700038292025e-05, "loss": 4.1096, "step": 1173500 }, { "epoch": 2.2, "learning_rate": 7.963314737924873e-05, "loss": 4.0962, "step": 1174000 }, { "epoch": 2.2, "learning_rate": 7.953929437557719e-05, "loss": 4.107, "step": 1174500 }, { "epoch": 2.21, "learning_rate": 7.944544137190565e-05, "loss": 4.0989, "step": 1175000 }, { "epoch": 2.21, "learning_rate": 7.935158836823413e-05, "loss": 4.106, "step": 1175500 }, { "epoch": 2.21, "learning_rate": 7.92577353645626e-05, "loss": 4.1022, "step": 1176000 }, { "epoch": 2.21, "learning_rate": 7.916388236089107e-05, "loss": 4.1093, "step": 1176500 }, { "epoch": 2.21, "learning_rate": 7.907002935721955e-05, "loss": 4.0966, "step": 1177000 }, { "epoch": 2.21, "learning_rate": 7.897617635354801e-05, "loss": 4.1013, "step": 1177500 }, { "epoch": 2.21, "learning_rate": 7.888232334987649e-05, "loss": 4.107, "step": 1178000 }, { "epoch": 2.21, "learning_rate": 7.878847034620494e-05, "loss": 4.1036, "step": 1178500 }, { "epoch": 2.21, "learning_rate": 7.869461734253342e-05, "loss": 4.1103, "step": 1179000 }, { "epoch": 2.21, "learning_rate": 7.86007643388619e-05, "loss": 4.1004, "step": 1179500 }, { "epoch": 2.21, "learning_rate": 7.850691133519036e-05, "loss": 4.1094, "step": 1180000 }, { "epoch": 2.22, "learning_rate": 7.841305833151884e-05, "loss": 4.0902, "step": 1180500 }, { "epoch": 2.22, "learning_rate": 7.831920532784732e-05, "loss": 4.1044, "step": 1181000 }, { "epoch": 2.22, "learning_rate": 7.822535232417577e-05, "loss": 4.102, "step": 1181500 }, { "epoch": 2.22, "learning_rate": 7.813149932050424e-05, "loss": 4.1137, "step": 1182000 }, { "epoch": 2.22, "learning_rate": 7.803764631683272e-05, "loss": 4.1135, "step": 1182500 }, { "epoch": 2.22, "learning_rate": 7.794379331316118e-05, "loss": 4.1144, "step": 1183000 }, { "epoch": 2.22, "learning_rate": 7.784994030948966e-05, "loss": 4.1082, "step": 1183500 }, { "epoch": 2.22, "learning_rate": 7.775608730581813e-05, "loss": 4.1202, "step": 1184000 }, { "epoch": 2.22, "learning_rate": 7.76622343021466e-05, "loss": 4.1067, "step": 1184500 }, { "epoch": 2.22, "learning_rate": 7.756838129847508e-05, "loss": 4.0964, "step": 1185000 }, { "epoch": 2.23, "learning_rate": 7.747452829480353e-05, "loss": 4.1089, "step": 1185500 }, { "epoch": 2.23, "learning_rate": 7.738067529113201e-05, "loss": 4.1162, "step": 1186000 }, { "epoch": 2.23, "learning_rate": 7.728682228746049e-05, "loss": 4.112, "step": 1186500 }, { "epoch": 2.23, "learning_rate": 7.719296928378895e-05, "loss": 4.1055, "step": 1187000 }, { "epoch": 2.23, "learning_rate": 7.709911628011743e-05, "loss": 4.1009, "step": 1187500 }, { "epoch": 2.23, "learning_rate": 7.70052632764459e-05, "loss": 4.0818, "step": 1188000 }, { "epoch": 2.23, "learning_rate": 7.691141027277435e-05, "loss": 4.1135, "step": 1188500 }, { "epoch": 2.23, "learning_rate": 7.681755726910283e-05, "loss": 4.0935, "step": 1189000 }, { "epoch": 2.23, "learning_rate": 7.67237042654313e-05, "loss": 4.1257, "step": 1189500 }, { "epoch": 2.23, "learning_rate": 7.662985126175977e-05, "loss": 4.1024, "step": 1190000 }, { "epoch": 2.23, "learning_rate": 7.653599825808825e-05, "loss": 4.0994, "step": 1190500 }, { "epoch": 2.24, "learning_rate": 7.644214525441671e-05, "loss": 4.1024, "step": 1191000 }, { "epoch": 2.24, "learning_rate": 7.634829225074519e-05, "loss": 4.1263, "step": 1191500 }, { "epoch": 2.24, "learning_rate": 7.625443924707366e-05, "loss": 4.1153, "step": 1192000 }, { "epoch": 2.24, "learning_rate": 7.616058624340212e-05, "loss": 4.0918, "step": 1192500 }, { "epoch": 2.24, "learning_rate": 7.60667332397306e-05, "loss": 4.1032, "step": 1193000 }, { "epoch": 2.24, "learning_rate": 7.597288023605907e-05, "loss": 4.1051, "step": 1193500 }, { "epoch": 2.24, "learning_rate": 7.587902723238754e-05, "loss": 4.1022, "step": 1194000 }, { "epoch": 2.24, "learning_rate": 7.578517422871602e-05, "loss": 4.106, "step": 1194500 }, { "epoch": 2.24, "learning_rate": 7.569132122504449e-05, "loss": 4.1124, "step": 1195000 }, { "epoch": 2.24, "learning_rate": 7.559746822137294e-05, "loss": 4.0926, "step": 1195500 }, { "epoch": 2.24, "learning_rate": 7.550361521770142e-05, "loss": 4.1068, "step": 1196000 }, { "epoch": 2.25, "learning_rate": 7.540976221402988e-05, "loss": 4.1004, "step": 1196500 }, { "epoch": 2.25, "learning_rate": 7.531590921035836e-05, "loss": 4.0918, "step": 1197000 }, { "epoch": 2.25, "learning_rate": 7.522205620668684e-05, "loss": 4.0949, "step": 1197500 }, { "epoch": 2.25, "learning_rate": 7.51282032030153e-05, "loss": 4.0948, "step": 1198000 }, { "epoch": 2.25, "learning_rate": 7.503435019934377e-05, "loss": 4.0955, "step": 1198500 }, { "epoch": 2.25, "learning_rate": 7.494049719567224e-05, "loss": 4.0958, "step": 1199000 }, { "epoch": 2.25, "learning_rate": 7.484664419200072e-05, "loss": 4.1228, "step": 1199500 }, { "epoch": 2.25, "learning_rate": 7.475279118832919e-05, "loss": 4.094, "step": 1200000 }, { "epoch": 2.25, "learning_rate": 7.465893818465765e-05, "loss": 4.1212, "step": 1200500 }, { "epoch": 2.25, "learning_rate": 7.456508518098613e-05, "loss": 4.1101, "step": 1201000 }, { "epoch": 2.26, "learning_rate": 7.44712321773146e-05, "loss": 4.1151, "step": 1201500 }, { "epoch": 2.26, "learning_rate": 7.437737917364307e-05, "loss": 4.1059, "step": 1202000 }, { "epoch": 2.26, "learning_rate": 7.428352616997153e-05, "loss": 4.1077, "step": 1202500 }, { "epoch": 2.26, "learning_rate": 7.418967316630001e-05, "loss": 4.0735, "step": 1203000 }, { "epoch": 2.26, "learning_rate": 7.409582016262847e-05, "loss": 4.1093, "step": 1203500 }, { "epoch": 2.26, "learning_rate": 7.400196715895695e-05, "loss": 4.0987, "step": 1204000 }, { "epoch": 2.26, "learning_rate": 7.390811415528541e-05, "loss": 4.093, "step": 1204500 }, { "epoch": 2.26, "learning_rate": 7.381426115161389e-05, "loss": 4.0789, "step": 1205000 }, { "epoch": 2.26, "learning_rate": 7.372040814794236e-05, "loss": 4.0952, "step": 1205500 }, { "epoch": 2.26, "learning_rate": 7.362655514427083e-05, "loss": 4.1015, "step": 1206000 }, { "epoch": 2.26, "learning_rate": 7.35327021405993e-05, "loss": 4.1063, "step": 1206500 }, { "epoch": 2.27, "learning_rate": 7.343884913692777e-05, "loss": 4.0828, "step": 1207000 }, { "epoch": 2.27, "learning_rate": 7.334499613325624e-05, "loss": 4.0942, "step": 1207500 }, { "epoch": 2.27, "learning_rate": 7.325114312958472e-05, "loss": 4.1159, "step": 1208000 }, { "epoch": 2.27, "learning_rate": 7.31572901259132e-05, "loss": 4.1063, "step": 1208500 }, { "epoch": 2.27, "learning_rate": 7.306343712224166e-05, "loss": 4.0955, "step": 1209000 }, { "epoch": 2.27, "learning_rate": 7.296958411857012e-05, "loss": 4.1058, "step": 1209500 }, { "epoch": 2.27, "learning_rate": 7.287573111489859e-05, "loss": 4.112, "step": 1210000 }, { "epoch": 2.27, "learning_rate": 7.278187811122706e-05, "loss": 4.0972, "step": 1210500 }, { "epoch": 2.27, "learning_rate": 7.268802510755554e-05, "loss": 4.1061, "step": 1211000 }, { "epoch": 2.27, "learning_rate": 7.2594172103884e-05, "loss": 4.094, "step": 1211500 }, { "epoch": 2.27, "learning_rate": 7.250031910021247e-05, "loss": 4.1078, "step": 1212000 }, { "epoch": 2.28, "learning_rate": 7.240646609654095e-05, "loss": 4.1167, "step": 1212500 }, { "epoch": 2.28, "learning_rate": 7.231261309286942e-05, "loss": 4.0967, "step": 1213000 }, { "epoch": 2.28, "learning_rate": 7.221876008919789e-05, "loss": 4.1071, "step": 1213500 }, { "epoch": 2.28, "learning_rate": 7.212490708552636e-05, "loss": 4.1017, "step": 1214000 }, { "epoch": 2.28, "learning_rate": 7.203105408185483e-05, "loss": 4.1092, "step": 1214500 }, { "epoch": 2.28, "learning_rate": 7.19372010781833e-05, "loss": 4.0823, "step": 1215000 }, { "epoch": 2.28, "learning_rate": 7.184334807451177e-05, "loss": 4.1099, "step": 1215500 }, { "epoch": 2.28, "learning_rate": 7.174949507084025e-05, "loss": 4.093, "step": 1216000 }, { "epoch": 2.28, "learning_rate": 7.165564206716871e-05, "loss": 4.089, "step": 1216500 }, { "epoch": 2.28, "learning_rate": 7.156178906349717e-05, "loss": 4.08, "step": 1217000 }, { "epoch": 2.29, "learning_rate": 7.146793605982565e-05, "loss": 4.1275, "step": 1217500 }, { "epoch": 2.29, "learning_rate": 7.137408305615413e-05, "loss": 4.1133, "step": 1218000 }, { "epoch": 2.29, "learning_rate": 7.128023005248259e-05, "loss": 4.0937, "step": 1218500 }, { "epoch": 2.29, "learning_rate": 7.118637704881106e-05, "loss": 4.111, "step": 1219000 }, { "epoch": 2.29, "learning_rate": 7.109252404513953e-05, "loss": 4.1103, "step": 1219500 }, { "epoch": 2.29, "learning_rate": 7.099867104146801e-05, "loss": 4.1054, "step": 1220000 }, { "epoch": 2.29, "learning_rate": 7.090481803779648e-05, "loss": 4.0776, "step": 1220500 }, { "epoch": 2.29, "learning_rate": 7.081096503412494e-05, "loss": 4.0992, "step": 1221000 }, { "epoch": 2.29, "learning_rate": 7.071711203045342e-05, "loss": 4.0908, "step": 1221500 }, { "epoch": 2.29, "learning_rate": 7.062325902678188e-05, "loss": 4.0832, "step": 1222000 }, { "epoch": 2.29, "learning_rate": 7.052940602311036e-05, "loss": 4.1159, "step": 1222500 }, { "epoch": 2.3, "learning_rate": 7.043555301943884e-05, "loss": 4.0935, "step": 1223000 }, { "epoch": 2.3, "learning_rate": 7.03417000157673e-05, "loss": 4.0953, "step": 1223500 }, { "epoch": 2.3, "learning_rate": 7.024784701209576e-05, "loss": 4.1198, "step": 1224000 }, { "epoch": 2.3, "learning_rate": 7.015399400842424e-05, "loss": 4.078, "step": 1224500 }, { "epoch": 2.3, "learning_rate": 7.006014100475272e-05, "loss": 4.0921, "step": 1225000 }, { "epoch": 2.3, "learning_rate": 6.996628800108118e-05, "loss": 4.0981, "step": 1225500 }, { "epoch": 2.3, "learning_rate": 6.987243499740965e-05, "loss": 4.0983, "step": 1226000 }, { "epoch": 2.3, "learning_rate": 6.977858199373812e-05, "loss": 4.0922, "step": 1226500 }, { "epoch": 2.3, "learning_rate": 6.968472899006659e-05, "loss": 4.1017, "step": 1227000 }, { "epoch": 2.3, "learning_rate": 6.959087598639506e-05, "loss": 4.1147, "step": 1227500 }, { "epoch": 2.31, "learning_rate": 6.949702298272353e-05, "loss": 4.0916, "step": 1228000 }, { "epoch": 2.31, "learning_rate": 6.9403169979052e-05, "loss": 4.0987, "step": 1228500 }, { "epoch": 2.31, "learning_rate": 6.930931697538047e-05, "loss": 4.1038, "step": 1229000 }, { "epoch": 2.31, "learning_rate": 6.921546397170895e-05, "loss": 4.1036, "step": 1229500 }, { "epoch": 2.31, "learning_rate": 6.912161096803742e-05, "loss": 4.1167, "step": 1230000 }, { "epoch": 2.31, "learning_rate": 6.902775796436589e-05, "loss": 4.0901, "step": 1230500 }, { "epoch": 2.31, "learning_rate": 6.893390496069435e-05, "loss": 4.0888, "step": 1231000 }, { "epoch": 2.31, "learning_rate": 6.884005195702283e-05, "loss": 4.078, "step": 1231500 }, { "epoch": 2.31, "learning_rate": 6.874619895335131e-05, "loss": 4.0876, "step": 1232000 }, { "epoch": 2.31, "learning_rate": 6.865234594967977e-05, "loss": 4.0953, "step": 1232500 }, { "epoch": 2.31, "learning_rate": 6.855849294600823e-05, "loss": 4.1031, "step": 1233000 }, { "epoch": 2.32, "learning_rate": 6.84646399423367e-05, "loss": 4.0961, "step": 1233500 }, { "epoch": 2.32, "learning_rate": 6.837078693866518e-05, "loss": 4.0956, "step": 1234000 }, { "epoch": 2.32, "learning_rate": 6.827693393499365e-05, "loss": 4.1023, "step": 1234500 }, { "epoch": 2.32, "learning_rate": 6.818308093132212e-05, "loss": 4.0994, "step": 1235000 }, { "epoch": 2.32, "learning_rate": 6.80892279276506e-05, "loss": 4.1028, "step": 1235500 }, { "epoch": 2.32, "learning_rate": 6.799537492397906e-05, "loss": 4.0757, "step": 1236000 }, { "epoch": 2.32, "learning_rate": 6.790152192030754e-05, "loss": 4.1009, "step": 1236500 }, { "epoch": 2.32, "learning_rate": 6.7807668916636e-05, "loss": 4.0908, "step": 1237000 }, { "epoch": 2.32, "learning_rate": 6.771381591296448e-05, "loss": 4.1133, "step": 1237500 }, { "epoch": 2.32, "learning_rate": 6.761996290929294e-05, "loss": 4.1037, "step": 1238000 }, { "epoch": 2.32, "learning_rate": 6.752610990562142e-05, "loss": 4.0813, "step": 1238500 }, { "epoch": 2.33, "learning_rate": 6.743225690194988e-05, "loss": 4.1041, "step": 1239000 }, { "epoch": 2.33, "learning_rate": 6.733840389827836e-05, "loss": 4.1025, "step": 1239500 }, { "epoch": 2.33, "learning_rate": 6.724455089460682e-05, "loss": 4.0822, "step": 1240000 }, { "epoch": 2.33, "learning_rate": 6.715069789093529e-05, "loss": 4.1142, "step": 1240500 }, { "epoch": 2.33, "learning_rate": 6.705684488726376e-05, "loss": 4.0983, "step": 1241000 }, { "epoch": 2.33, "learning_rate": 6.696299188359224e-05, "loss": 4.0867, "step": 1241500 }, { "epoch": 2.33, "learning_rate": 6.68691388799207e-05, "loss": 4.0851, "step": 1242000 }, { "epoch": 2.33, "learning_rate": 6.677528587624917e-05, "loss": 4.1189, "step": 1242500 }, { "epoch": 2.33, "learning_rate": 6.668143287257765e-05, "loss": 4.0714, "step": 1243000 }, { "epoch": 2.33, "learning_rate": 6.658757986890612e-05, "loss": 4.0986, "step": 1243500 }, { "epoch": 2.34, "learning_rate": 6.649372686523459e-05, "loss": 4.1201, "step": 1244000 }, { "epoch": 2.34, "learning_rate": 6.639987386156307e-05, "loss": 4.1051, "step": 1244500 }, { "epoch": 2.34, "learning_rate": 6.630602085789153e-05, "loss": 4.0986, "step": 1245000 }, { "epoch": 2.34, "learning_rate": 6.621216785422e-05, "loss": 4.1061, "step": 1245500 }, { "epoch": 2.34, "learning_rate": 6.611831485054847e-05, "loss": 4.1017, "step": 1246000 }, { "epoch": 2.34, "learning_rate": 6.602446184687695e-05, "loss": 4.0975, "step": 1246500 }, { "epoch": 2.34, "learning_rate": 6.593060884320541e-05, "loss": 4.0997, "step": 1247000 }, { "epoch": 2.34, "learning_rate": 6.583675583953388e-05, "loss": 4.1023, "step": 1247500 }, { "epoch": 2.34, "learning_rate": 6.574290283586235e-05, "loss": 4.0847, "step": 1248000 }, { "epoch": 2.34, "learning_rate": 6.564904983219083e-05, "loss": 4.0966, "step": 1248500 }, { "epoch": 2.34, "learning_rate": 6.55551968285193e-05, "loss": 4.0887, "step": 1249000 }, { "epoch": 2.35, "learning_rate": 6.546134382484776e-05, "loss": 4.0982, "step": 1249500 }, { "epoch": 2.35, "learning_rate": 6.536749082117624e-05, "loss": 4.0799, "step": 1250000 }, { "epoch": 2.35, "learning_rate": 6.52736378175047e-05, "loss": 4.092, "step": 1250500 }, { "epoch": 2.35, "learning_rate": 6.517978481383318e-05, "loss": 4.0915, "step": 1251000 }, { "epoch": 2.35, "learning_rate": 6.508593181016164e-05, "loss": 4.0926, "step": 1251500 }, { "epoch": 2.35, "learning_rate": 6.499207880649012e-05, "loss": 4.0992, "step": 1252000 }, { "epoch": 2.35, "learning_rate": 6.489822580281858e-05, "loss": 4.1042, "step": 1252500 }, { "epoch": 2.35, "learning_rate": 6.480437279914706e-05, "loss": 4.0993, "step": 1253000 }, { "epoch": 2.35, "learning_rate": 6.471051979547554e-05, "loss": 4.0977, "step": 1253500 }, { "epoch": 2.35, "learning_rate": 6.4616666791804e-05, "loss": 4.1063, "step": 1254000 }, { "epoch": 2.35, "learning_rate": 6.452281378813247e-05, "loss": 4.1063, "step": 1254500 }, { "epoch": 2.36, "learning_rate": 6.442896078446094e-05, "loss": 4.0871, "step": 1255000 }, { "epoch": 2.36, "learning_rate": 6.43351077807894e-05, "loss": 4.0933, "step": 1255500 }, { "epoch": 2.36, "learning_rate": 6.424125477711788e-05, "loss": 4.0835, "step": 1256000 }, { "epoch": 2.36, "learning_rate": 6.414740177344635e-05, "loss": 4.1049, "step": 1256500 }, { "epoch": 2.36, "learning_rate": 6.405354876977483e-05, "loss": 4.0941, "step": 1257000 }, { "epoch": 2.36, "learning_rate": 6.395969576610329e-05, "loss": 4.1, "step": 1257500 }, { "epoch": 2.36, "learning_rate": 6.386584276243177e-05, "loss": 4.102, "step": 1258000 }, { "epoch": 2.36, "learning_rate": 6.377198975876023e-05, "loss": 4.0943, "step": 1258500 }, { "epoch": 2.36, "learning_rate": 6.367813675508871e-05, "loss": 4.1028, "step": 1259000 }, { "epoch": 2.36, "learning_rate": 6.358428375141717e-05, "loss": 4.0894, "step": 1259500 }, { "epoch": 2.37, "learning_rate": 6.349043074774565e-05, "loss": 4.0958, "step": 1260000 }, { "epoch": 2.37, "learning_rate": 6.339657774407411e-05, "loss": 4.1097, "step": 1260500 }, { "epoch": 2.37, "learning_rate": 6.330272474040259e-05, "loss": 4.1162, "step": 1261000 }, { "epoch": 2.37, "learning_rate": 6.320887173673105e-05, "loss": 4.0978, "step": 1261500 }, { "epoch": 2.37, "learning_rate": 6.311501873305952e-05, "loss": 4.0811, "step": 1262000 }, { "epoch": 2.37, "learning_rate": 6.3021165729388e-05, "loss": 4.0884, "step": 1262500 }, { "epoch": 2.37, "learning_rate": 6.292731272571647e-05, "loss": 4.0902, "step": 1263000 }, { "epoch": 2.37, "learning_rate": 6.283345972204494e-05, "loss": 4.0878, "step": 1263500 }, { "epoch": 2.37, "learning_rate": 6.27396067183734e-05, "loss": 4.0935, "step": 1264000 }, { "epoch": 2.37, "learning_rate": 6.264575371470188e-05, "loss": 4.1081, "step": 1264500 }, { "epoch": 2.37, "learning_rate": 6.255190071103036e-05, "loss": 4.112, "step": 1265000 }, { "epoch": 2.38, "learning_rate": 6.245804770735882e-05, "loss": 4.0864, "step": 1265500 }, { "epoch": 2.38, "learning_rate": 6.23641947036873e-05, "loss": 4.1276, "step": 1266000 }, { "epoch": 2.38, "learning_rate": 6.227034170001576e-05, "loss": 4.0768, "step": 1266500 }, { "epoch": 2.38, "learning_rate": 6.217648869634424e-05, "loss": 4.095, "step": 1267000 }, { "epoch": 2.38, "learning_rate": 6.20826356926727e-05, "loss": 4.1089, "step": 1267500 }, { "epoch": 2.38, "learning_rate": 6.198878268900118e-05, "loss": 4.092, "step": 1268000 }, { "epoch": 2.38, "learning_rate": 6.189492968532964e-05, "loss": 4.07, "step": 1268500 }, { "epoch": 2.38, "learning_rate": 6.180107668165811e-05, "loss": 4.1085, "step": 1269000 }, { "epoch": 2.38, "learning_rate": 6.170722367798658e-05, "loss": 4.0897, "step": 1269500 }, { "epoch": 2.38, "learning_rate": 6.161337067431506e-05, "loss": 4.1174, "step": 1270000 }, { "epoch": 2.38, "learning_rate": 6.151951767064353e-05, "loss": 4.0826, "step": 1270500 }, { "epoch": 2.39, "learning_rate": 6.142566466697199e-05, "loss": 4.0797, "step": 1271000 }, { "epoch": 2.39, "learning_rate": 6.133181166330047e-05, "loss": 4.0879, "step": 1271500 }, { "epoch": 2.39, "learning_rate": 6.123795865962894e-05, "loss": 4.1003, "step": 1272000 }, { "epoch": 2.39, "learning_rate": 6.114410565595741e-05, "loss": 4.0919, "step": 1272500 }, { "epoch": 2.39, "learning_rate": 6.105025265228587e-05, "loss": 4.1016, "step": 1273000 }, { "epoch": 2.39, "learning_rate": 6.095639964861435e-05, "loss": 4.0906, "step": 1273500 }, { "epoch": 2.39, "learning_rate": 6.086254664494282e-05, "loss": 4.1043, "step": 1274000 }, { "epoch": 2.39, "learning_rate": 6.076869364127129e-05, "loss": 4.0874, "step": 1274500 }, { "epoch": 2.39, "learning_rate": 6.067484063759976e-05, "loss": 4.0982, "step": 1275000 }, { "epoch": 2.39, "learning_rate": 6.058098763392823e-05, "loss": 4.0894, "step": 1275500 }, { "epoch": 2.4, "learning_rate": 6.04871346302567e-05, "loss": 4.0958, "step": 1276000 }, { "epoch": 2.4, "learning_rate": 6.039328162658517e-05, "loss": 4.0812, "step": 1276500 }, { "epoch": 2.4, "learning_rate": 6.0299428622913644e-05, "loss": 4.0904, "step": 1277000 }, { "epoch": 2.4, "learning_rate": 6.0205575619242115e-05, "loss": 4.0993, "step": 1277500 }, { "epoch": 2.4, "learning_rate": 6.011172261557058e-05, "loss": 4.0997, "step": 1278000 }, { "epoch": 2.4, "learning_rate": 6.001786961189905e-05, "loss": 4.1008, "step": 1278500 }, { "epoch": 2.4, "learning_rate": 5.992401660822753e-05, "loss": 4.1016, "step": 1279000 }, { "epoch": 2.4, "learning_rate": 5.9830163604556e-05, "loss": 4.0974, "step": 1279500 }, { "epoch": 2.4, "learning_rate": 5.973631060088446e-05, "loss": 4.0807, "step": 1280000 }, { "epoch": 2.4, "learning_rate": 5.964245759721294e-05, "loss": 4.0798, "step": 1280500 }, { "epoch": 2.4, "learning_rate": 5.954860459354141e-05, "loss": 4.0915, "step": 1281000 }, { "epoch": 2.41, "learning_rate": 5.945475158986987e-05, "loss": 4.077, "step": 1281500 }, { "epoch": 2.41, "learning_rate": 5.9360898586198344e-05, "loss": 4.1096, "step": 1282000 }, { "epoch": 2.41, "learning_rate": 5.926704558252682e-05, "loss": 4.0865, "step": 1282500 }, { "epoch": 2.41, "learning_rate": 5.917319257885529e-05, "loss": 4.083, "step": 1283000 }, { "epoch": 2.41, "learning_rate": 5.9079339575183756e-05, "loss": 4.0864, "step": 1283500 }, { "epoch": 2.41, "learning_rate": 5.898548657151223e-05, "loss": 4.0902, "step": 1284000 }, { "epoch": 2.41, "learning_rate": 5.8891633567840704e-05, "loss": 4.0884, "step": 1284500 }, { "epoch": 2.41, "learning_rate": 5.879778056416917e-05, "loss": 4.1099, "step": 1285000 }, { "epoch": 2.41, "learning_rate": 5.870392756049764e-05, "loss": 4.0829, "step": 1285500 }, { "epoch": 2.41, "learning_rate": 5.8610074556826116e-05, "loss": 4.0975, "step": 1286000 }, { "epoch": 2.41, "learning_rate": 5.851622155315458e-05, "loss": 4.1111, "step": 1286500 }, { "epoch": 2.42, "learning_rate": 5.842236854948305e-05, "loss": 4.103, "step": 1287000 }, { "epoch": 2.42, "learning_rate": 5.832851554581153e-05, "loss": 4.0883, "step": 1287500 }, { "epoch": 2.42, "learning_rate": 5.823466254214e-05, "loss": 4.0897, "step": 1288000 }, { "epoch": 2.42, "learning_rate": 5.814080953846846e-05, "loss": 4.0964, "step": 1288500 }, { "epoch": 2.42, "learning_rate": 5.804695653479693e-05, "loss": 4.0823, "step": 1289000 }, { "epoch": 2.42, "learning_rate": 5.795310353112541e-05, "loss": 4.0833, "step": 1289500 }, { "epoch": 2.42, "learning_rate": 5.7859250527453874e-05, "loss": 4.0981, "step": 1290000 }, { "epoch": 2.42, "learning_rate": 5.7765397523782345e-05, "loss": 4.0781, "step": 1290500 }, { "epoch": 2.42, "learning_rate": 5.7671544520110815e-05, "loss": 4.0967, "step": 1291000 }, { "epoch": 2.42, "learning_rate": 5.7577691516439286e-05, "loss": 4.0889, "step": 1291500 }, { "epoch": 2.43, "learning_rate": 5.748383851276776e-05, "loss": 4.0886, "step": 1292000 }, { "epoch": 2.43, "learning_rate": 5.738998550909623e-05, "loss": 4.0745, "step": 1292500 }, { "epoch": 2.43, "learning_rate": 5.7296132505424705e-05, "loss": 4.1027, "step": 1293000 }, { "epoch": 2.43, "learning_rate": 5.720227950175317e-05, "loss": 4.0981, "step": 1293500 }, { "epoch": 2.43, "learning_rate": 5.710842649808164e-05, "loss": 4.0851, "step": 1294000 }, { "epoch": 2.43, "learning_rate": 5.701457349441011e-05, "loss": 4.0905, "step": 1294500 }, { "epoch": 2.43, "learning_rate": 5.692072049073858e-05, "loss": 4.0839, "step": 1295000 }, { "epoch": 2.43, "learning_rate": 5.682686748706705e-05, "loss": 4.0818, "step": 1295500 }, { "epoch": 2.43, "learning_rate": 5.673301448339552e-05, "loss": 4.085, "step": 1296000 }, { "epoch": 2.43, "learning_rate": 5.6639161479724e-05, "loss": 4.0981, "step": 1296500 }, { "epoch": 2.43, "learning_rate": 5.654530847605246e-05, "loss": 4.0758, "step": 1297000 }, { "epoch": 2.44, "learning_rate": 5.6451455472380934e-05, "loss": 4.078, "step": 1297500 }, { "epoch": 2.44, "learning_rate": 5.63576024687094e-05, "loss": 4.0894, "step": 1298000 }, { "epoch": 2.44, "learning_rate": 5.6263749465037875e-05, "loss": 4.088, "step": 1298500 }, { "epoch": 2.44, "learning_rate": 5.6169896461366346e-05, "loss": 4.0978, "step": 1299000 }, { "epoch": 2.44, "learning_rate": 5.6076043457694816e-05, "loss": 4.0809, "step": 1299500 }, { "epoch": 2.44, "learning_rate": 5.598219045402328e-05, "loss": 4.0888, "step": 1300000 }, { "epoch": 2.44, "learning_rate": 5.588833745035176e-05, "loss": 4.1212, "step": 1300500 }, { "epoch": 2.44, "learning_rate": 5.579448444668023e-05, "loss": 4.0825, "step": 1301000 }, { "epoch": 2.44, "learning_rate": 5.570063144300869e-05, "loss": 4.1032, "step": 1301500 }, { "epoch": 2.44, "learning_rate": 5.560677843933717e-05, "loss": 4.0911, "step": 1302000 }, { "epoch": 2.44, "learning_rate": 5.551292543566564e-05, "loss": 4.0875, "step": 1302500 }, { "epoch": 2.45, "learning_rate": 5.541907243199411e-05, "loss": 4.0608, "step": 1303000 }, { "epoch": 2.45, "learning_rate": 5.5325219428322575e-05, "loss": 4.0953, "step": 1303500 }, { "epoch": 2.45, "learning_rate": 5.523136642465105e-05, "loss": 4.0945, "step": 1304000 }, { "epoch": 2.45, "learning_rate": 5.513751342097952e-05, "loss": 4.1076, "step": 1304500 }, { "epoch": 2.45, "learning_rate": 5.5043660417307987e-05, "loss": 4.094, "step": 1305000 }, { "epoch": 2.45, "learning_rate": 5.4949807413636464e-05, "loss": 4.0832, "step": 1305500 }, { "epoch": 2.45, "learning_rate": 5.4855954409964935e-05, "loss": 4.1009, "step": 1306000 }, { "epoch": 2.45, "learning_rate": 5.47621014062934e-05, "loss": 4.0724, "step": 1306500 }, { "epoch": 2.45, "learning_rate": 5.466824840262187e-05, "loss": 4.1011, "step": 1307000 }, { "epoch": 2.45, "learning_rate": 5.4574395398950346e-05, "loss": 4.101, "step": 1307500 }, { "epoch": 2.46, "learning_rate": 5.448054239527882e-05, "loss": 4.0842, "step": 1308000 }, { "epoch": 2.46, "learning_rate": 5.438668939160728e-05, "loss": 4.0852, "step": 1308500 }, { "epoch": 2.46, "learning_rate": 5.429283638793575e-05, "loss": 4.0981, "step": 1309000 }, { "epoch": 2.46, "learning_rate": 5.419898338426423e-05, "loss": 4.0998, "step": 1309500 }, { "epoch": 2.46, "learning_rate": 5.410513038059269e-05, "loss": 4.0701, "step": 1310000 }, { "epoch": 2.46, "learning_rate": 5.4011277376921164e-05, "loss": 4.0971, "step": 1310500 }, { "epoch": 2.46, "learning_rate": 5.391742437324964e-05, "loss": 4.0919, "step": 1311000 }, { "epoch": 2.46, "learning_rate": 5.382357136957811e-05, "loss": 4.1148, "step": 1311500 }, { "epoch": 2.46, "learning_rate": 5.3729718365906575e-05, "loss": 4.0948, "step": 1312000 }, { "epoch": 2.46, "learning_rate": 5.3635865362235046e-05, "loss": 4.0855, "step": 1312500 }, { "epoch": 2.46, "learning_rate": 5.3542012358563524e-05, "loss": 4.0918, "step": 1313000 }, { "epoch": 2.47, "learning_rate": 5.344815935489199e-05, "loss": 4.0908, "step": 1313500 }, { "epoch": 2.47, "learning_rate": 5.335430635122046e-05, "loss": 4.1036, "step": 1314000 }, { "epoch": 2.47, "learning_rate": 5.3260453347548935e-05, "loss": 4.0688, "step": 1314500 }, { "epoch": 2.47, "learning_rate": 5.31666003438774e-05, "loss": 4.0748, "step": 1315000 }, { "epoch": 2.47, "learning_rate": 5.307274734020587e-05, "loss": 4.0831, "step": 1315500 }, { "epoch": 2.47, "learning_rate": 5.297889433653434e-05, "loss": 4.0808, "step": 1316000 }, { "epoch": 2.47, "learning_rate": 5.288504133286282e-05, "loss": 4.0749, "step": 1316500 }, { "epoch": 2.47, "learning_rate": 5.279118832919128e-05, "loss": 4.0916, "step": 1317000 }, { "epoch": 2.47, "learning_rate": 5.269733532551975e-05, "loss": 4.0932, "step": 1317500 }, { "epoch": 2.47, "learning_rate": 5.260348232184822e-05, "loss": 4.089, "step": 1318000 }, { "epoch": 2.47, "learning_rate": 5.2509629318176694e-05, "loss": 4.0886, "step": 1318500 }, { "epoch": 2.48, "learning_rate": 5.2415776314505164e-05, "loss": 4.085, "step": 1319000 }, { "epoch": 2.48, "learning_rate": 5.2321923310833635e-05, "loss": 4.0798, "step": 1319500 }, { "epoch": 2.48, "learning_rate": 5.222807030716211e-05, "loss": 4.0888, "step": 1320000 }, { "epoch": 2.48, "learning_rate": 5.2134217303490576e-05, "loss": 4.0912, "step": 1320500 }, { "epoch": 2.48, "learning_rate": 5.204036429981905e-05, "loss": 4.1052, "step": 1321000 }, { "epoch": 2.48, "learning_rate": 5.194651129614751e-05, "loss": 4.1082, "step": 1321500 }, { "epoch": 2.48, "learning_rate": 5.185265829247599e-05, "loss": 4.0893, "step": 1322000 }, { "epoch": 2.48, "learning_rate": 5.175880528880446e-05, "loss": 4.1092, "step": 1322500 }, { "epoch": 2.48, "learning_rate": 5.166495228513293e-05, "loss": 4.0737, "step": 1323000 }, { "epoch": 2.48, "learning_rate": 5.15710992814614e-05, "loss": 4.0798, "step": 1323500 }, { "epoch": 2.49, "learning_rate": 5.147724627778987e-05, "loss": 4.109, "step": 1324000 }, { "epoch": 2.49, "learning_rate": 5.138339327411834e-05, "loss": 4.0857, "step": 1324500 }, { "epoch": 2.49, "learning_rate": 5.1289540270446805e-05, "loss": 4.1154, "step": 1325000 }, { "epoch": 2.49, "learning_rate": 5.119568726677528e-05, "loss": 4.0871, "step": 1325500 }, { "epoch": 2.49, "learning_rate": 5.1101834263103753e-05, "loss": 4.0941, "step": 1326000 }, { "epoch": 2.49, "learning_rate": 5.1007981259432224e-05, "loss": 4.0885, "step": 1326500 }, { "epoch": 2.49, "learning_rate": 5.091412825576069e-05, "loss": 4.0857, "step": 1327000 }, { "epoch": 2.49, "learning_rate": 5.0820275252089165e-05, "loss": 4.0746, "step": 1327500 }, { "epoch": 2.49, "learning_rate": 5.0726422248417636e-05, "loss": 4.064, "step": 1328000 }, { "epoch": 2.49, "learning_rate": 5.06325692447461e-05, "loss": 4.0834, "step": 1328500 }, { "epoch": 2.49, "learning_rate": 5.053871624107458e-05, "loss": 4.0884, "step": 1329000 }, { "epoch": 2.5, "learning_rate": 5.044486323740305e-05, "loss": 4.0873, "step": 1329500 }, { "epoch": 2.5, "learning_rate": 5.035101023373151e-05, "loss": 4.0933, "step": 1330000 }, { "epoch": 2.5, "learning_rate": 5.025715723005998e-05, "loss": 4.0932, "step": 1330500 }, { "epoch": 2.5, "learning_rate": 5.016330422638846e-05, "loss": 4.0799, "step": 1331000 }, { "epoch": 2.5, "learning_rate": 5.006945122271693e-05, "loss": 4.0742, "step": 1331500 }, { "epoch": 2.5, "learning_rate": 4.9975598219045394e-05, "loss": 4.0959, "step": 1332000 }, { "epoch": 2.5, "learning_rate": 4.988174521537387e-05, "loss": 4.087, "step": 1332500 }, { "epoch": 2.5, "learning_rate": 4.978789221170234e-05, "loss": 4.0873, "step": 1333000 }, { "epoch": 2.5, "learning_rate": 4.9694039208030806e-05, "loss": 4.0715, "step": 1333500 }, { "epoch": 2.5, "learning_rate": 4.960018620435928e-05, "loss": 4.0868, "step": 1334000 }, { "epoch": 2.5, "learning_rate": 4.9506333200687754e-05, "loss": 4.0726, "step": 1334500 }, { "epoch": 2.51, "learning_rate": 4.941248019701622e-05, "loss": 4.0785, "step": 1335000 }, { "epoch": 2.51, "learning_rate": 4.931862719334469e-05, "loss": 4.0701, "step": 1335500 }, { "epoch": 2.51, "learning_rate": 4.922477418967316e-05, "loss": 4.0798, "step": 1336000 }, { "epoch": 2.51, "learning_rate": 4.913092118600164e-05, "loss": 4.0938, "step": 1336500 }, { "epoch": 2.51, "learning_rate": 4.90370681823301e-05, "loss": 4.0849, "step": 1337000 }, { "epoch": 2.51, "learning_rate": 4.894321517865857e-05, "loss": 4.0584, "step": 1337500 }, { "epoch": 2.51, "learning_rate": 4.884936217498705e-05, "loss": 4.0809, "step": 1338000 }, { "epoch": 2.51, "learning_rate": 4.875550917131551e-05, "loss": 4.0834, "step": 1338500 }, { "epoch": 2.51, "learning_rate": 4.866165616764398e-05, "loss": 4.1006, "step": 1339000 }, { "epoch": 2.51, "learning_rate": 4.8567803163972454e-05, "loss": 4.0869, "step": 1339500 }, { "epoch": 2.52, "learning_rate": 4.847395016030093e-05, "loss": 4.0998, "step": 1340000 }, { "epoch": 2.52, "learning_rate": 4.8380097156629395e-05, "loss": 4.0782, "step": 1340500 }, { "epoch": 2.52, "learning_rate": 4.8286244152957866e-05, "loss": 4.0942, "step": 1341000 }, { "epoch": 2.52, "learning_rate": 4.819239114928634e-05, "loss": 4.1044, "step": 1341500 }, { "epoch": 2.52, "learning_rate": 4.809853814561481e-05, "loss": 4.0865, "step": 1342000 }, { "epoch": 2.52, "learning_rate": 4.800468514194328e-05, "loss": 4.0917, "step": 1342500 }, { "epoch": 2.52, "learning_rate": 4.791083213827175e-05, "loss": 4.0874, "step": 1343000 }, { "epoch": 2.52, "learning_rate": 4.781697913460022e-05, "loss": 4.0948, "step": 1343500 }, { "epoch": 2.52, "learning_rate": 4.772312613092869e-05, "loss": 4.082, "step": 1344000 }, { "epoch": 2.52, "learning_rate": 4.762927312725716e-05, "loss": 4.0872, "step": 1344500 }, { "epoch": 2.52, "learning_rate": 4.753542012358564e-05, "loss": 4.0923, "step": 1345000 }, { "epoch": 2.53, "learning_rate": 4.74415671199141e-05, "loss": 4.094, "step": 1345500 }, { "epoch": 2.53, "learning_rate": 4.734771411624257e-05, "loss": 4.1156, "step": 1346000 }, { "epoch": 2.53, "learning_rate": 4.725386111257104e-05, "loss": 4.0886, "step": 1346500 }, { "epoch": 2.53, "learning_rate": 4.7160008108899514e-05, "loss": 4.1015, "step": 1347000 }, { "epoch": 2.53, "learning_rate": 4.7066155105227984e-05, "loss": 4.0873, "step": 1347500 }, { "epoch": 2.53, "learning_rate": 4.6972302101556455e-05, "loss": 4.0725, "step": 1348000 }, { "epoch": 2.53, "learning_rate": 4.687844909788492e-05, "loss": 4.077, "step": 1348500 }, { "epoch": 2.53, "learning_rate": 4.6784596094213396e-05, "loss": 4.0915, "step": 1349000 }, { "epoch": 2.53, "learning_rate": 4.669074309054187e-05, "loss": 4.0733, "step": 1349500 }, { "epoch": 2.53, "learning_rate": 4.659689008687033e-05, "loss": 4.0878, "step": 1350000 }, { "epoch": 2.53, "learning_rate": 4.650303708319881e-05, "loss": 4.0922, "step": 1350500 }, { "epoch": 2.54, "learning_rate": 4.640918407952728e-05, "loss": 4.0829, "step": 1351000 }, { "epoch": 2.54, "learning_rate": 4.631533107585575e-05, "loss": 4.0832, "step": 1351500 }, { "epoch": 2.54, "learning_rate": 4.622147807218421e-05, "loss": 4.0796, "step": 1352000 }, { "epoch": 2.54, "learning_rate": 4.612762506851269e-05, "loss": 4.0716, "step": 1352500 }, { "epoch": 2.54, "learning_rate": 4.603377206484116e-05, "loss": 4.0947, "step": 1353000 }, { "epoch": 2.54, "learning_rate": 4.5939919061169625e-05, "loss": 4.0737, "step": 1353500 }, { "epoch": 2.54, "learning_rate": 4.58460660574981e-05, "loss": 4.0652, "step": 1354000 }, { "epoch": 2.54, "learning_rate": 4.575221305382657e-05, "loss": 4.0775, "step": 1354500 }, { "epoch": 2.54, "learning_rate": 4.5658360050155044e-05, "loss": 4.0734, "step": 1355000 }, { "epoch": 2.54, "learning_rate": 4.556450704648351e-05, "loss": 4.0891, "step": 1355500 }, { "epoch": 2.55, "learning_rate": 4.5470654042811985e-05, "loss": 4.0902, "step": 1356000 }, { "epoch": 2.55, "learning_rate": 4.5376801039140456e-05, "loss": 4.0873, "step": 1356500 }, { "epoch": 2.55, "learning_rate": 4.528294803546892e-05, "loss": 4.1043, "step": 1357000 }, { "epoch": 2.55, "learning_rate": 4.518909503179739e-05, "loss": 4.0743, "step": 1357500 }, { "epoch": 2.55, "learning_rate": 4.509524202812587e-05, "loss": 4.0598, "step": 1358000 }, { "epoch": 2.55, "learning_rate": 4.500138902445433e-05, "loss": 4.0914, "step": 1358500 }, { "epoch": 2.55, "learning_rate": 4.49075360207828e-05, "loss": 4.0748, "step": 1359000 }, { "epoch": 2.55, "learning_rate": 4.481368301711128e-05, "loss": 4.091, "step": 1359500 }, { "epoch": 2.55, "learning_rate": 4.471983001343975e-05, "loss": 4.0587, "step": 1360000 }, { "epoch": 2.55, "learning_rate": 4.4625977009768214e-05, "loss": 4.0886, "step": 1360500 }, { "epoch": 2.55, "learning_rate": 4.4532124006096685e-05, "loss": 4.0904, "step": 1361000 }, { "epoch": 2.56, "learning_rate": 4.443827100242516e-05, "loss": 4.0911, "step": 1361500 }, { "epoch": 2.56, "learning_rate": 4.4344417998753626e-05, "loss": 4.0815, "step": 1362000 }, { "epoch": 2.56, "learning_rate": 4.42505649950821e-05, "loss": 4.0724, "step": 1362500 }, { "epoch": 2.56, "learning_rate": 4.4156711991410574e-05, "loss": 4.0758, "step": 1363000 }, { "epoch": 2.56, "learning_rate": 4.4062858987739045e-05, "loss": 4.0822, "step": 1363500 }, { "epoch": 2.56, "learning_rate": 4.396900598406751e-05, "loss": 4.0769, "step": 1364000 }, { "epoch": 2.56, "learning_rate": 4.387515298039598e-05, "loss": 4.067, "step": 1364500 }, { "epoch": 2.56, "learning_rate": 4.3781299976724457e-05, "loss": 4.0907, "step": 1365000 }, { "epoch": 2.56, "learning_rate": 4.368744697305292e-05, "loss": 4.0809, "step": 1365500 }, { "epoch": 2.56, "learning_rate": 4.359359396938139e-05, "loss": 4.0687, "step": 1366000 }, { "epoch": 2.57, "learning_rate": 4.349974096570986e-05, "loss": 4.078, "step": 1366500 }, { "epoch": 2.57, "learning_rate": 4.340588796203833e-05, "loss": 4.0769, "step": 1367000 }, { "epoch": 2.57, "learning_rate": 4.33120349583668e-05, "loss": 4.0747, "step": 1367500 }, { "epoch": 2.57, "learning_rate": 4.3218181954695274e-05, "loss": 4.0935, "step": 1368000 }, { "epoch": 2.57, "learning_rate": 4.312432895102375e-05, "loss": 4.0849, "step": 1368500 }, { "epoch": 2.57, "learning_rate": 4.3030475947352215e-05, "loss": 4.0822, "step": 1369000 }, { "epoch": 2.57, "learning_rate": 4.2936622943680686e-05, "loss": 4.0804, "step": 1369500 }, { "epoch": 2.57, "learning_rate": 4.2842769940009156e-05, "loss": 4.0616, "step": 1370000 }, { "epoch": 2.57, "learning_rate": 4.274891693633763e-05, "loss": 4.0836, "step": 1370500 }, { "epoch": 2.57, "learning_rate": 4.26550639326661e-05, "loss": 4.0786, "step": 1371000 }, { "epoch": 2.57, "learning_rate": 4.256121092899457e-05, "loss": 4.0962, "step": 1371500 }, { "epoch": 2.58, "learning_rate": 4.2467357925323046e-05, "loss": 4.0542, "step": 1372000 }, { "epoch": 2.58, "learning_rate": 4.237350492165151e-05, "loss": 4.066, "step": 1372500 }, { "epoch": 2.58, "learning_rate": 4.227965191797998e-05, "loss": 4.0942, "step": 1373000 }, { "epoch": 2.58, "learning_rate": 4.2185798914308444e-05, "loss": 4.1004, "step": 1373500 }, { "epoch": 2.58, "learning_rate": 4.209194591063692e-05, "loss": 4.0927, "step": 1374000 }, { "epoch": 2.58, "learning_rate": 4.199809290696539e-05, "loss": 4.0715, "step": 1374500 }, { "epoch": 2.58, "learning_rate": 4.190423990329386e-05, "loss": 4.0849, "step": 1375000 }, { "epoch": 2.58, "learning_rate": 4.1810386899622327e-05, "loss": 4.0863, "step": 1375500 }, { "epoch": 2.58, "learning_rate": 4.1716533895950804e-05, "loss": 4.0774, "step": 1376000 }, { "epoch": 2.58, "learning_rate": 4.1622680892279275e-05, "loss": 4.0682, "step": 1376500 }, { "epoch": 2.58, "learning_rate": 4.152882788860774e-05, "loss": 4.0664, "step": 1377000 }, { "epoch": 2.59, "learning_rate": 4.1434974884936216e-05, "loss": 4.0759, "step": 1377500 }, { "epoch": 2.59, "learning_rate": 4.1341121881264686e-05, "loss": 4.0804, "step": 1378000 }, { "epoch": 2.59, "learning_rate": 4.124726887759315e-05, "loss": 4.0909, "step": 1378500 }, { "epoch": 2.59, "learning_rate": 4.115341587392162e-05, "loss": 4.0613, "step": 1379000 }, { "epoch": 2.59, "learning_rate": 4.10595628702501e-05, "loss": 4.0658, "step": 1379500 }, { "epoch": 2.59, "learning_rate": 4.096570986657857e-05, "loss": 4.0913, "step": 1380000 }, { "epoch": 2.59, "learning_rate": 4.087185686290703e-05, "loss": 4.0913, "step": 1380500 }, { "epoch": 2.59, "learning_rate": 4.077800385923551e-05, "loss": 4.0813, "step": 1381000 }, { "epoch": 2.59, "learning_rate": 4.068415085556398e-05, "loss": 4.0881, "step": 1381500 }, { "epoch": 2.59, "learning_rate": 4.0590297851892445e-05, "loss": 4.0741, "step": 1382000 }, { "epoch": 2.6, "learning_rate": 4.0496444848220915e-05, "loss": 4.0748, "step": 1382500 }, { "epoch": 2.6, "learning_rate": 4.040259184454939e-05, "loss": 4.0922, "step": 1383000 }, { "epoch": 2.6, "learning_rate": 4.0308738840877864e-05, "loss": 4.068, "step": 1383500 }, { "epoch": 2.6, "learning_rate": 4.021488583720633e-05, "loss": 4.1101, "step": 1384000 }, { "epoch": 2.6, "learning_rate": 4.01210328335348e-05, "loss": 4.0935, "step": 1384500 }, { "epoch": 2.6, "learning_rate": 4.0027179829863275e-05, "loss": 4.0758, "step": 1385000 }, { "epoch": 2.6, "learning_rate": 3.993332682619174e-05, "loss": 4.0839, "step": 1385500 }, { "epoch": 2.6, "learning_rate": 3.983947382252021e-05, "loss": 4.1031, "step": 1386000 }, { "epoch": 2.6, "learning_rate": 3.974562081884869e-05, "loss": 4.0771, "step": 1386500 }, { "epoch": 2.6, "learning_rate": 3.965176781517715e-05, "loss": 4.0786, "step": 1387000 }, { "epoch": 2.6, "learning_rate": 3.955791481150562e-05, "loss": 4.0738, "step": 1387500 }, { "epoch": 2.61, "learning_rate": 3.946406180783409e-05, "loss": 4.072, "step": 1388000 }, { "epoch": 2.61, "learning_rate": 3.937020880416257e-05, "loss": 4.091, "step": 1388500 }, { "epoch": 2.61, "learning_rate": 3.9276355800491034e-05, "loss": 4.088, "step": 1389000 }, { "epoch": 2.61, "learning_rate": 3.9182502796819504e-05, "loss": 4.0842, "step": 1389500 }, { "epoch": 2.61, "learning_rate": 3.908864979314798e-05, "loss": 4.0651, "step": 1390000 }, { "epoch": 2.61, "learning_rate": 3.8994796789476446e-05, "loss": 4.0968, "step": 1390500 }, { "epoch": 2.61, "learning_rate": 3.8900943785804916e-05, "loss": 4.06, "step": 1391000 }, { "epoch": 2.61, "learning_rate": 3.880709078213339e-05, "loss": 4.0594, "step": 1391500 }, { "epoch": 2.61, "learning_rate": 3.8713237778461864e-05, "loss": 4.0864, "step": 1392000 }, { "epoch": 2.61, "learning_rate": 3.861938477479033e-05, "loss": 4.083, "step": 1392500 }, { "epoch": 2.61, "learning_rate": 3.85255317711188e-05, "loss": 4.0956, "step": 1393000 }, { "epoch": 2.62, "learning_rate": 3.843167876744726e-05, "loss": 4.0824, "step": 1393500 }, { "epoch": 2.62, "learning_rate": 3.833782576377574e-05, "loss": 4.0737, "step": 1394000 }, { "epoch": 2.62, "learning_rate": 3.824397276010421e-05, "loss": 4.0801, "step": 1394500 }, { "epoch": 2.62, "learning_rate": 3.815011975643268e-05, "loss": 4.0845, "step": 1395000 }, { "epoch": 2.62, "learning_rate": 3.805626675276115e-05, "loss": 4.0736, "step": 1395500 }, { "epoch": 2.62, "learning_rate": 3.796241374908962e-05, "loss": 4.0924, "step": 1396000 }, { "epoch": 2.62, "learning_rate": 3.7868560745418093e-05, "loss": 4.0812, "step": 1396500 }, { "epoch": 2.62, "learning_rate": 3.777470774174656e-05, "loss": 4.0882, "step": 1397000 }, { "epoch": 2.62, "learning_rate": 3.7680854738075035e-05, "loss": 4.0813, "step": 1397500 }, { "epoch": 2.62, "learning_rate": 3.7587001734403505e-05, "loss": 4.0796, "step": 1398000 }, { "epoch": 2.63, "learning_rate": 3.7493148730731976e-05, "loss": 4.0755, "step": 1398500 }, { "epoch": 2.63, "learning_rate": 3.739929572706045e-05, "loss": 4.0857, "step": 1399000 }, { "epoch": 2.63, "learning_rate": 3.730544272338892e-05, "loss": 4.0851, "step": 1399500 }, { "epoch": 2.63, "learning_rate": 3.721158971971739e-05, "loss": 4.0766, "step": 1400000 }, { "epoch": 2.63, "learning_rate": 3.711773671604586e-05, "loss": 4.0766, "step": 1400500 }, { "epoch": 2.63, "learning_rate": 3.702388371237433e-05, "loss": 4.0816, "step": 1401000 }, { "epoch": 2.63, "learning_rate": 3.69300307087028e-05, "loss": 4.0744, "step": 1401500 }, { "epoch": 2.63, "learning_rate": 3.6836177705031264e-05, "loss": 4.0843, "step": 1402000 }, { "epoch": 2.63, "learning_rate": 3.674232470135974e-05, "loss": 4.0984, "step": 1402500 }, { "epoch": 2.63, "learning_rate": 3.664847169768821e-05, "loss": 4.0624, "step": 1403000 }, { "epoch": 2.63, "learning_rate": 3.655461869401668e-05, "loss": 4.0806, "step": 1403500 }, { "epoch": 2.64, "learning_rate": 3.646076569034515e-05, "loss": 4.0838, "step": 1404000 }, { "epoch": 2.64, "learning_rate": 3.636691268667362e-05, "loss": 4.0784, "step": 1404500 }, { "epoch": 2.64, "learning_rate": 3.6273059683002094e-05, "loss": 4.0847, "step": 1405000 }, { "epoch": 2.64, "learning_rate": 3.617920667933056e-05, "loss": 4.0783, "step": 1405500 }, { "epoch": 2.64, "learning_rate": 3.6085353675659036e-05, "loss": 4.0807, "step": 1406000 }, { "epoch": 2.64, "learning_rate": 3.59915006719875e-05, "loss": 4.0524, "step": 1406500 }, { "epoch": 2.64, "learning_rate": 3.589764766831598e-05, "loss": 4.0808, "step": 1407000 }, { "epoch": 2.64, "learning_rate": 3.580379466464445e-05, "loss": 4.0714, "step": 1407500 }, { "epoch": 2.64, "learning_rate": 3.570994166097291e-05, "loss": 4.0916, "step": 1408000 }, { "epoch": 2.64, "learning_rate": 3.561608865730139e-05, "loss": 4.0711, "step": 1408500 }, { "epoch": 2.64, "learning_rate": 3.552223565362985e-05, "loss": 4.0768, "step": 1409000 }, { "epoch": 2.65, "learning_rate": 3.542838264995833e-05, "loss": 4.0762, "step": 1409500 }, { "epoch": 2.65, "learning_rate": 3.5334529646286794e-05, "loss": 4.1018, "step": 1410000 }, { "epoch": 2.65, "learning_rate": 3.5240676642615265e-05, "loss": 4.0837, "step": 1410500 }, { "epoch": 2.65, "learning_rate": 3.5146823638943735e-05, "loss": 4.0754, "step": 1411000 }, { "epoch": 2.65, "learning_rate": 3.5052970635272206e-05, "loss": 4.0883, "step": 1411500 }, { "epoch": 2.65, "learning_rate": 3.495911763160068e-05, "loss": 4.073, "step": 1412000 }, { "epoch": 2.65, "learning_rate": 3.486526462792915e-05, "loss": 4.0878, "step": 1412500 }, { "epoch": 2.65, "learning_rate": 3.477141162425762e-05, "loss": 4.0713, "step": 1413000 }, { "epoch": 2.65, "learning_rate": 3.467755862058609e-05, "loss": 4.0709, "step": 1413500 }, { "epoch": 2.65, "learning_rate": 3.458370561691456e-05, "loss": 4.083, "step": 1414000 }, { "epoch": 2.66, "learning_rate": 3.448985261324303e-05, "loss": 4.0697, "step": 1414500 }, { "epoch": 2.66, "learning_rate": 3.43959996095715e-05, "loss": 4.0878, "step": 1415000 }, { "epoch": 2.66, "learning_rate": 3.430214660589997e-05, "loss": 4.075, "step": 1415500 }, { "epoch": 2.66, "learning_rate": 3.420829360222844e-05, "loss": 4.085, "step": 1416000 }, { "epoch": 2.66, "learning_rate": 3.411444059855691e-05, "loss": 4.0705, "step": 1416500 }, { "epoch": 2.66, "learning_rate": 3.402058759488538e-05, "loss": 4.0619, "step": 1417000 }, { "epoch": 2.66, "learning_rate": 3.3926734591213854e-05, "loss": 4.0675, "step": 1417500 }, { "epoch": 2.66, "learning_rate": 3.3832881587542324e-05, "loss": 4.0697, "step": 1418000 }, { "epoch": 2.66, "learning_rate": 3.3739028583870795e-05, "loss": 4.0825, "step": 1418500 }, { "epoch": 2.66, "learning_rate": 3.3645175580199265e-05, "loss": 4.0742, "step": 1419000 }, { "epoch": 2.66, "learning_rate": 3.3551322576527736e-05, "loss": 4.082, "step": 1419500 }, { "epoch": 2.67, "learning_rate": 3.345746957285621e-05, "loss": 4.0752, "step": 1420000 }, { "epoch": 2.67, "learning_rate": 3.336361656918468e-05, "loss": 4.0694, "step": 1420500 }, { "epoch": 2.67, "learning_rate": 3.326976356551315e-05, "loss": 4.0844, "step": 1421000 }, { "epoch": 2.67, "learning_rate": 3.317591056184162e-05, "loss": 4.0892, "step": 1421500 }, { "epoch": 2.67, "learning_rate": 3.308205755817009e-05, "loss": 4.1071, "step": 1422000 }, { "epoch": 2.67, "learning_rate": 3.298820455449856e-05, "loss": 4.0752, "step": 1422500 }, { "epoch": 2.67, "learning_rate": 3.289435155082703e-05, "loss": 4.0692, "step": 1423000 }, { "epoch": 2.67, "learning_rate": 3.28004985471555e-05, "loss": 4.0879, "step": 1423500 }, { "epoch": 2.67, "learning_rate": 3.270664554348397e-05, "loss": 4.0688, "step": 1424000 }, { "epoch": 2.67, "learning_rate": 3.261279253981244e-05, "loss": 4.0851, "step": 1424500 }, { "epoch": 2.67, "learning_rate": 3.251893953614091e-05, "loss": 4.0762, "step": 1425000 }, { "epoch": 2.68, "learning_rate": 3.2425086532469384e-05, "loss": 4.0752, "step": 1425500 }, { "epoch": 2.68, "learning_rate": 3.2331233528797854e-05, "loss": 4.0928, "step": 1426000 }, { "epoch": 2.68, "learning_rate": 3.2237380525126325e-05, "loss": 4.0672, "step": 1426500 }, { "epoch": 2.68, "learning_rate": 3.2143527521454796e-05, "loss": 4.0802, "step": 1427000 }, { "epoch": 2.68, "learning_rate": 3.2049674517783266e-05, "loss": 4.0856, "step": 1427500 }, { "epoch": 2.68, "learning_rate": 3.195582151411173e-05, "loss": 4.0751, "step": 1428000 }, { "epoch": 2.68, "learning_rate": 3.186196851044021e-05, "loss": 4.0848, "step": 1428500 }, { "epoch": 2.68, "learning_rate": 3.176811550676867e-05, "loss": 4.0826, "step": 1429000 }, { "epoch": 2.68, "learning_rate": 3.167426250309715e-05, "loss": 4.0575, "step": 1429500 }, { "epoch": 2.68, "learning_rate": 3.158040949942562e-05, "loss": 4.0846, "step": 1430000 }, { "epoch": 2.69, "learning_rate": 3.1486556495754083e-05, "loss": 4.0905, "step": 1430500 }, { "epoch": 2.69, "learning_rate": 3.139270349208256e-05, "loss": 4.0635, "step": 1431000 }, { "epoch": 2.69, "learning_rate": 3.1298850488411025e-05, "loss": 4.0681, "step": 1431500 }, { "epoch": 2.69, "learning_rate": 3.12049974847395e-05, "loss": 4.0833, "step": 1432000 }, { "epoch": 2.69, "learning_rate": 3.1111144481067966e-05, "loss": 4.0834, "step": 1432500 }, { "epoch": 2.69, "learning_rate": 3.1017291477396443e-05, "loss": 4.0623, "step": 1433000 }, { "epoch": 2.69, "learning_rate": 3.0923438473724914e-05, "loss": 4.0842, "step": 1433500 }, { "epoch": 2.69, "learning_rate": 3.082958547005338e-05, "loss": 4.0805, "step": 1434000 }, { "epoch": 2.69, "learning_rate": 3.0735732466381855e-05, "loss": 4.0886, "step": 1434500 }, { "epoch": 2.69, "learning_rate": 3.064187946271032e-05, "loss": 4.0627, "step": 1435000 }, { "epoch": 2.69, "learning_rate": 3.0548026459038797e-05, "loss": 4.0817, "step": 1435500 }, { "epoch": 2.7, "learning_rate": 3.045417345536726e-05, "loss": 4.1082, "step": 1436000 }, { "epoch": 2.7, "learning_rate": 3.0360320451695734e-05, "loss": 4.0759, "step": 1436500 }, { "epoch": 2.7, "learning_rate": 3.0266467448024202e-05, "loss": 4.0907, "step": 1437000 }, { "epoch": 2.7, "learning_rate": 3.0172614444352676e-05, "loss": 4.0807, "step": 1437500 }, { "epoch": 2.7, "learning_rate": 3.0078761440681146e-05, "loss": 4.0782, "step": 1438000 }, { "epoch": 2.7, "learning_rate": 2.9984908437009614e-05, "loss": 4.0759, "step": 1438500 }, { "epoch": 2.7, "learning_rate": 2.9891055433338088e-05, "loss": 4.0529, "step": 1439000 }, { "epoch": 2.7, "learning_rate": 2.9797202429666555e-05, "loss": 4.0761, "step": 1439500 }, { "epoch": 2.7, "learning_rate": 2.970334942599503e-05, "loss": 4.0891, "step": 1440000 }, { "epoch": 2.7, "learning_rate": 2.9609496422323496e-05, "loss": 4.0623, "step": 1440500 }, { "epoch": 2.7, "learning_rate": 2.951564341865197e-05, "loss": 4.0779, "step": 1441000 }, { "epoch": 2.71, "learning_rate": 2.9421790414980438e-05, "loss": 4.0642, "step": 1441500 }, { "epoch": 2.71, "learning_rate": 2.9327937411308908e-05, "loss": 4.0584, "step": 1442000 }, { "epoch": 2.71, "learning_rate": 2.9234084407637382e-05, "loss": 4.0769, "step": 1442500 }, { "epoch": 2.71, "learning_rate": 2.914023140396585e-05, "loss": 4.0739, "step": 1443000 }, { "epoch": 2.71, "learning_rate": 2.9046378400294323e-05, "loss": 4.0837, "step": 1443500 }, { "epoch": 2.71, "learning_rate": 2.895252539662279e-05, "loss": 4.0882, "step": 1444000 }, { "epoch": 2.71, "learning_rate": 2.885867239295126e-05, "loss": 4.0639, "step": 1444500 }, { "epoch": 2.71, "learning_rate": 2.8764819389279732e-05, "loss": 4.0715, "step": 1445000 }, { "epoch": 2.71, "learning_rate": 2.8670966385608203e-05, "loss": 4.071, "step": 1445500 }, { "epoch": 2.71, "learning_rate": 2.857711338193667e-05, "loss": 4.0727, "step": 1446000 }, { "epoch": 2.72, "learning_rate": 2.8483260378265144e-05, "loss": 4.0621, "step": 1446500 }, { "epoch": 2.72, "learning_rate": 2.8389407374593615e-05, "loss": 4.0671, "step": 1447000 }, { "epoch": 2.72, "learning_rate": 2.8295554370922085e-05, "loss": 4.0786, "step": 1447500 }, { "epoch": 2.72, "learning_rate": 2.8201701367250556e-05, "loss": 4.0629, "step": 1448000 }, { "epoch": 2.72, "learning_rate": 2.8107848363579023e-05, "loss": 4.0769, "step": 1448500 }, { "epoch": 2.72, "learning_rate": 2.8013995359907497e-05, "loss": 4.084, "step": 1449000 }, { "epoch": 2.72, "learning_rate": 2.7920142356235964e-05, "loss": 4.0597, "step": 1449500 }, { "epoch": 2.72, "learning_rate": 2.782628935256444e-05, "loss": 4.0746, "step": 1450000 }, { "epoch": 2.72, "learning_rate": 2.7732436348892906e-05, "loss": 4.0876, "step": 1450500 }, { "epoch": 2.72, "learning_rate": 2.763858334522138e-05, "loss": 4.0802, "step": 1451000 }, { "epoch": 2.72, "learning_rate": 2.754473034154985e-05, "loss": 4.0643, "step": 1451500 }, { "epoch": 2.73, "learning_rate": 2.7450877337878318e-05, "loss": 4.0735, "step": 1452000 }, { "epoch": 2.73, "learning_rate": 2.735702433420679e-05, "loss": 4.0577, "step": 1452500 }, { "epoch": 2.73, "learning_rate": 2.726317133053526e-05, "loss": 4.0716, "step": 1453000 }, { "epoch": 2.73, "learning_rate": 2.7169318326863733e-05, "loss": 4.0616, "step": 1453500 }, { "epoch": 2.73, "learning_rate": 2.70754653231922e-05, "loss": 4.0756, "step": 1454000 }, { "epoch": 2.73, "learning_rate": 2.698161231952067e-05, "loss": 4.0606, "step": 1454500 }, { "epoch": 2.73, "learning_rate": 2.688775931584914e-05, "loss": 4.0849, "step": 1455000 }, { "epoch": 2.73, "learning_rate": 2.6793906312177612e-05, "loss": 4.0531, "step": 1455500 }, { "epoch": 2.73, "learning_rate": 2.6700053308506086e-05, "loss": 4.0721, "step": 1456000 }, { "epoch": 2.73, "learning_rate": 2.6606200304834553e-05, "loss": 4.0736, "step": 1456500 }, { "epoch": 2.73, "learning_rate": 2.6512347301163024e-05, "loss": 4.074, "step": 1457000 }, { "epoch": 2.74, "learning_rate": 2.6418494297491495e-05, "loss": 4.1067, "step": 1457500 }, { "epoch": 2.74, "learning_rate": 2.6324641293819965e-05, "loss": 4.0933, "step": 1458000 }, { "epoch": 2.74, "learning_rate": 2.6230788290148436e-05, "loss": 4.0737, "step": 1458500 }, { "epoch": 2.74, "learning_rate": 2.6136935286476907e-05, "loss": 4.0638, "step": 1459000 }, { "epoch": 2.74, "learning_rate": 2.6043082282805374e-05, "loss": 4.0896, "step": 1459500 }, { "epoch": 2.74, "learning_rate": 2.5949229279133848e-05, "loss": 4.0674, "step": 1460000 }, { "epoch": 2.74, "learning_rate": 2.585537627546232e-05, "loss": 4.0543, "step": 1460500 }, { "epoch": 2.74, "learning_rate": 2.576152327179079e-05, "loss": 4.0558, "step": 1461000 }, { "epoch": 2.74, "learning_rate": 2.566767026811926e-05, "loss": 4.0691, "step": 1461500 }, { "epoch": 2.74, "learning_rate": 2.5573817264447727e-05, "loss": 4.0653, "step": 1462000 }, { "epoch": 2.75, "learning_rate": 2.54799642607762e-05, "loss": 4.0803, "step": 1462500 }, { "epoch": 2.75, "learning_rate": 2.5386111257104668e-05, "loss": 4.0641, "step": 1463000 }, { "epoch": 2.75, "learning_rate": 2.5292258253433142e-05, "loss": 4.0812, "step": 1463500 }, { "epoch": 2.75, "learning_rate": 2.519840524976161e-05, "loss": 4.0733, "step": 1464000 }, { "epoch": 2.75, "learning_rate": 2.510455224609008e-05, "loss": 4.0701, "step": 1464500 }, { "epoch": 2.75, "learning_rate": 2.5010699242418554e-05, "loss": 4.082, "step": 1465000 }, { "epoch": 2.75, "learning_rate": 2.491684623874702e-05, "loss": 4.0887, "step": 1465500 }, { "epoch": 2.75, "learning_rate": 2.4822993235075496e-05, "loss": 4.0729, "step": 1466000 }, { "epoch": 2.75, "learning_rate": 2.4729140231403963e-05, "loss": 4.0826, "step": 1466500 }, { "epoch": 2.75, "learning_rate": 2.4635287227732433e-05, "loss": 4.073, "step": 1467000 }, { "epoch": 2.75, "learning_rate": 2.4541434224060904e-05, "loss": 4.0581, "step": 1467500 }, { "epoch": 2.76, "learning_rate": 2.4447581220389375e-05, "loss": 4.0781, "step": 1468000 }, { "epoch": 2.76, "learning_rate": 2.4353728216717845e-05, "loss": 4.0805, "step": 1468500 }, { "epoch": 2.76, "learning_rate": 2.4259875213046316e-05, "loss": 4.057, "step": 1469000 }, { "epoch": 2.76, "learning_rate": 2.416602220937479e-05, "loss": 4.0634, "step": 1469500 }, { "epoch": 2.76, "learning_rate": 2.4072169205703257e-05, "loss": 4.0545, "step": 1470000 }, { "epoch": 2.76, "learning_rate": 2.3978316202031728e-05, "loss": 4.083, "step": 1470500 }, { "epoch": 2.76, "learning_rate": 2.38844631983602e-05, "loss": 4.0732, "step": 1471000 }, { "epoch": 2.76, "learning_rate": 2.379061019468867e-05, "loss": 4.0605, "step": 1471500 }, { "epoch": 2.76, "learning_rate": 2.3696757191017136e-05, "loss": 4.0614, "step": 1472000 }, { "epoch": 2.76, "learning_rate": 2.360290418734561e-05, "loss": 4.0822, "step": 1472500 }, { "epoch": 2.76, "learning_rate": 2.3509051183674078e-05, "loss": 4.0573, "step": 1473000 }, { "epoch": 2.77, "learning_rate": 2.3415198180002552e-05, "loss": 4.0701, "step": 1473500 }, { "epoch": 2.77, "learning_rate": 2.3321345176331022e-05, "loss": 4.0729, "step": 1474000 }, { "epoch": 2.77, "learning_rate": 2.322749217265949e-05, "loss": 4.06, "step": 1474500 }, { "epoch": 2.77, "learning_rate": 2.3133639168987964e-05, "loss": 4.0639, "step": 1475000 }, { "epoch": 2.77, "learning_rate": 2.303978616531643e-05, "loss": 4.0811, "step": 1475500 }, { "epoch": 2.77, "learning_rate": 2.2945933161644905e-05, "loss": 4.0953, "step": 1476000 }, { "epoch": 2.77, "learning_rate": 2.2852080157973372e-05, "loss": 4.0624, "step": 1476500 }, { "epoch": 2.77, "learning_rate": 2.2758227154301846e-05, "loss": 4.0748, "step": 1477000 }, { "epoch": 2.77, "learning_rate": 2.2664374150630313e-05, "loss": 4.0562, "step": 1477500 }, { "epoch": 2.77, "learning_rate": 2.2570521146958784e-05, "loss": 4.0767, "step": 1478000 }, { "epoch": 2.78, "learning_rate": 2.2476668143287258e-05, "loss": 4.0814, "step": 1478500 }, { "epoch": 2.78, "learning_rate": 2.2382815139615725e-05, "loss": 4.0775, "step": 1479000 }, { "epoch": 2.78, "learning_rate": 2.22889621359442e-05, "loss": 4.0572, "step": 1479500 }, { "epoch": 2.78, "learning_rate": 2.2195109132272667e-05, "loss": 4.0755, "step": 1480000 }, { "epoch": 2.78, "learning_rate": 2.2101256128601137e-05, "loss": 4.0798, "step": 1480500 }, { "epoch": 2.78, "learning_rate": 2.2007403124929608e-05, "loss": 4.0777, "step": 1481000 }, { "epoch": 2.78, "learning_rate": 2.191355012125808e-05, "loss": 4.0596, "step": 1481500 }, { "epoch": 2.78, "learning_rate": 2.1819697117586546e-05, "loss": 4.0721, "step": 1482000 }, { "epoch": 2.78, "learning_rate": 2.172584411391502e-05, "loss": 4.0976, "step": 1482500 }, { "epoch": 2.78, "learning_rate": 2.163199111024349e-05, "loss": 4.0837, "step": 1483000 }, { "epoch": 2.78, "learning_rate": 2.153813810657196e-05, "loss": 4.0534, "step": 1483500 }, { "epoch": 2.79, "learning_rate": 2.1444285102900432e-05, "loss": 4.0872, "step": 1484000 }, { "epoch": 2.79, "learning_rate": 2.1350432099228902e-05, "loss": 4.0767, "step": 1484500 }, { "epoch": 2.79, "learning_rate": 2.1256579095557373e-05, "loss": 4.0758, "step": 1485000 }, { "epoch": 2.79, "learning_rate": 2.116272609188584e-05, "loss": 4.0661, "step": 1485500 }, { "epoch": 2.79, "learning_rate": 2.1068873088214314e-05, "loss": 4.0621, "step": 1486000 }, { "epoch": 2.79, "learning_rate": 2.097502008454278e-05, "loss": 4.0947, "step": 1486500 }, { "epoch": 2.79, "learning_rate": 2.0881167080871256e-05, "loss": 4.0672, "step": 1487000 }, { "epoch": 2.79, "learning_rate": 2.0787314077199726e-05, "loss": 4.086, "step": 1487500 }, { "epoch": 2.79, "learning_rate": 2.0693461073528194e-05, "loss": 4.0562, "step": 1488000 }, { "epoch": 2.79, "learning_rate": 2.0599608069856668e-05, "loss": 4.0736, "step": 1488500 }, { "epoch": 2.79, "learning_rate": 2.0505755066185135e-05, "loss": 4.0753, "step": 1489000 }, { "epoch": 2.8, "learning_rate": 2.041190206251361e-05, "loss": 4.0578, "step": 1489500 }, { "epoch": 2.8, "learning_rate": 2.0318049058842076e-05, "loss": 4.0664, "step": 1490000 }, { "epoch": 2.8, "learning_rate": 2.0224196055170547e-05, "loss": 4.0841, "step": 1490500 }, { "epoch": 2.8, "learning_rate": 2.013034305149902e-05, "loss": 4.0693, "step": 1491000 }, { "epoch": 2.8, "learning_rate": 2.0036490047827488e-05, "loss": 4.0514, "step": 1491500 }, { "epoch": 2.8, "learning_rate": 1.9942637044155962e-05, "loss": 4.0778, "step": 1492000 }, { "epoch": 2.8, "learning_rate": 1.984878404048443e-05, "loss": 4.0732, "step": 1492500 }, { "epoch": 2.8, "learning_rate": 1.97549310368129e-05, "loss": 4.0693, "step": 1493000 }, { "epoch": 2.8, "learning_rate": 1.966107803314137e-05, "loss": 4.0502, "step": 1493500 }, { "epoch": 2.8, "learning_rate": 1.956722502946984e-05, "loss": 4.0793, "step": 1494000 }, { "epoch": 2.81, "learning_rate": 1.9473372025798312e-05, "loss": 4.0909, "step": 1494500 }, { "epoch": 2.81, "learning_rate": 1.9379519022126783e-05, "loss": 4.0733, "step": 1495000 }, { "epoch": 2.81, "learning_rate": 1.9285666018455257e-05, "loss": 4.0509, "step": 1495500 }, { "epoch": 2.81, "learning_rate": 1.9191813014783724e-05, "loss": 4.0561, "step": 1496000 }, { "epoch": 2.81, "learning_rate": 1.9097960011112194e-05, "loss": 4.0527, "step": 1496500 }, { "epoch": 2.81, "learning_rate": 1.9004107007440665e-05, "loss": 4.0767, "step": 1497000 }, { "epoch": 2.81, "learning_rate": 1.8910254003769136e-05, "loss": 4.0684, "step": 1497500 }, { "epoch": 2.81, "learning_rate": 1.8816401000097603e-05, "loss": 4.0715, "step": 1498000 }, { "epoch": 2.81, "learning_rate": 1.8722547996426077e-05, "loss": 4.0734, "step": 1498500 }, { "epoch": 2.81, "learning_rate": 1.8628694992754548e-05, "loss": 4.0651, "step": 1499000 }, { "epoch": 2.81, "learning_rate": 1.8534841989083018e-05, "loss": 4.0526, "step": 1499500 }, { "epoch": 2.82, "learning_rate": 1.844098898541149e-05, "loss": 4.0517, "step": 1500000 }, { "epoch": 2.82, "learning_rate": 1.8347135981739956e-05, "loss": 4.0661, "step": 1500500 }, { "epoch": 2.82, "learning_rate": 1.8253282978068427e-05, "loss": 4.0654, "step": 1501000 }, { "epoch": 2.82, "learning_rate": 1.8159429974396897e-05, "loss": 4.0687, "step": 1501500 }, { "epoch": 2.82, "learning_rate": 1.8065576970725368e-05, "loss": 4.1042, "step": 1502000 }, { "epoch": 2.82, "learning_rate": 1.7971723967053842e-05, "loss": 4.0794, "step": 1502500 }, { "epoch": 2.82, "learning_rate": 1.7877870963382313e-05, "loss": 4.0609, "step": 1503000 }, { "epoch": 2.82, "learning_rate": 1.778401795971078e-05, "loss": 4.06, "step": 1503500 }, { "epoch": 2.82, "learning_rate": 1.769016495603925e-05, "loss": 4.0815, "step": 1504000 }, { "epoch": 2.82, "learning_rate": 1.759631195236772e-05, "loss": 4.0839, "step": 1504500 }, { "epoch": 2.82, "learning_rate": 1.7502458948696192e-05, "loss": 4.0673, "step": 1505000 }, { "epoch": 2.83, "learning_rate": 1.7408605945024663e-05, "loss": 4.0789, "step": 1505500 }, { "epoch": 2.83, "learning_rate": 1.7314752941353133e-05, "loss": 4.0882, "step": 1506000 }, { "epoch": 2.83, "learning_rate": 1.7220899937681604e-05, "loss": 4.0565, "step": 1506500 }, { "epoch": 2.83, "learning_rate": 1.7127046934010074e-05, "loss": 4.071, "step": 1507000 }, { "epoch": 2.83, "learning_rate": 1.7033193930338545e-05, "loss": 4.0723, "step": 1507500 }, { "epoch": 2.83, "learning_rate": 1.6939340926667016e-05, "loss": 4.0561, "step": 1508000 }, { "epoch": 2.83, "learning_rate": 1.6845487922995486e-05, "loss": 4.0665, "step": 1508500 }, { "epoch": 2.83, "learning_rate": 1.6751634919323957e-05, "loss": 4.0753, "step": 1509000 }, { "epoch": 2.83, "learning_rate": 1.6657781915652428e-05, "loss": 4.0621, "step": 1509500 }, { "epoch": 2.83, "learning_rate": 1.65639289119809e-05, "loss": 4.0514, "step": 1510000 }, { "epoch": 2.84, "learning_rate": 1.6470075908309366e-05, "loss": 4.0546, "step": 1510500 }, { "epoch": 2.84, "learning_rate": 1.637622290463784e-05, "loss": 4.0733, "step": 1511000 }, { "epoch": 2.84, "learning_rate": 1.628236990096631e-05, "loss": 4.057, "step": 1511500 }, { "epoch": 2.84, "learning_rate": 1.618851689729478e-05, "loss": 4.0731, "step": 1512000 }, { "epoch": 2.84, "learning_rate": 1.609466389362325e-05, "loss": 4.0748, "step": 1512500 }, { "epoch": 2.84, "learning_rate": 1.6000810889951722e-05, "loss": 4.0901, "step": 1513000 }, { "epoch": 2.84, "learning_rate": 1.590695788628019e-05, "loss": 4.0753, "step": 1513500 }, { "epoch": 2.84, "learning_rate": 1.581310488260866e-05, "loss": 4.0635, "step": 1514000 }, { "epoch": 2.84, "learning_rate": 1.571925187893713e-05, "loss": 4.0726, "step": 1514500 }, { "epoch": 2.84, "learning_rate": 1.56253988752656e-05, "loss": 4.0904, "step": 1515000 }, { "epoch": 2.84, "learning_rate": 1.5531545871594075e-05, "loss": 4.0687, "step": 1515500 }, { "epoch": 2.85, "learning_rate": 1.5437692867922546e-05, "loss": 4.0737, "step": 1516000 }, { "epoch": 2.85, "learning_rate": 1.5343839864251013e-05, "loss": 4.0753, "step": 1516500 }, { "epoch": 2.85, "learning_rate": 1.5249986860579486e-05, "loss": 4.0761, "step": 1517000 }, { "epoch": 2.85, "learning_rate": 1.5156133856907955e-05, "loss": 4.0725, "step": 1517500 }, { "epoch": 2.85, "learning_rate": 1.5062280853236425e-05, "loss": 4.0736, "step": 1518000 }, { "epoch": 2.85, "learning_rate": 1.4968427849564896e-05, "loss": 4.0662, "step": 1518500 }, { "epoch": 2.85, "learning_rate": 1.4874574845893366e-05, "loss": 4.0811, "step": 1519000 }, { "epoch": 2.85, "learning_rate": 1.4780721842221835e-05, "loss": 4.0701, "step": 1519500 }, { "epoch": 2.85, "learning_rate": 1.468686883855031e-05, "loss": 4.0552, "step": 1520000 }, { "epoch": 2.85, "learning_rate": 1.4593015834878778e-05, "loss": 4.0639, "step": 1520500 }, { "epoch": 2.86, "learning_rate": 1.4499162831207249e-05, "loss": 4.0573, "step": 1521000 }, { "epoch": 2.86, "learning_rate": 1.440530982753572e-05, "loss": 4.0606, "step": 1521500 }, { "epoch": 2.86, "learning_rate": 1.431145682386419e-05, "loss": 4.0827, "step": 1522000 }, { "epoch": 2.86, "learning_rate": 1.421760382019266e-05, "loss": 4.0573, "step": 1522500 }, { "epoch": 2.86, "learning_rate": 1.412375081652113e-05, "loss": 4.0568, "step": 1523000 }, { "epoch": 2.86, "learning_rate": 1.40298978128496e-05, "loss": 4.0633, "step": 1523500 }, { "epoch": 2.86, "learning_rate": 1.3936044809178071e-05, "loss": 4.0646, "step": 1524000 }, { "epoch": 2.86, "learning_rate": 1.3842191805506544e-05, "loss": 4.0796, "step": 1524500 }, { "epoch": 2.86, "learning_rate": 1.3748338801835014e-05, "loss": 4.0707, "step": 1525000 }, { "epoch": 2.86, "learning_rate": 1.3654485798163483e-05, "loss": 4.0444, "step": 1525500 }, { "epoch": 2.86, "learning_rate": 1.3560632794491954e-05, "loss": 4.0624, "step": 1526000 }, { "epoch": 2.87, "learning_rate": 1.3466779790820424e-05, "loss": 4.065, "step": 1526500 }, { "epoch": 2.87, "learning_rate": 1.3372926787148895e-05, "loss": 4.0685, "step": 1527000 }, { "epoch": 2.87, "learning_rate": 1.3279073783477364e-05, "loss": 4.0811, "step": 1527500 }, { "epoch": 2.87, "learning_rate": 1.3185220779805835e-05, "loss": 4.0686, "step": 1528000 }, { "epoch": 2.87, "learning_rate": 1.3091367776134305e-05, "loss": 4.0632, "step": 1528500 }, { "epoch": 2.87, "learning_rate": 1.2997514772462778e-05, "loss": 4.0716, "step": 1529000 }, { "epoch": 2.87, "learning_rate": 1.2903661768791248e-05, "loss": 4.0852, "step": 1529500 }, { "epoch": 2.87, "learning_rate": 1.2809808765119719e-05, "loss": 4.0707, "step": 1530000 }, { "epoch": 2.87, "learning_rate": 1.2715955761448188e-05, "loss": 4.0465, "step": 1530500 }, { "epoch": 2.87, "learning_rate": 1.2622102757776658e-05, "loss": 4.0381, "step": 1531000 }, { "epoch": 2.87, "learning_rate": 1.2528249754105129e-05, "loss": 4.0863, "step": 1531500 }, { "epoch": 2.88, "learning_rate": 1.24343967504336e-05, "loss": 4.0686, "step": 1532000 }, { "epoch": 2.88, "learning_rate": 1.2340543746762069e-05, "loss": 4.0502, "step": 1532500 }, { "epoch": 2.88, "learning_rate": 1.224669074309054e-05, "loss": 4.044, "step": 1533000 }, { "epoch": 2.88, "learning_rate": 1.2152837739419012e-05, "loss": 4.0523, "step": 1533500 }, { "epoch": 2.88, "learning_rate": 1.2058984735747482e-05, "loss": 4.0548, "step": 1534000 }, { "epoch": 2.88, "learning_rate": 1.1965131732075953e-05, "loss": 4.063, "step": 1534500 }, { "epoch": 2.88, "learning_rate": 1.1871278728404424e-05, "loss": 4.0483, "step": 1535000 }, { "epoch": 2.88, "learning_rate": 1.1777425724732893e-05, "loss": 4.0641, "step": 1535500 }, { "epoch": 2.88, "learning_rate": 1.1683572721061363e-05, "loss": 4.067, "step": 1536000 }, { "epoch": 2.88, "learning_rate": 1.1589719717389834e-05, "loss": 4.0757, "step": 1536500 }, { "epoch": 2.89, "learning_rate": 1.1495866713718304e-05, "loss": 4.0793, "step": 1537000 }, { "epoch": 2.89, "learning_rate": 1.1402013710046777e-05, "loss": 4.0673, "step": 1537500 }, { "epoch": 2.89, "learning_rate": 1.1308160706375247e-05, "loss": 4.0763, "step": 1538000 }, { "epoch": 2.89, "learning_rate": 1.1214307702703716e-05, "loss": 4.0714, "step": 1538500 }, { "epoch": 2.89, "learning_rate": 1.1120454699032187e-05, "loss": 4.0724, "step": 1539000 }, { "epoch": 2.89, "learning_rate": 1.1026601695360658e-05, "loss": 4.0746, "step": 1539500 }, { "epoch": 2.89, "learning_rate": 1.0932748691689128e-05, "loss": 4.0671, "step": 1540000 }, { "epoch": 2.89, "learning_rate": 1.0838895688017597e-05, "loss": 4.0652, "step": 1540500 }, { "epoch": 2.89, "learning_rate": 1.0745042684346068e-05, "loss": 4.0546, "step": 1541000 }, { "epoch": 2.89, "learning_rate": 1.0651189680674539e-05, "loss": 4.0641, "step": 1541500 }, { "epoch": 2.89, "learning_rate": 1.0557336677003011e-05, "loss": 4.0924, "step": 1542000 }, { "epoch": 2.9, "learning_rate": 1.0463483673331481e-05, "loss": 4.0365, "step": 1542500 }, { "epoch": 2.9, "learning_rate": 1.0369630669659952e-05, "loss": 4.0726, "step": 1543000 }, { "epoch": 2.9, "learning_rate": 1.0275777665988421e-05, "loss": 4.0716, "step": 1543500 }, { "epoch": 2.9, "learning_rate": 1.0181924662316892e-05, "loss": 4.0571, "step": 1544000 }, { "epoch": 2.9, "learning_rate": 1.0088071658645362e-05, "loss": 4.068, "step": 1544500 }, { "epoch": 2.9, "learning_rate": 9.994218654973833e-06, "loss": 4.0666, "step": 1545000 }, { "epoch": 2.9, "learning_rate": 9.900365651302302e-06, "loss": 4.0692, "step": 1545500 }, { "epoch": 2.9, "learning_rate": 9.806512647630773e-06, "loss": 4.0601, "step": 1546000 }, { "epoch": 2.9, "learning_rate": 9.712659643959245e-06, "loss": 4.0684, "step": 1546500 }, { "epoch": 2.9, "learning_rate": 9.618806640287716e-06, "loss": 4.0693, "step": 1547000 }, { "epoch": 2.9, "learning_rate": 9.524953636616186e-06, "loss": 4.0503, "step": 1547500 }, { "epoch": 2.91, "learning_rate": 9.431100632944657e-06, "loss": 4.0649, "step": 1548000 }, { "epoch": 2.91, "learning_rate": 9.337247629273126e-06, "loss": 4.0477, "step": 1548500 }, { "epoch": 2.91, "learning_rate": 9.243394625601596e-06, "loss": 4.0735, "step": 1549000 }, { "epoch": 2.91, "learning_rate": 9.149541621930067e-06, "loss": 4.0613, "step": 1549500 }, { "epoch": 2.91, "learning_rate": 9.055688618258538e-06, "loss": 4.0593, "step": 1550000 }, { "epoch": 2.91, "learning_rate": 8.961835614587008e-06, "loss": 4.0832, "step": 1550500 }, { "epoch": 2.91, "learning_rate": 8.867982610915479e-06, "loss": 4.0533, "step": 1551000 }, { "epoch": 2.91, "learning_rate": 8.77412960724395e-06, "loss": 4.0752, "step": 1551500 }, { "epoch": 2.91, "learning_rate": 8.68027660357242e-06, "loss": 4.0505, "step": 1552000 }, { "epoch": 2.91, "learning_rate": 8.586423599900891e-06, "loss": 4.0665, "step": 1552500 }, { "epoch": 2.92, "learning_rate": 8.492570596229362e-06, "loss": 4.0617, "step": 1553000 }, { "epoch": 2.92, "learning_rate": 8.39871759255783e-06, "loss": 4.0793, "step": 1553500 }, { "epoch": 2.92, "learning_rate": 8.304864588886301e-06, "loss": 4.0449, "step": 1554000 }, { "epoch": 2.92, "learning_rate": 8.211011585214773e-06, "loss": 4.0949, "step": 1554500 }, { "epoch": 2.92, "learning_rate": 8.117158581543242e-06, "loss": 4.0613, "step": 1555000 }, { "epoch": 2.92, "learning_rate": 8.023305577871713e-06, "loss": 4.0632, "step": 1555500 }, { "epoch": 2.92, "learning_rate": 7.929452574200184e-06, "loss": 4.0474, "step": 1556000 }, { "epoch": 2.92, "learning_rate": 7.835599570528654e-06, "loss": 4.0592, "step": 1556500 }, { "epoch": 2.92, "learning_rate": 7.741746566857125e-06, "loss": 4.0555, "step": 1557000 }, { "epoch": 2.92, "learning_rate": 7.647893563185596e-06, "loss": 4.0757, "step": 1557500 }, { "epoch": 2.92, "learning_rate": 7.5540405595140654e-06, "loss": 4.0636, "step": 1558000 }, { "epoch": 2.93, "learning_rate": 7.460187555842537e-06, "loss": 4.0772, "step": 1558500 }, { "epoch": 2.93, "learning_rate": 7.3663345521710076e-06, "loss": 4.0507, "step": 1559000 }, { "epoch": 2.93, "learning_rate": 7.272481548499477e-06, "loss": 4.062, "step": 1559500 }, { "epoch": 2.93, "learning_rate": 7.178628544827948e-06, "loss": 4.0709, "step": 1560000 }, { "epoch": 2.93, "learning_rate": 7.084775541156418e-06, "loss": 4.066, "step": 1560500 }, { "epoch": 2.93, "learning_rate": 6.990922537484889e-06, "loss": 4.0429, "step": 1561000 }, { "epoch": 2.93, "learning_rate": 6.89706953381336e-06, "loss": 4.0554, "step": 1561500 }, { "epoch": 2.93, "learning_rate": 6.80321653014183e-06, "loss": 4.0622, "step": 1562000 }, { "epoch": 2.93, "learning_rate": 6.7093635264703e-06, "loss": 4.0711, "step": 1562500 }, { "epoch": 2.93, "learning_rate": 6.615510522798772e-06, "loss": 4.0514, "step": 1563000 }, { "epoch": 2.93, "learning_rate": 6.521657519127242e-06, "loss": 4.0655, "step": 1563500 }, { "epoch": 2.94, "learning_rate": 6.427804515455712e-06, "loss": 4.0667, "step": 1564000 }, { "epoch": 2.94, "learning_rate": 6.333951511784182e-06, "loss": 4.0616, "step": 1564500 }, { "epoch": 2.94, "learning_rate": 6.240098508112653e-06, "loss": 4.069, "step": 1565000 }, { "epoch": 2.94, "learning_rate": 6.146245504441124e-06, "loss": 4.0408, "step": 1565500 }, { "epoch": 2.94, "learning_rate": 6.052392500769594e-06, "loss": 4.0799, "step": 1566000 }, { "epoch": 2.94, "learning_rate": 5.958539497098065e-06, "loss": 4.0542, "step": 1566500 }, { "epoch": 2.94, "learning_rate": 5.864686493426534e-06, "loss": 4.0523, "step": 1567000 }, { "epoch": 2.94, "learning_rate": 5.770833489755006e-06, "loss": 4.0604, "step": 1567500 }, { "epoch": 2.94, "learning_rate": 5.6769804860834765e-06, "loss": 4.0685, "step": 1568000 }, { "epoch": 2.94, "learning_rate": 5.583127482411946e-06, "loss": 4.0656, "step": 1568500 }, { "epoch": 2.95, "learning_rate": 5.489274478740417e-06, "loss": 4.0775, "step": 1569000 }, { "epoch": 2.95, "learning_rate": 5.395421475068887e-06, "loss": 4.0758, "step": 1569500 }, { "epoch": 2.95, "learning_rate": 5.301568471397358e-06, "loss": 4.0527, "step": 1570000 }, { "epoch": 2.95, "learning_rate": 5.207715467725829e-06, "loss": 4.0844, "step": 1570500 }, { "epoch": 2.95, "learning_rate": 5.113862464054299e-06, "loss": 4.0691, "step": 1571000 }, { "epoch": 2.95, "learning_rate": 5.020009460382769e-06, "loss": 4.0732, "step": 1571500 }, { "epoch": 2.95, "learning_rate": 4.926156456711241e-06, "loss": 4.0753, "step": 1572000 }, { "epoch": 2.95, "learning_rate": 4.832303453039711e-06, "loss": 4.07, "step": 1572500 }, { "epoch": 2.95, "learning_rate": 4.738450449368181e-06, "loss": 4.0587, "step": 1573000 }, { "epoch": 2.95, "learning_rate": 4.644597445696652e-06, "loss": 4.0684, "step": 1573500 }, { "epoch": 2.95, "learning_rate": 4.5507444420251225e-06, "loss": 4.0555, "step": 1574000 }, { "epoch": 2.96, "learning_rate": 4.456891438353592e-06, "loss": 4.046, "step": 1574500 }, { "epoch": 2.96, "learning_rate": 4.363038434682063e-06, "loss": 4.0417, "step": 1575000 }, { "epoch": 2.96, "learning_rate": 4.269185431010534e-06, "loss": 4.0441, "step": 1575500 }, { "epoch": 2.96, "learning_rate": 4.175332427339004e-06, "loss": 4.0691, "step": 1576000 }, { "epoch": 2.96, "learning_rate": 4.081479423667475e-06, "loss": 4.0547, "step": 1576500 }, { "epoch": 2.96, "learning_rate": 3.9876264199959455e-06, "loss": 4.08, "step": 1577000 }, { "epoch": 2.96, "learning_rate": 3.893773416324415e-06, "loss": 4.0607, "step": 1577500 }, { "epoch": 2.96, "learning_rate": 3.7999204126528864e-06, "loss": 4.063, "step": 1578000 }, { "epoch": 2.96, "learning_rate": 3.7060674089813566e-06, "loss": 4.068, "step": 1578500 }, { "epoch": 2.96, "learning_rate": 3.6122144053098273e-06, "loss": 4.0872, "step": 1579000 }, { "epoch": 2.96, "learning_rate": 3.518361401638298e-06, "loss": 4.0464, "step": 1579500 }, { "epoch": 2.97, "learning_rate": 3.424508397966768e-06, "loss": 4.068, "step": 1580000 }, { "epoch": 2.97, "learning_rate": 3.3306553942952387e-06, "loss": 4.0741, "step": 1580500 }, { "epoch": 2.97, "learning_rate": 3.236802390623709e-06, "loss": 4.0678, "step": 1581000 }, { "epoch": 2.97, "learning_rate": 3.14294938695218e-06, "loss": 4.0486, "step": 1581500 }, { "epoch": 2.97, "learning_rate": 3.0490963832806502e-06, "loss": 4.0615, "step": 1582000 }, { "epoch": 2.97, "learning_rate": 2.955243379609121e-06, "loss": 4.0524, "step": 1582500 }, { "epoch": 2.97, "learning_rate": 2.861390375937591e-06, "loss": 4.0626, "step": 1583000 }, { "epoch": 2.97, "learning_rate": 2.7675373722660617e-06, "loss": 4.0577, "step": 1583500 }, { "epoch": 2.97, "learning_rate": 2.6736843685945324e-06, "loss": 4.0545, "step": 1584000 }, { "epoch": 2.97, "learning_rate": 2.579831364923003e-06, "loss": 4.0658, "step": 1584500 }, { "epoch": 2.98, "learning_rate": 2.4859783612514732e-06, "loss": 4.0748, "step": 1585000 }, { "epoch": 2.98, "learning_rate": 2.3921253575799435e-06, "loss": 4.0625, "step": 1585500 }, { "epoch": 2.98, "learning_rate": 2.2982723539084145e-06, "loss": 4.0686, "step": 1586000 }, { "epoch": 2.98, "learning_rate": 2.2044193502368847e-06, "loss": 4.0765, "step": 1586500 }, { "epoch": 2.98, "learning_rate": 2.1105663465653554e-06, "loss": 4.0554, "step": 1587000 }, { "epoch": 2.98, "learning_rate": 2.0167133428938256e-06, "loss": 4.0595, "step": 1587500 }, { "epoch": 2.98, "learning_rate": 1.9228603392222962e-06, "loss": 4.0785, "step": 1588000 }, { "epoch": 2.98, "learning_rate": 1.8290073355507667e-06, "loss": 4.0654, "step": 1588500 }, { "epoch": 2.98, "learning_rate": 1.7351543318792373e-06, "loss": 4.0748, "step": 1589000 }, { "epoch": 2.98, "learning_rate": 1.6413013282077077e-06, "loss": 4.0668, "step": 1589500 }, { "epoch": 2.98, "learning_rate": 1.5474483245361784e-06, "loss": 4.0526, "step": 1590000 }, { "epoch": 2.99, "learning_rate": 1.4535953208646488e-06, "loss": 4.0478, "step": 1590500 }, { "epoch": 2.99, "learning_rate": 1.3597423171931194e-06, "loss": 4.0519, "step": 1591000 }, { "epoch": 2.99, "learning_rate": 1.2658893135215899e-06, "loss": 4.0559, "step": 1591500 }, { "epoch": 2.99, "learning_rate": 1.1720363098500605e-06, "loss": 4.0642, "step": 1592000 }, { "epoch": 2.99, "learning_rate": 1.0781833061785307e-06, "loss": 4.0688, "step": 1592500 }, { "epoch": 2.99, "learning_rate": 9.843303025070014e-07, "loss": 4.0684, "step": 1593000 }, { "epoch": 2.99, "learning_rate": 8.904772988354719e-07, "loss": 4.045, "step": 1593500 }, { "epoch": 2.99, "learning_rate": 7.966242951639423e-07, "loss": 4.0677, "step": 1594000 }, { "epoch": 2.99, "learning_rate": 7.027712914924129e-07, "loss": 4.0525, "step": 1594500 }, { "epoch": 2.99, "learning_rate": 6.089182878208833e-07, "loss": 4.0839, "step": 1595000 }, { "epoch": 2.99, "learning_rate": 5.150652841493538e-07, "loss": 4.0531, "step": 1595500 }, { "epoch": 3.0, "learning_rate": 4.2121228047782437e-07, "loss": 4.0731, "step": 1596000 }, { "epoch": 3.0, "learning_rate": 3.2735927680629485e-07, "loss": 4.0568, "step": 1596500 }, { "epoch": 3.0, "learning_rate": 2.3350627313476536e-07, "loss": 4.0647, "step": 1597000 }, { "epoch": 3.0, "learning_rate": 1.396532694632359e-07, "loss": 4.0598, "step": 1597500 }, { "epoch": 3.0, "learning_rate": 4.580026579170639e-08, "loss": 4.0552, "step": 1598000 }, { "epoch": 3.0, "step": 1598244, "total_flos": 5.01129636544512e+18, "train_loss": 0.7859680134460877, "train_runtime": 210975.8843, "train_samples_per_second": 45.453, "train_steps_per_second": 7.575 } ], "max_steps": 1598244, "num_train_epochs": 3, "total_flos": 5.01129636544512e+18, "trial_name": null, "trial_params": null }