diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,19201 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 3.0, + "global_step": 1598244, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.0002999061469963284, + "loss": 9.0229, + "step": 500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00029981229399265693, + "loss": 8.3193, + "step": 1000 + }, + { + "epoch": 0.0, + "learning_rate": 0.0002997184409889854, + "loss": 8.0421, + "step": 1500 + }, + { + "epoch": 0.0, + "learning_rate": 0.00029962458798531383, + "loss": 7.8282, + "step": 2000 + }, + { + "epoch": 0.0, + "learning_rate": 0.00029953073498164233, + "loss": 7.6413, + "step": 2500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002994368819779708, + "loss": 7.4768, + "step": 3000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029934302897429923, + "loss": 7.3366, + "step": 3500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029924917597062774, + "loss": 7.1968, + "step": 4000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002991553229669562, + "loss": 7.0463, + "step": 4500 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002990614699632847, + "loss": 6.9269, + "step": 5000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029896761695961314, + "loss": 6.796, + "step": 5500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029887376395594165, + "loss": 6.7006, + "step": 6000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0002987799109522701, + "loss": 6.5999, + "step": 6500 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029868605794859855, + "loss": 6.516, + "step": 7000 + }, + { + "epoch": 0.01, + "learning_rate": 0.00029859220494492705, + "loss": 6.4314, + "step": 7500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002984983519412555, + "loss": 6.3386, + "step": 8000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029840449893758395, + "loss": 6.2853, + "step": 8500 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029831064593391246, + "loss": 6.2242, + "step": 9000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002982167929302409, + "loss": 6.1438, + "step": 9500 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029812293992656936, + "loss": 6.1042, + "step": 10000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029802908692289786, + "loss": 6.0451, + "step": 10500 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002979352339192263, + "loss": 5.9868, + "step": 11000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029784138091555476, + "loss": 5.9375, + "step": 11500 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029774752791188327, + "loss": 5.896, + "step": 12000 + }, + { + "epoch": 0.02, + "learning_rate": 0.0002976536749082117, + "loss": 5.8403, + "step": 12500 + }, + { + "epoch": 0.02, + "learning_rate": 0.00029755982190454017, + "loss": 5.8008, + "step": 13000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002974659689008687, + "loss": 5.7554, + "step": 13500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002973721158971971, + "loss": 5.726, + "step": 14000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029727826289352563, + "loss": 5.6831, + "step": 14500 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002971844098898541, + "loss": 5.6498, + "step": 15000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002970905568861826, + "loss": 5.6124, + "step": 15500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029699670388251103, + "loss": 5.5981, + "step": 16000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029690285087883954, + "loss": 5.5783, + "step": 16500 + }, + { + "epoch": 0.03, + "learning_rate": 0.000296808997875168, + "loss": 5.543, + "step": 17000 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029671514487149644, + "loss": 5.5149, + "step": 17500 + }, + { + "epoch": 0.03, + "learning_rate": 0.00029662129186782494, + "loss": 5.4884, + "step": 18000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0002965274388641534, + "loss": 5.4705, + "step": 18500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029643358586048184, + "loss": 5.455, + "step": 19000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002963397328568103, + "loss": 5.4311, + "step": 19500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002962458798531388, + "loss": 5.4137, + "step": 20000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029615202684946725, + "loss": 5.3846, + "step": 20500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002960581738457957, + "loss": 5.3621, + "step": 21000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002959643208421242, + "loss": 5.3677, + "step": 21500 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029587046783845265, + "loss": 5.3455, + "step": 22000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029577661483478116, + "loss": 5.3252, + "step": 22500 + }, + { + "epoch": 0.04, + "learning_rate": 0.0002956827618311096, + "loss": 5.3115, + "step": 23000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00029558890882743806, + "loss": 5.3063, + "step": 23500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029549505582376656, + "loss": 5.2829, + "step": 24000 + }, + { + "epoch": 0.05, + "learning_rate": 0.000295401202820095, + "loss": 5.2593, + "step": 24500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002953073498164235, + "loss": 5.2641, + "step": 25000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029521349681275197, + "loss": 5.2502, + "step": 25500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002951196438090805, + "loss": 5.2255, + "step": 26000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002950257908054089, + "loss": 5.2052, + "step": 26500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002949319378017374, + "loss": 5.1995, + "step": 27000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002948380847980659, + "loss": 5.1872, + "step": 27500 + }, + { + "epoch": 0.05, + "learning_rate": 0.00029474423179439433, + "loss": 5.2049, + "step": 28000 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002946503787907228, + "loss": 5.1725, + "step": 28500 + }, + { + "epoch": 0.05, + "learning_rate": 0.0002945565257870513, + "loss": 5.176, + "step": 29000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029446267278337973, + "loss": 5.1581, + "step": 29500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002943688197797082, + "loss": 5.1469, + "step": 30000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029427496677603664, + "loss": 5.1353, + "step": 30500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029418111377236514, + "loss": 5.1279, + "step": 31000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002940872607686936, + "loss": 5.1085, + "step": 31500 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002939934077650221, + "loss": 5.1187, + "step": 32000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029389955476135055, + "loss": 5.0909, + "step": 32500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029380570175767905, + "loss": 5.0936, + "step": 33000 + }, + { + "epoch": 0.06, + "learning_rate": 0.0002937118487540075, + "loss": 5.0821, + "step": 33500 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029361799575033595, + "loss": 5.0849, + "step": 34000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00029352414274666445, + "loss": 5.0655, + "step": 34500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002934302897429929, + "loss": 5.0643, + "step": 35000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002933364367393214, + "loss": 5.0693, + "step": 35500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029324258373564986, + "loss": 5.0493, + "step": 36000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002931487307319783, + "loss": 5.0408, + "step": 36500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002930548777283068, + "loss": 5.0339, + "step": 37000 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029296102472463526, + "loss": 5.0291, + "step": 37500 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002928671717209637, + "loss": 5.0077, + "step": 38000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002927733187172922, + "loss": 5.0201, + "step": 38500 + }, + { + "epoch": 0.07, + "learning_rate": 0.00029267946571362067, + "loss": 5.0042, + "step": 39000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0002925856127099491, + "loss": 4.9954, + "step": 39500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002924917597062776, + "loss": 5.0108, + "step": 40000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002923979067026061, + "loss": 4.9867, + "step": 40500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002923040536989345, + "loss": 4.9779, + "step": 41000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029221020069526303, + "loss": 4.9828, + "step": 41500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002921163476915915, + "loss": 4.9588, + "step": 42000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029202249468792, + "loss": 4.9711, + "step": 42500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029192864168424844, + "loss": 4.9616, + "step": 43000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029183478868057694, + "loss": 4.9597, + "step": 43500 + }, + { + "epoch": 0.08, + "learning_rate": 0.0002917409356769054, + "loss": 4.9501, + "step": 44000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029164708267323384, + "loss": 4.9507, + "step": 44500 + }, + { + "epoch": 0.08, + "learning_rate": 0.00029155322966956234, + "loss": 4.9347, + "step": 45000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002914593766658908, + "loss": 4.9387, + "step": 45500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029136552366221925, + "loss": 4.9312, + "step": 46000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029127167065854775, + "loss": 4.9258, + "step": 46500 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002911778176548762, + "loss": 4.9162, + "step": 47000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029108396465120465, + "loss": 4.9197, + "step": 47500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029099011164753316, + "loss": 4.8996, + "step": 48000 + }, + { + "epoch": 0.09, + "learning_rate": 0.0002908962586438616, + "loss": 4.8886, + "step": 48500 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029080240564019006, + "loss": 4.9008, + "step": 49000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029070855263651856, + "loss": 4.9134, + "step": 49500 + }, + { + "epoch": 0.09, + "learning_rate": 0.000290614699632847, + "loss": 4.8817, + "step": 50000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00029052084662917546, + "loss": 4.8882, + "step": 50500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029042699362550397, + "loss": 4.8738, + "step": 51000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002903331406218324, + "loss": 4.8696, + "step": 51500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002902392876181609, + "loss": 4.8936, + "step": 52000 + }, + { + "epoch": 0.1, + "learning_rate": 0.00029014543461448937, + "loss": 4.8812, + "step": 52500 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002900515816108179, + "loss": 4.8556, + "step": 53000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002899577286071463, + "loss": 4.8639, + "step": 53500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00028986387560347483, + "loss": 4.8762, + "step": 54000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002897700225998033, + "loss": 4.8768, + "step": 54500 + }, + { + "epoch": 0.1, + "learning_rate": 0.00028967616959613173, + "loss": 4.8507, + "step": 55000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002895823165924602, + "loss": 4.8578, + "step": 55500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002894884635887887, + "loss": 4.8623, + "step": 56000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028939461058511714, + "loss": 4.8503, + "step": 56500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002893007575814456, + "loss": 4.83, + "step": 57000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002892069045777741, + "loss": 4.8259, + "step": 57500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028911305157410254, + "loss": 4.8262, + "step": 58000 + }, + { + "epoch": 0.11, + "learning_rate": 0.000289019198570431, + "loss": 4.8296, + "step": 58500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002889253455667595, + "loss": 4.8132, + "step": 59000 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028883149256308795, + "loss": 4.8128, + "step": 59500 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002887376395594164, + "loss": 4.8257, + "step": 60000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0002886437865557449, + "loss": 4.8056, + "step": 60500 + }, + { + "epoch": 0.11, + "learning_rate": 0.00028854993355207335, + "loss": 4.8002, + "step": 61000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028845608054840186, + "loss": 4.8176, + "step": 61500 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002883622275447303, + "loss": 4.8031, + "step": 62000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002882683745410588, + "loss": 4.7931, + "step": 62500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028817452153738726, + "loss": 4.7996, + "step": 63000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028808066853371577, + "loss": 4.7932, + "step": 63500 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002879868155300442, + "loss": 4.7942, + "step": 64000 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028789296252637267, + "loss": 4.7629, + "step": 64500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028779910952270117, + "loss": 4.7946, + "step": 65000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002877052565190296, + "loss": 4.7972, + "step": 65500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00028761140351535807, + "loss": 4.7747, + "step": 66000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0002875175505116866, + "loss": 4.7656, + "step": 66500 + }, + { + "epoch": 0.13, + "learning_rate": 0.000287423697508015, + "loss": 4.7701, + "step": 67000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002873298445043435, + "loss": 4.7587, + "step": 67500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002872359915006719, + "loss": 4.7727, + "step": 68000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028714213849700043, + "loss": 4.7734, + "step": 68500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002870482854933289, + "loss": 4.7839, + "step": 69000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002869544324896574, + "loss": 4.7712, + "step": 69500 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028686057948598584, + "loss": 4.7754, + "step": 70000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002867667264823143, + "loss": 4.7686, + "step": 70500 + }, + { + "epoch": 0.13, + "learning_rate": 0.0002866728734786428, + "loss": 4.7434, + "step": 71000 + }, + { + "epoch": 0.13, + "learning_rate": 0.00028657902047497124, + "loss": 4.7537, + "step": 71500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028648516747129975, + "loss": 4.7555, + "step": 72000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002863913144676282, + "loss": 4.7473, + "step": 72500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002862974614639567, + "loss": 4.7394, + "step": 73000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028620360846028515, + "loss": 4.7185, + "step": 73500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002861097554566136, + "loss": 4.7267, + "step": 74000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002860159024529421, + "loss": 4.7256, + "step": 74500 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028592204944927056, + "loss": 4.7324, + "step": 75000 + }, + { + "epoch": 0.14, + "learning_rate": 0.000285828196445599, + "loss": 4.7389, + "step": 75500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002857343434419275, + "loss": 4.7295, + "step": 76000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00028564049043825596, + "loss": 4.7122, + "step": 76500 + }, + { + "epoch": 0.14, + "learning_rate": 0.0002855466374345844, + "loss": 4.7236, + "step": 77000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002854527844309129, + "loss": 4.7286, + "step": 77500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028535893142724137, + "loss": 4.7192, + "step": 78000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002852650784235698, + "loss": 4.7253, + "step": 78500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002851712254198983, + "loss": 4.7103, + "step": 79000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028507737241622677, + "loss": 4.7163, + "step": 79500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002849835194125553, + "loss": 4.7043, + "step": 80000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002848896664088837, + "loss": 4.6942, + "step": 80500 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002847958134052122, + "loss": 4.7068, + "step": 81000 + }, + { + "epoch": 0.15, + "learning_rate": 0.0002847019604015407, + "loss": 4.7219, + "step": 81500 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028460810739786913, + "loss": 4.6885, + "step": 82000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00028451425439419764, + "loss": 4.7104, + "step": 82500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002844204013905261, + "loss": 4.7067, + "step": 83000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028432654838685454, + "loss": 4.7005, + "step": 83500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028423269538318304, + "loss": 4.6733, + "step": 84000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002841388423795115, + "loss": 4.6877, + "step": 84500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028404498937583994, + "loss": 4.6818, + "step": 85000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028395113637216845, + "loss": 4.6662, + "step": 85500 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002838572833684969, + "loss": 4.6931, + "step": 86000 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028376343036482535, + "loss": 4.6788, + "step": 86500 + }, + { + "epoch": 0.16, + "learning_rate": 0.00028366957736115385, + "loss": 4.6833, + "step": 87000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0002835757243574823, + "loss": 4.6715, + "step": 87500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028348187135381075, + "loss": 4.6749, + "step": 88000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028338801835013926, + "loss": 4.6802, + "step": 88500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002832941653464677, + "loss": 4.6582, + "step": 89000 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002832003123427962, + "loss": 4.6841, + "step": 89500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028310645933912466, + "loss": 4.6766, + "step": 90000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028301260633545317, + "loss": 4.6729, + "step": 90500 + }, + { + "epoch": 0.17, + "learning_rate": 0.0002829187533317816, + "loss": 4.6545, + "step": 91000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028282490032811007, + "loss": 4.668, + "step": 91500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028273104732443857, + "loss": 4.6747, + "step": 92000 + }, + { + "epoch": 0.17, + "learning_rate": 0.000282637194320767, + "loss": 4.6719, + "step": 92500 + }, + { + "epoch": 0.17, + "learning_rate": 0.00028254334131709547, + "loss": 4.642, + "step": 93000 + }, + { + "epoch": 0.18, + "learning_rate": 0.000282449488313424, + "loss": 4.6562, + "step": 93500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002823556353097524, + "loss": 4.6628, + "step": 94000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002822617823060809, + "loss": 4.6466, + "step": 94500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002821679293024094, + "loss": 4.6609, + "step": 95000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028207407629873783, + "loss": 4.6409, + "step": 95500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002819802232950663, + "loss": 4.6591, + "step": 96000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002818863702913948, + "loss": 4.6383, + "step": 96500 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028179251728772324, + "loss": 4.6406, + "step": 97000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002816986642840517, + "loss": 4.6608, + "step": 97500 + }, + { + "epoch": 0.18, + "learning_rate": 0.0002816048112803802, + "loss": 4.6444, + "step": 98000 + }, + { + "epoch": 0.18, + "learning_rate": 0.00028151095827670864, + "loss": 4.6573, + "step": 98500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028141710527303715, + "loss": 4.646, + "step": 99000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002813232522693656, + "loss": 4.647, + "step": 99500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002812293992656941, + "loss": 4.6531, + "step": 100000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028113554626202255, + "loss": 4.6351, + "step": 100500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028104169325835106, + "loss": 4.6135, + "step": 101000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002809478402546795, + "loss": 4.6253, + "step": 101500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028085398725100796, + "loss": 4.6441, + "step": 102000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028076013424733646, + "loss": 4.6277, + "step": 102500 + }, + { + "epoch": 0.19, + "learning_rate": 0.0002806662812436649, + "loss": 4.622, + "step": 103000 + }, + { + "epoch": 0.19, + "learning_rate": 0.00028057242823999336, + "loss": 4.6247, + "step": 103500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002804785752363218, + "loss": 4.6346, + "step": 104000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002803847222326503, + "loss": 4.6262, + "step": 104500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028029086922897877, + "loss": 4.6191, + "step": 105000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002801970162253072, + "loss": 4.6266, + "step": 105500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002801031632216357, + "loss": 4.6181, + "step": 106000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00028000931021796417, + "loss": 4.6199, + "step": 106500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002799154572142926, + "loss": 4.6123, + "step": 107000 + }, + { + "epoch": 0.2, + "learning_rate": 0.00027982160421062113, + "loss": 4.6061, + "step": 107500 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002797277512069496, + "loss": 4.6127, + "step": 108000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0002796338982032781, + "loss": 4.6098, + "step": 108500 + }, + { + "epoch": 0.2, + "learning_rate": 0.00027954004519960653, + "loss": 4.5959, + "step": 109000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027944619219593504, + "loss": 4.6217, + "step": 109500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002793523391922635, + "loss": 4.6196, + "step": 110000 + }, + { + "epoch": 0.21, + "learning_rate": 0.000279258486188592, + "loss": 4.613, + "step": 110500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027916463318492044, + "loss": 4.5969, + "step": 111000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002790707801812489, + "loss": 4.5849, + "step": 111500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002789769271775774, + "loss": 4.5936, + "step": 112000 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027888307417390585, + "loss": 4.5909, + "step": 112500 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002787892211702343, + "loss": 4.5804, + "step": 113000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002786953681665628, + "loss": 4.5991, + "step": 113500 + }, + { + "epoch": 0.21, + "learning_rate": 0.00027860151516289125, + "loss": 4.5796, + "step": 114000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0002785076621592197, + "loss": 4.6, + "step": 114500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027841380915554815, + "loss": 4.5904, + "step": 115000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027831995615187666, + "loss": 4.6135, + "step": 115500 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002782261031482051, + "loss": 4.5849, + "step": 116000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002781322501445336, + "loss": 4.5934, + "step": 116500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027803839714086206, + "loss": 4.5789, + "step": 117000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002779445441371905, + "loss": 4.6019, + "step": 117500 + }, + { + "epoch": 0.22, + "learning_rate": 0.000277850691133519, + "loss": 4.5784, + "step": 118000 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027775683812984747, + "loss": 4.5894, + "step": 118500 + }, + { + "epoch": 0.22, + "learning_rate": 0.00027766298512617597, + "loss": 4.5661, + "step": 119000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0002775691321225044, + "loss": 4.5731, + "step": 119500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027747527911883293, + "loss": 4.5879, + "step": 120000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002773814261151614, + "loss": 4.5668, + "step": 120500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027728757311148983, + "loss": 4.5854, + "step": 121000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027719372010781833, + "loss": 4.5802, + "step": 121500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002770998671041468, + "loss": 4.5791, + "step": 122000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027700601410047523, + "loss": 4.5802, + "step": 122500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027691216109680374, + "loss": 4.5772, + "step": 123000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002768183080931322, + "loss": 4.5704, + "step": 123500 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027672445508946064, + "loss": 4.5865, + "step": 124000 + }, + { + "epoch": 0.23, + "learning_rate": 0.00027663060208578914, + "loss": 4.5698, + "step": 124500 + }, + { + "epoch": 0.23, + "learning_rate": 0.0002765367490821176, + "loss": 4.58, + "step": 125000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027644289607844604, + "loss": 4.5589, + "step": 125500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027634904307477455, + "loss": 4.557, + "step": 126000 + }, + { + "epoch": 0.24, + "learning_rate": 0.000276255190071103, + "loss": 4.5452, + "step": 126500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002761613370674315, + "loss": 4.5473, + "step": 127000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027606748406375995, + "loss": 4.5498, + "step": 127500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002759736310600884, + "loss": 4.5575, + "step": 128000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002758797780564169, + "loss": 4.5452, + "step": 128500 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027578592505274536, + "loss": 4.5502, + "step": 129000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027569207204907386, + "loss": 4.5673, + "step": 129500 + }, + { + "epoch": 0.24, + "learning_rate": 0.0002755982190454023, + "loss": 4.5635, + "step": 130000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00027550436604173076, + "loss": 4.556, + "step": 130500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027541051303805927, + "loss": 4.5591, + "step": 131000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002753166600343877, + "loss": 4.5233, + "step": 131500 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027522280703071617, + "loss": 4.5415, + "step": 132000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002751289540270447, + "loss": 4.5755, + "step": 132500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002750351010233731, + "loss": 4.5587, + "step": 133000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002749412480197016, + "loss": 4.5436, + "step": 133500 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002748473950160301, + "loss": 4.5312, + "step": 134000 + }, + { + "epoch": 0.25, + "learning_rate": 0.00027475354201235853, + "loss": 4.562, + "step": 134500 + }, + { + "epoch": 0.25, + "learning_rate": 0.000274659689008687, + "loss": 4.5392, + "step": 135000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0002745658360050155, + "loss": 4.5207, + "step": 135500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027447198300134393, + "loss": 4.5521, + "step": 136000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027437812999767244, + "loss": 4.5322, + "step": 136500 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002742842769940009, + "loss": 4.5305, + "step": 137000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002741904239903294, + "loss": 4.5166, + "step": 137500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027409657098665784, + "loss": 4.5248, + "step": 138000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002740027179829863, + "loss": 4.5413, + "step": 138500 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002739088649793148, + "loss": 4.5441, + "step": 139000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027381501197564325, + "loss": 4.5325, + "step": 139500 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002737211589719717, + "loss": 4.5529, + "step": 140000 + }, + { + "epoch": 0.26, + "learning_rate": 0.0002736273059683002, + "loss": 4.5275, + "step": 140500 + }, + { + "epoch": 0.26, + "learning_rate": 0.00027353345296462865, + "loss": 4.543, + "step": 141000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002734395999609571, + "loss": 4.5283, + "step": 141500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002733457469572856, + "loss": 4.5547, + "step": 142000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027325189395361406, + "loss": 4.523, + "step": 142500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002731580409499425, + "loss": 4.5202, + "step": 143000 + }, + { + "epoch": 0.27, + "learning_rate": 0.000273064187946271, + "loss": 4.5235, + "step": 143500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027297033494259946, + "loss": 4.5292, + "step": 144000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002728764819389279, + "loss": 4.5304, + "step": 144500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002727826289352564, + "loss": 4.5203, + "step": 145000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00027268877593158487, + "loss": 4.5426, + "step": 145500 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002725949229279134, + "loss": 4.5062, + "step": 146000 + }, + { + "epoch": 0.27, + "learning_rate": 0.0002725010699242418, + "loss": 4.5218, + "step": 146500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027240721692057033, + "loss": 4.5159, + "step": 147000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002723133639168988, + "loss": 4.5134, + "step": 147500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002722195109132273, + "loss": 4.5127, + "step": 148000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027212565790955573, + "loss": 4.5214, + "step": 148500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002720318049058842, + "loss": 4.5216, + "step": 149000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002719379519022127, + "loss": 4.5274, + "step": 149500 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027184409889854114, + "loss": 4.5214, + "step": 150000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002717502458948696, + "loss": 4.5199, + "step": 150500 + }, + { + "epoch": 0.28, + "learning_rate": 0.0002716563928911981, + "loss": 4.498, + "step": 151000 + }, + { + "epoch": 0.28, + "learning_rate": 0.00027156253988752654, + "loss": 4.5243, + "step": 151500 + }, + { + "epoch": 0.29, + "learning_rate": 0.000271468686883855, + "loss": 4.5082, + "step": 152000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027137483388018344, + "loss": 4.4959, + "step": 152500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027128098087651195, + "loss": 4.4927, + "step": 153000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002711871278728404, + "loss": 4.4983, + "step": 153500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027109327486916885, + "loss": 4.5191, + "step": 154000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027099942186549735, + "loss": 4.4982, + "step": 154500 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002709055688618258, + "loss": 4.5146, + "step": 155000 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002708117158581543, + "loss": 4.5323, + "step": 155500 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027071786285448276, + "loss": 4.4845, + "step": 156000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00027062400985081126, + "loss": 4.5019, + "step": 156500 + }, + { + "epoch": 0.29, + "learning_rate": 0.0002705301568471397, + "loss": 4.5051, + "step": 157000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002704363038434682, + "loss": 4.4981, + "step": 157500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00027034245083979667, + "loss": 4.5029, + "step": 158000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002702485978361251, + "loss": 4.4943, + "step": 158500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002701547448324536, + "loss": 4.5068, + "step": 159000 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002700608918287821, + "loss": 4.5022, + "step": 159500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002699670388251105, + "loss": 4.5088, + "step": 160000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00026987318582143903, + "loss": 4.4995, + "step": 160500 + }, + { + "epoch": 0.3, + "learning_rate": 0.0002697793328177675, + "loss": 4.482, + "step": 161000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00026968547981409593, + "loss": 4.498, + "step": 161500 + }, + { + "epoch": 0.3, + "learning_rate": 0.00026959162681042443, + "loss": 4.4966, + "step": 162000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002694977738067529, + "loss": 4.4759, + "step": 162500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026940392080308133, + "loss": 4.4983, + "step": 163000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002693100677994098, + "loss": 4.501, + "step": 163500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002692162147957383, + "loss": 4.493, + "step": 164000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026912236179206674, + "loss": 4.5082, + "step": 164500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026902850878839524, + "loss": 4.4999, + "step": 165000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002689346557847237, + "loss": 4.499, + "step": 165500 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002688408027810522, + "loss": 4.4951, + "step": 166000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026874694977738065, + "loss": 4.4889, + "step": 166500 + }, + { + "epoch": 0.31, + "learning_rate": 0.00026865309677370915, + "loss": 4.4975, + "step": 167000 + }, + { + "epoch": 0.31, + "learning_rate": 0.0002685592437700376, + "loss": 4.4951, + "step": 167500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026846539076636605, + "loss": 4.4833, + "step": 168000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026837153776269456, + "loss": 4.4934, + "step": 168500 + }, + { + "epoch": 0.32, + "learning_rate": 0.000268277684759023, + "loss": 4.4715, + "step": 169000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026818383175535146, + "loss": 4.4913, + "step": 169500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026808997875167996, + "loss": 4.4812, + "step": 170000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002679961257480084, + "loss": 4.4829, + "step": 170500 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026790227274433687, + "loss": 4.4816, + "step": 171000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026780841974066537, + "loss": 4.4943, + "step": 171500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002677145667369938, + "loss": 4.4927, + "step": 172000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00026762071373332227, + "loss": 4.4701, + "step": 172500 + }, + { + "epoch": 0.32, + "learning_rate": 0.0002675268607296508, + "loss": 4.4765, + "step": 173000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002674330077259792, + "loss": 4.4852, + "step": 173500 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002673391547223077, + "loss": 4.4734, + "step": 174000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002672453017186362, + "loss": 4.487, + "step": 174500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026715144871496463, + "loss": 4.4866, + "step": 175000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026705759571129313, + "loss": 4.499, + "step": 175500 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002669637427076216, + "loss": 4.4848, + "step": 176000 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002668698897039501, + "loss": 4.4732, + "step": 176500 + }, + { + "epoch": 0.33, + "learning_rate": 0.00026677603670027854, + "loss": 4.4708, + "step": 177000 + }, + { + "epoch": 0.33, + "learning_rate": 0.000266682183696607, + "loss": 4.4751, + "step": 177500 + }, + { + "epoch": 0.33, + "learning_rate": 0.0002665883306929355, + "loss": 4.4744, + "step": 178000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026649447768926394, + "loss": 4.4781, + "step": 178500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002664006246855924, + "loss": 4.4573, + "step": 179000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002663067716819209, + "loss": 4.4974, + "step": 179500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026621291867824935, + "loss": 4.482, + "step": 180000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002661190656745778, + "loss": 4.4881, + "step": 180500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002660252126709063, + "loss": 4.4581, + "step": 181000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026593135966723476, + "loss": 4.4804, + "step": 181500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002658375066635632, + "loss": 4.4607, + "step": 182000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0002657436536598917, + "loss": 4.4511, + "step": 182500 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026564980065622016, + "loss": 4.4807, + "step": 183000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00026555594765254866, + "loss": 4.455, + "step": 183500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002654620946488771, + "loss": 4.4794, + "step": 184000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026536824164520557, + "loss": 4.4458, + "step": 184500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026527438864153407, + "loss": 4.4586, + "step": 185000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002651805356378625, + "loss": 4.455, + "step": 185500 + }, + { + "epoch": 0.35, + "learning_rate": 0.000265086682634191, + "loss": 4.4608, + "step": 186000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002649928296305195, + "loss": 4.4719, + "step": 186500 + }, + { + "epoch": 0.35, + "learning_rate": 0.000264898976626848, + "loss": 4.4616, + "step": 187000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026480512362317643, + "loss": 4.4581, + "step": 187500 + }, + { + "epoch": 0.35, + "learning_rate": 0.0002647112706195049, + "loss": 4.4627, + "step": 188000 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026461741761583333, + "loss": 4.4613, + "step": 188500 + }, + { + "epoch": 0.35, + "learning_rate": 0.00026452356461216184, + "loss": 4.4615, + "step": 189000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002644297116084903, + "loss": 4.4574, + "step": 189500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026433585860481874, + "loss": 4.4601, + "step": 190000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026424200560114724, + "loss": 4.4783, + "step": 190500 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002641481525974757, + "loss": 4.4601, + "step": 191000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026405429959380414, + "loss": 4.4601, + "step": 191500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026396044659013265, + "loss": 4.4616, + "step": 192000 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002638665935864611, + "loss": 4.4425, + "step": 192500 + }, + { + "epoch": 0.36, + "learning_rate": 0.0002637727405827896, + "loss": 4.454, + "step": 193000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026367888757911805, + "loss": 4.442, + "step": 193500 + }, + { + "epoch": 0.36, + "learning_rate": 0.00026358503457544656, + "loss": 4.4526, + "step": 194000 + }, + { + "epoch": 0.37, + "learning_rate": 0.000263491181571775, + "loss": 4.4622, + "step": 194500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026339732856810346, + "loss": 4.4352, + "step": 195000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026330347556443196, + "loss": 4.4377, + "step": 195500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002632096225607604, + "loss": 4.457, + "step": 196000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002631157695570889, + "loss": 4.4503, + "step": 196500 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026302191655341737, + "loss": 4.4549, + "step": 197000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002629280635497458, + "loss": 4.4328, + "step": 197500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002628342105460743, + "loss": 4.4321, + "step": 198000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00026274035754240277, + "loss": 4.4529, + "step": 198500 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002626465045387312, + "loss": 4.4408, + "step": 199000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0002625526515350597, + "loss": 4.4626, + "step": 199500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002624587985313882, + "loss": 4.4443, + "step": 200000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002623649455277166, + "loss": 4.4406, + "step": 200500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002622710925240451, + "loss": 4.4512, + "step": 201000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002621772395203736, + "loss": 4.452, + "step": 201500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026208338651670203, + "loss": 4.4217, + "step": 202000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026198953351303054, + "loss": 4.4309, + "step": 202500 + }, + { + "epoch": 0.38, + "learning_rate": 0.000261895680509359, + "loss": 4.4316, + "step": 203000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002618018275056875, + "loss": 4.4368, + "step": 203500 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026170797450201594, + "loss": 4.444, + "step": 204000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00026161412149834445, + "loss": 4.4473, + "step": 204500 + }, + { + "epoch": 0.38, + "learning_rate": 0.0002615202684946729, + "loss": 4.4358, + "step": 205000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026142641549100135, + "loss": 4.4415, + "step": 205500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026133256248732985, + "loss": 4.449, + "step": 206000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002612387094836583, + "loss": 4.4278, + "step": 206500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026114485647998675, + "loss": 4.4475, + "step": 207000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026105100347631526, + "loss": 4.4435, + "step": 207500 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002609571504726437, + "loss": 4.4337, + "step": 208000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026086329746897216, + "loss": 4.4399, + "step": 208500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026076944446530066, + "loss": 4.4535, + "step": 209000 + }, + { + "epoch": 0.39, + "learning_rate": 0.0002606755914616291, + "loss": 4.4317, + "step": 209500 + }, + { + "epoch": 0.39, + "learning_rate": 0.00026058173845795756, + "loss": 4.4487, + "step": 210000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026048788545428607, + "loss": 4.4395, + "step": 210500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002603940324506145, + "loss": 4.4324, + "step": 211000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026030017944694297, + "loss": 4.4298, + "step": 211500 + }, + { + "epoch": 0.4, + "learning_rate": 0.00026020632644327147, + "loss": 4.4465, + "step": 212000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002601124734395999, + "loss": 4.4077, + "step": 212500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002600186204359284, + "loss": 4.4179, + "step": 213000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002599247674322569, + "loss": 4.4179, + "step": 213500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002598309144285854, + "loss": 4.4445, + "step": 214000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00025973706142491383, + "loss": 4.4384, + "step": 214500 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002596432084212423, + "loss": 4.414, + "step": 215000 + }, + { + "epoch": 0.4, + "learning_rate": 0.0002595493554175708, + "loss": 4.4242, + "step": 215500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025945550241389924, + "loss": 4.4212, + "step": 216000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002593616494102277, + "loss": 4.4517, + "step": 216500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002592677964065562, + "loss": 4.4202, + "step": 217000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025917394340288464, + "loss": 4.419, + "step": 217500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002590800903992131, + "loss": 4.4212, + "step": 218000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002589862373955416, + "loss": 4.4205, + "step": 218500 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025889238439187005, + "loss": 4.4269, + "step": 219000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002587985313881985, + "loss": 4.4234, + "step": 219500 + }, + { + "epoch": 0.41, + "learning_rate": 0.000258704678384527, + "loss": 4.4394, + "step": 220000 + }, + { + "epoch": 0.41, + "learning_rate": 0.00025861082538085545, + "loss": 4.448, + "step": 220500 + }, + { + "epoch": 0.41, + "learning_rate": 0.0002585169723771839, + "loss": 4.4311, + "step": 221000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002584231193735124, + "loss": 4.4127, + "step": 221500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025832926636984086, + "loss": 4.4226, + "step": 222000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025823541336616936, + "loss": 4.4315, + "step": 222500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002581415603624978, + "loss": 4.4174, + "step": 223000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002580477073588263, + "loss": 4.408, + "step": 223500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025795385435515477, + "loss": 4.4174, + "step": 224000 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002578600013514832, + "loss": 4.4194, + "step": 224500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002577661483478117, + "loss": 4.4232, + "step": 225000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00025767229534414017, + "loss": 4.4008, + "step": 225500 + }, + { + "epoch": 0.42, + "learning_rate": 0.0002575784423404686, + "loss": 4.4126, + "step": 226000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002574845893367971, + "loss": 4.419, + "step": 226500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002573907363331256, + "loss": 4.4076, + "step": 227000 + }, + { + "epoch": 0.43, + "learning_rate": 0.000257296883329454, + "loss": 4.4135, + "step": 227500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025720303032578253, + "loss": 4.4165, + "step": 228000 + }, + { + "epoch": 0.43, + "learning_rate": 0.000257109177322111, + "loss": 4.3921, + "step": 228500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025701532431843943, + "loss": 4.4044, + "step": 229000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025692147131476794, + "loss": 4.4253, + "step": 229500 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002568276183110964, + "loss": 4.3979, + "step": 230000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002567337653074249, + "loss": 4.4103, + "step": 230500 + }, + { + "epoch": 0.43, + "learning_rate": 0.00025663991230375334, + "loss": 4.4104, + "step": 231000 + }, + { + "epoch": 0.43, + "learning_rate": 0.0002565460593000818, + "loss": 4.4139, + "step": 231500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002564522062964103, + "loss": 4.4317, + "step": 232000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025635835329273875, + "loss": 4.4092, + "step": 232500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025626450028906725, + "loss": 4.4138, + "step": 233000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002561706472853957, + "loss": 4.4008, + "step": 233500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002560767942817242, + "loss": 4.4074, + "step": 234000 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025598294127805266, + "loss": 4.4017, + "step": 234500 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002558890882743811, + "loss": 4.3993, + "step": 235000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002557952352707096, + "loss": 4.3899, + "step": 235500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025570138226703806, + "loss": 4.411, + "step": 236000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0002556075292633665, + "loss": 4.4277, + "step": 236500 + }, + { + "epoch": 0.44, + "learning_rate": 0.00025551367625969496, + "loss": 4.3965, + "step": 237000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025541982325602347, + "loss": 4.4047, + "step": 237500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002553259702523519, + "loss": 4.4081, + "step": 238000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025523211724868037, + "loss": 4.4082, + "step": 238500 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025513826424500887, + "loss": 4.397, + "step": 239000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002550444112413373, + "loss": 4.3929, + "step": 239500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002549505582376658, + "loss": 4.4188, + "step": 240000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002548567052339943, + "loss": 4.3988, + "step": 240500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002547628522303228, + "loss": 4.4008, + "step": 241000 + }, + { + "epoch": 0.45, + "learning_rate": 0.00025466899922665123, + "loss": 4.4084, + "step": 241500 + }, + { + "epoch": 0.45, + "learning_rate": 0.0002545751462229797, + "loss": 4.4074, + "step": 242000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002544812932193082, + "loss": 4.4194, + "step": 242500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025438744021563664, + "loss": 4.4067, + "step": 243000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025429358721196514, + "loss": 4.4004, + "step": 243500 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002541997342082936, + "loss": 4.4099, + "step": 244000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025410588120462204, + "loss": 4.4007, + "step": 244500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025401202820095055, + "loss": 4.3855, + "step": 245000 + }, + { + "epoch": 0.46, + "learning_rate": 0.000253918175197279, + "loss": 4.3895, + "step": 245500 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025382432219360745, + "loss": 4.4021, + "step": 246000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025373046918993595, + "loss": 4.4017, + "step": 246500 + }, + { + "epoch": 0.46, + "learning_rate": 0.0002536366161862644, + "loss": 4.3945, + "step": 247000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00025354276318259285, + "loss": 4.4167, + "step": 247500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025344891017892136, + "loss": 4.3858, + "step": 248000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002533550571752498, + "loss": 4.385, + "step": 248500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025326120417157826, + "loss": 4.3828, + "step": 249000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025316735116790676, + "loss": 4.3942, + "step": 249500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002530734981642352, + "loss": 4.4001, + "step": 250000 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002529796451605637, + "loss": 4.3929, + "step": 250500 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025288579215689217, + "loss": 4.4087, + "step": 251000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025279193915322067, + "loss": 4.389, + "step": 251500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002526980861495491, + "loss": 4.3932, + "step": 252000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00025260423314587757, + "loss": 4.403, + "step": 252500 + }, + { + "epoch": 0.47, + "learning_rate": 0.0002525103801422061, + "loss": 4.3735, + "step": 253000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025241652713853453, + "loss": 4.3942, + "step": 253500 + }, + { + "epoch": 0.48, + "learning_rate": 0.000252322674134863, + "loss": 4.3777, + "step": 254000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002522288211311915, + "loss": 4.3981, + "step": 254500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025213496812751993, + "loss": 4.3964, + "step": 255000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002520411151238484, + "loss": 4.3924, + "step": 255500 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002519472621201769, + "loss": 4.4021, + "step": 256000 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025185340911650534, + "loss": 4.3905, + "step": 256500 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002517595561128338, + "loss": 4.3912, + "step": 257000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0002516657031091623, + "loss": 4.3911, + "step": 257500 + }, + { + "epoch": 0.48, + "learning_rate": 0.00025157185010549074, + "loss": 4.384, + "step": 258000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002514779971018192, + "loss": 4.3818, + "step": 258500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002513841440981477, + "loss": 4.3821, + "step": 259000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025129029109447615, + "loss": 4.3911, + "step": 259500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025119643809080465, + "loss": 4.3689, + "step": 260000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002511025850871331, + "loss": 4.3823, + "step": 260500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002510087320834616, + "loss": 4.383, + "step": 261000 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025091487907979006, + "loss": 4.3888, + "step": 261500 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002508210260761185, + "loss": 4.3815, + "step": 262000 + }, + { + "epoch": 0.49, + "learning_rate": 0.000250727173072447, + "loss": 4.3687, + "step": 262500 + }, + { + "epoch": 0.49, + "learning_rate": 0.00025063332006877546, + "loss": 4.3823, + "step": 263000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0002505394670651039, + "loss": 4.3825, + "step": 263500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002504456140614324, + "loss": 4.374, + "step": 264000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00025035176105776087, + "loss": 4.3859, + "step": 264500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002502579080540893, + "loss": 4.4123, + "step": 265000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002501640550504178, + "loss": 4.3858, + "step": 265500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002500702020467463, + "loss": 4.3665, + "step": 266000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002499763490430747, + "loss": 4.3723, + "step": 266500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00024988249603940323, + "loss": 4.3791, + "step": 267000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002497886430357317, + "loss": 4.3884, + "step": 267500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00024969479003206013, + "loss": 4.3901, + "step": 268000 + }, + { + "epoch": 0.5, + "learning_rate": 0.00024960093702838863, + "loss": 4.381, + "step": 268500 + }, + { + "epoch": 0.5, + "learning_rate": 0.0002495070840247171, + "loss": 4.3554, + "step": 269000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002494132310210456, + "loss": 4.3889, + "step": 269500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024931937801737404, + "loss": 4.3601, + "step": 270000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024922552501370254, + "loss": 4.38, + "step": 270500 + }, + { + "epoch": 0.51, + "learning_rate": 0.000249131672010031, + "loss": 4.3733, + "step": 271000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002490378190063595, + "loss": 4.3805, + "step": 271500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024894396600268795, + "loss": 4.3747, + "step": 272000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002488501129990164, + "loss": 4.3724, + "step": 272500 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024875625999534485, + "loss": 4.3476, + "step": 273000 + }, + { + "epoch": 0.51, + "learning_rate": 0.00024866240699167335, + "loss": 4.364, + "step": 273500 + }, + { + "epoch": 0.51, + "learning_rate": 0.0002485685539880018, + "loss": 4.385, + "step": 274000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024847470098433025, + "loss": 4.3838, + "step": 274500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024838084798065876, + "loss": 4.3826, + "step": 275000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002482869949769872, + "loss": 4.377, + "step": 275500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024819314197331566, + "loss": 4.3527, + "step": 276000 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024809928896964416, + "loss": 4.3549, + "step": 276500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002480054359659726, + "loss": 4.3647, + "step": 277000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002479115829623011, + "loss": 4.363, + "step": 277500 + }, + { + "epoch": 0.52, + "learning_rate": 0.00024781772995862957, + "loss": 4.3738, + "step": 278000 + }, + { + "epoch": 0.52, + "learning_rate": 0.000247723876954958, + "loss": 4.3596, + "step": 278500 + }, + { + "epoch": 0.52, + "learning_rate": 0.0002476300239512865, + "loss": 4.3564, + "step": 279000 + }, + { + "epoch": 0.52, + "learning_rate": 0.000247536170947615, + "loss": 4.3638, + "step": 279500 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002474423179439435, + "loss": 4.3797, + "step": 280000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024734846494027193, + "loss": 4.3552, + "step": 280500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024725461193660043, + "loss": 4.3644, + "step": 281000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002471607589329289, + "loss": 4.3884, + "step": 281500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024706690592925733, + "loss": 4.3544, + "step": 282000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024697305292558584, + "loss": 4.3692, + "step": 282500 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002468791999219143, + "loss": 4.3538, + "step": 283000 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024678534691824274, + "loss": 4.3398, + "step": 283500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024669149391457124, + "loss": 4.3694, + "step": 284000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0002465976409108997, + "loss": 4.3593, + "step": 284500 + }, + { + "epoch": 0.53, + "learning_rate": 0.00024650378790722814, + "loss": 4.366, + "step": 285000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002464099349035566, + "loss": 4.3603, + "step": 285500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002463160818998851, + "loss": 4.3453, + "step": 286000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024622222889621355, + "loss": 4.3559, + "step": 286500 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024612837589254205, + "loss": 4.3786, + "step": 287000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002460345228888705, + "loss": 4.3518, + "step": 287500 + }, + { + "epoch": 0.54, + "learning_rate": 0.000245940669885199, + "loss": 4.359, + "step": 288000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024584681688152746, + "loss": 4.3653, + "step": 288500 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002457529638778559, + "loss": 4.3666, + "step": 289000 + }, + { + "epoch": 0.54, + "learning_rate": 0.0002456591108741844, + "loss": 4.3611, + "step": 289500 + }, + { + "epoch": 0.54, + "learning_rate": 0.00024556525787051286, + "loss": 4.3547, + "step": 290000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024547140486684137, + "loss": 4.3447, + "step": 290500 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002453775518631698, + "loss": 4.3746, + "step": 291000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024528369885949827, + "loss": 4.3582, + "step": 291500 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002451898458558268, + "loss": 4.3502, + "step": 292000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002450959928521552, + "loss": 4.3524, + "step": 292500 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002450021398484837, + "loss": 4.3419, + "step": 293000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002449082868448122, + "loss": 4.364, + "step": 293500 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024481443384114063, + "loss": 4.3592, + "step": 294000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002447205808374691, + "loss": 4.3825, + "step": 294500 + }, + { + "epoch": 0.55, + "learning_rate": 0.0002446267278337976, + "loss": 4.3526, + "step": 295000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00024453287483012603, + "loss": 4.3585, + "step": 295500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002444390218264545, + "loss": 4.3622, + "step": 296000 + }, + { + "epoch": 0.56, + "learning_rate": 0.000244345168822783, + "loss": 4.3534, + "step": 296500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024425131581911144, + "loss": 4.353, + "step": 297000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024415746281543994, + "loss": 4.3559, + "step": 297500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024406360981176837, + "loss": 4.3668, + "step": 298000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024396975680809687, + "loss": 4.346, + "step": 298500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024387590380442532, + "loss": 4.367, + "step": 299000 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002437820508007538, + "loss": 4.3727, + "step": 299500 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024368819779708228, + "loss": 4.3396, + "step": 300000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00024359434479341075, + "loss": 4.3482, + "step": 300500 + }, + { + "epoch": 0.56, + "learning_rate": 0.0002435004917897392, + "loss": 4.3492, + "step": 301000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002434066387860677, + "loss": 4.3645, + "step": 301500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024331278578239616, + "loss": 4.3599, + "step": 302000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0002432189327787246, + "loss": 4.3476, + "step": 302500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024312507977505311, + "loss": 4.3586, + "step": 303000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024303122677138156, + "loss": 4.3416, + "step": 303500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024293737376771004, + "loss": 4.3826, + "step": 304000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024284352076403852, + "loss": 4.3317, + "step": 304500 + }, + { + "epoch": 0.57, + "learning_rate": 0.000242749667760367, + "loss": 4.3539, + "step": 305000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024265581475669545, + "loss": 4.344, + "step": 305500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00024256196175302392, + "loss": 4.3666, + "step": 306000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002424681087493524, + "loss": 4.3372, + "step": 306500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024237425574568085, + "loss": 4.3413, + "step": 307000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024228040274200936, + "loss": 4.346, + "step": 307500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002421865497383378, + "loss": 4.3614, + "step": 308000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024209269673466626, + "loss": 4.3508, + "step": 308500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024199884373099473, + "loss": 4.3532, + "step": 309000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002419049907273232, + "loss": 4.3417, + "step": 309500 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002418111377236517, + "loss": 4.3452, + "step": 310000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024171728471998014, + "loss": 4.3506, + "step": 310500 + }, + { + "epoch": 0.58, + "learning_rate": 0.00024162343171630864, + "loss": 4.3387, + "step": 311000 + }, + { + "epoch": 0.58, + "learning_rate": 0.0002415295787126371, + "loss": 4.35, + "step": 311500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024143572570896555, + "loss": 4.3511, + "step": 312000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024134187270529405, + "loss": 4.3422, + "step": 312500 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002412480197016225, + "loss": 4.3391, + "step": 313000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024115416669795098, + "loss": 4.3499, + "step": 313500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024106031369427945, + "loss": 4.3613, + "step": 314000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024096646069060793, + "loss": 4.3589, + "step": 314500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024087260768693638, + "loss": 4.3397, + "step": 315000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002407787546832649, + "loss": 4.3506, + "step": 315500 + }, + { + "epoch": 0.59, + "learning_rate": 0.00024068490167959334, + "loss": 4.3429, + "step": 316000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0002405910486759218, + "loss": 4.3441, + "step": 316500 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002404971956722503, + "loss": 4.3449, + "step": 317000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024040334266857874, + "loss": 4.3546, + "step": 317500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024030948966490722, + "loss": 4.3526, + "step": 318000 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002402156366612357, + "loss": 4.3328, + "step": 318500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024012178365756415, + "loss": 4.3409, + "step": 319000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00024002793065389262, + "loss": 4.3399, + "step": 319500 + }, + { + "epoch": 0.6, + "learning_rate": 0.0002399340776502211, + "loss": 4.3447, + "step": 320000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023984022464654958, + "loss": 4.3372, + "step": 320500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023974637164287803, + "loss": 4.3523, + "step": 321000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023965251863920648, + "loss": 4.3183, + "step": 321500 + }, + { + "epoch": 0.6, + "learning_rate": 0.00023955866563553498, + "loss": 4.3426, + "step": 322000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023946481263186344, + "loss": 4.3426, + "step": 322500 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002393709596281919, + "loss": 4.3506, + "step": 323000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0002392771066245204, + "loss": 4.3224, + "step": 323500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023918325362084887, + "loss": 4.3441, + "step": 324000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023908940061717732, + "loss": 4.3336, + "step": 324500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023899554761350582, + "loss": 4.3337, + "step": 325000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023890169460983427, + "loss": 4.3369, + "step": 325500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023880784160616272, + "loss": 4.3413, + "step": 326000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023871398860249123, + "loss": 4.328, + "step": 326500 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023862013559881968, + "loss": 4.3425, + "step": 327000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00023852628259514816, + "loss": 4.3429, + "step": 327500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023843242959147663, + "loss": 4.3412, + "step": 328000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0002383385765878051, + "loss": 4.3334, + "step": 328500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023824472358413356, + "loss": 4.3338, + "step": 329000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023815087058046204, + "loss": 4.3422, + "step": 329500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023805701757679052, + "loss": 4.3341, + "step": 330000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023796316457311897, + "loss": 4.339, + "step": 330500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023786931156944747, + "loss": 4.3386, + "step": 331000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023777545856577592, + "loss": 4.333, + "step": 331500 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023768160556210437, + "loss": 4.3387, + "step": 332000 + }, + { + "epoch": 0.62, + "learning_rate": 0.00023758775255843288, + "loss": 4.3313, + "step": 332500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023749389955476133, + "loss": 4.3317, + "step": 333000 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002374000465510898, + "loss": 4.3209, + "step": 333500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023730619354741825, + "loss": 4.331, + "step": 334000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023721234054374676, + "loss": 4.3382, + "step": 334500 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002371184875400752, + "loss": 4.3364, + "step": 335000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023702463453640366, + "loss": 4.3246, + "step": 335500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023693078153273216, + "loss": 4.3123, + "step": 336000 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002368369285290606, + "loss": 4.3352, + "step": 336500 + }, + { + "epoch": 0.63, + "learning_rate": 0.0002367430755253891, + "loss": 4.3253, + "step": 337000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023664922252171757, + "loss": 4.3184, + "step": 337500 + }, + { + "epoch": 0.63, + "learning_rate": 0.00023655536951804605, + "loss": 4.3325, + "step": 338000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002364615165143745, + "loss": 4.3392, + "step": 338500 + }, + { + "epoch": 0.64, + "learning_rate": 0.000236367663510703, + "loss": 4.3291, + "step": 339000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023627381050703145, + "loss": 4.3066, + "step": 339500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002361799575033599, + "loss": 4.3231, + "step": 340000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002360861044996884, + "loss": 4.3297, + "step": 340500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023599225149601686, + "loss": 4.3105, + "step": 341000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023589839849234533, + "loss": 4.3169, + "step": 341500 + }, + { + "epoch": 0.64, + "learning_rate": 0.0002358045454886738, + "loss": 4.3291, + "step": 342000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023571069248500226, + "loss": 4.3137, + "step": 342500 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023561683948133074, + "loss": 4.3295, + "step": 343000 + }, + { + "epoch": 0.64, + "learning_rate": 0.00023552298647765922, + "loss": 4.3266, + "step": 343500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002354291334739877, + "loss": 4.3203, + "step": 344000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023533528047031614, + "loss": 4.3302, + "step": 344500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002352414274666446, + "loss": 4.3419, + "step": 345000 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002351475744629731, + "loss": 4.3287, + "step": 345500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023505372145930155, + "loss": 4.3181, + "step": 346000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023495986845563003, + "loss": 4.3305, + "step": 346500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0002348660154519585, + "loss": 4.3143, + "step": 347000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023477216244828698, + "loss": 4.3286, + "step": 347500 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023467830944461543, + "loss": 4.3343, + "step": 348000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00023458445644094394, + "loss": 4.3125, + "step": 348500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023449060343727239, + "loss": 4.3239, + "step": 349000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023439675043360084, + "loss": 4.3225, + "step": 349500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023430289742992934, + "loss": 4.3198, + "step": 350000 + }, + { + "epoch": 0.66, + "learning_rate": 0.0002342090444262578, + "loss": 4.3288, + "step": 350500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023411519142258627, + "loss": 4.327, + "step": 351000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023402133841891475, + "loss": 4.322, + "step": 351500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023392748541524322, + "loss": 4.3258, + "step": 352000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023383363241157167, + "loss": 4.33, + "step": 352500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023373977940790015, + "loss": 4.3214, + "step": 353000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023364592640422863, + "loss": 4.3336, + "step": 353500 + }, + { + "epoch": 0.66, + "learning_rate": 0.00023355207340055708, + "loss": 4.3066, + "step": 354000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023345822039688558, + "loss": 4.3172, + "step": 354500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023336436739321403, + "loss": 4.3198, + "step": 355000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023327051438954248, + "loss": 4.3232, + "step": 355500 + }, + { + "epoch": 0.67, + "learning_rate": 0.000233176661385871, + "loss": 4.3227, + "step": 356000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023308280838219944, + "loss": 4.3282, + "step": 356500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023298895537852792, + "loss": 4.3354, + "step": 357000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023289510237485637, + "loss": 4.3167, + "step": 357500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023280124937118487, + "loss": 4.3128, + "step": 358000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023270739636751332, + "loss": 4.3079, + "step": 358500 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023261354336384177, + "loss": 4.3187, + "step": 359000 + }, + { + "epoch": 0.67, + "learning_rate": 0.00023251969036017028, + "loss": 4.3335, + "step": 359500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023242583735649873, + "loss": 4.3001, + "step": 360000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002323319843528272, + "loss": 4.311, + "step": 360500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023223813134915568, + "loss": 4.3033, + "step": 361000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023214427834548416, + "loss": 4.3062, + "step": 361500 + }, + { + "epoch": 0.68, + "learning_rate": 0.0002320504253418126, + "loss": 4.337, + "step": 362000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023195657233814111, + "loss": 4.3261, + "step": 362500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023186271933446956, + "loss": 4.3073, + "step": 363000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023176886633079801, + "loss": 4.3152, + "step": 363500 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023167501332712652, + "loss": 4.3326, + "step": 364000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00023158116032345497, + "loss": 4.3189, + "step": 364500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023148730731978342, + "loss": 4.3118, + "step": 365000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023139345431611192, + "loss": 4.3262, + "step": 365500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023129960131244037, + "loss": 4.3084, + "step": 366000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023120574830876885, + "loss": 4.3094, + "step": 366500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023111189530509733, + "loss": 4.3039, + "step": 367000 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002310180423014258, + "loss": 4.3015, + "step": 367500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023092418929775426, + "loss": 4.2926, + "step": 368000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023083033629408276, + "loss": 4.3182, + "step": 368500 + }, + { + "epoch": 0.69, + "learning_rate": 0.0002307364832904112, + "loss": 4.315, + "step": 369000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023064263028673966, + "loss": 4.3044, + "step": 369500 + }, + { + "epoch": 0.69, + "learning_rate": 0.00023054877728306814, + "loss": 4.289, + "step": 370000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023045492427939662, + "loss": 4.323, + "step": 370500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002303610712757251, + "loss": 4.3132, + "step": 371000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023026721827205354, + "loss": 4.3132, + "step": 371500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00023017336526838205, + "loss": 4.306, + "step": 372000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002300795122647105, + "loss": 4.2933, + "step": 372500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022998565926103895, + "loss": 4.2984, + "step": 373000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022989180625736745, + "loss": 4.2908, + "step": 373500 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002297979532536959, + "loss": 4.3171, + "step": 374000 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022970410025002438, + "loss": 4.3105, + "step": 374500 + }, + { + "epoch": 0.7, + "learning_rate": 0.00022961024724635286, + "loss": 4.3084, + "step": 375000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0002295163942426813, + "loss": 4.3002, + "step": 375500 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002294225412390098, + "loss": 4.2929, + "step": 376000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022932868823533826, + "loss": 4.3018, + "step": 376500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022923483523166674, + "loss": 4.3052, + "step": 377000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002291409822279952, + "loss": 4.314, + "step": 377500 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002290471292243237, + "loss": 4.3259, + "step": 378000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022895327622065215, + "loss": 4.3064, + "step": 378500 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002288594232169806, + "loss": 4.3098, + "step": 379000 + }, + { + "epoch": 0.71, + "learning_rate": 0.0002287655702133091, + "loss": 4.3057, + "step": 379500 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022867171720963755, + "loss": 4.3241, + "step": 380000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00022857786420596603, + "loss": 4.3116, + "step": 380500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002284840112022945, + "loss": 4.3033, + "step": 381000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022839015819862298, + "loss": 4.3052, + "step": 381500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022829630519495143, + "loss": 4.2986, + "step": 382000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022820245219127989, + "loss": 4.3329, + "step": 382500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002281085991876084, + "loss": 4.3184, + "step": 383000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022801474618393684, + "loss": 4.3221, + "step": 383500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022792089318026532, + "loss": 4.2981, + "step": 384000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002278270401765938, + "loss": 4.3068, + "step": 384500 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022773318717292227, + "loss": 4.2975, + "step": 385000 + }, + { + "epoch": 0.72, + "learning_rate": 0.00022763933416925072, + "loss": 4.3087, + "step": 385500 + }, + { + "epoch": 0.72, + "learning_rate": 0.0002275454811655792, + "loss": 4.3093, + "step": 386000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022745162816190768, + "loss": 4.3002, + "step": 386500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022735777515823613, + "loss": 4.2916, + "step": 387000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022726392215456463, + "loss": 4.3075, + "step": 387500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022717006915089308, + "loss": 4.3241, + "step": 388000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022707621614722153, + "loss": 4.3041, + "step": 388500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022698236314355004, + "loss": 4.2962, + "step": 389000 + }, + { + "epoch": 0.73, + "learning_rate": 0.0002268885101398785, + "loss": 4.2888, + "step": 389500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022679465713620696, + "loss": 4.2749, + "step": 390000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022670080413253544, + "loss": 4.2951, + "step": 390500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022660695112886392, + "loss": 4.3075, + "step": 391000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00022651309812519237, + "loss": 4.3103, + "step": 391500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022641924512152087, + "loss": 4.3059, + "step": 392000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022632539211784932, + "loss": 4.2875, + "step": 392500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022623153911417778, + "loss": 4.3118, + "step": 393000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022613768611050625, + "loss": 4.3154, + "step": 393500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022604383310683473, + "loss": 4.3082, + "step": 394000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002259499801031632, + "loss": 4.318, + "step": 394500 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022585612709949166, + "loss": 4.3045, + "step": 395000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022576227409582016, + "loss": 4.3018, + "step": 395500 + }, + { + "epoch": 0.74, + "learning_rate": 0.0002256684210921486, + "loss": 4.3095, + "step": 396000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00022557456808847706, + "loss": 4.3, + "step": 396500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022548071508480557, + "loss": 4.3044, + "step": 397000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022538686208113402, + "loss": 4.2872, + "step": 397500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002252930090774625, + "loss": 4.3034, + "step": 398000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022519915607379097, + "loss": 4.2817, + "step": 398500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022510530307011942, + "loss": 4.2997, + "step": 399000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002250114500664479, + "loss": 4.3049, + "step": 399500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022491759706277638, + "loss": 4.2894, + "step": 400000 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022482374405910486, + "loss": 4.2934, + "step": 400500 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002247298910554333, + "loss": 4.2779, + "step": 401000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0002246360380517618, + "loss": 4.2995, + "step": 401500 + }, + { + "epoch": 0.75, + "learning_rate": 0.00022454218504809026, + "loss": 4.2822, + "step": 402000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002244483320444187, + "loss": 4.3028, + "step": 402500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022435447904074722, + "loss": 4.2893, + "step": 403000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022426062603707567, + "loss": 4.2889, + "step": 403500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022416677303340414, + "loss": 4.2974, + "step": 404000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022407292002973262, + "loss": 4.2914, + "step": 404500 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002239790670260611, + "loss": 4.283, + "step": 405000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022388521402238955, + "loss": 4.3096, + "step": 405500 + }, + { + "epoch": 0.76, + "learning_rate": 0.000223791361018718, + "loss": 4.3023, + "step": 406000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0002236975080150465, + "loss": 4.3092, + "step": 406500 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022360365501137495, + "loss": 4.2833, + "step": 407000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00022350980200770343, + "loss": 4.2847, + "step": 407500 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002234159490040319, + "loss": 4.3017, + "step": 408000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022332209600036039, + "loss": 4.2833, + "step": 408500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022322824299668884, + "loss": 4.2866, + "step": 409000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002231343899930173, + "loss": 4.2772, + "step": 409500 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002230405369893458, + "loss": 4.2986, + "step": 410000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022294668398567424, + "loss": 4.2921, + "step": 410500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022285283098200275, + "loss": 4.2811, + "step": 411000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0002227589779783312, + "loss": 4.2975, + "step": 411500 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022266512497465965, + "loss": 4.2839, + "step": 412000 + }, + { + "epoch": 0.77, + "learning_rate": 0.00022257127197098815, + "loss": 4.2911, + "step": 412500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002224774189673166, + "loss": 4.2886, + "step": 413000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022238356596364508, + "loss": 4.2952, + "step": 413500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022228971295997356, + "loss": 4.2975, + "step": 414000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022219585995630203, + "loss": 4.2894, + "step": 414500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022210200695263048, + "loss": 4.292, + "step": 415000 + }, + { + "epoch": 0.78, + "learning_rate": 0.000222008153948959, + "loss": 4.2854, + "step": 415500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022191430094528744, + "loss": 4.2782, + "step": 416000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002218204479416159, + "loss": 4.2933, + "step": 416500 + }, + { + "epoch": 0.78, + "learning_rate": 0.0002217265949379444, + "loss": 4.2914, + "step": 417000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022163274193427284, + "loss": 4.2903, + "step": 417500 + }, + { + "epoch": 0.78, + "learning_rate": 0.00022153888893060132, + "loss": 4.2945, + "step": 418000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022144503592692977, + "loss": 4.2849, + "step": 418500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022135118292325828, + "loss": 4.2944, + "step": 419000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022125732991958673, + "loss": 4.2872, + "step": 419500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022116347691591518, + "loss": 4.2767, + "step": 420000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022106962391224368, + "loss": 4.2938, + "step": 420500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022097577090857213, + "loss": 4.2677, + "step": 421000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002208819179049006, + "loss": 4.2734, + "step": 421500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022078806490122909, + "loss": 4.2786, + "step": 422000 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022069421189755754, + "loss": 4.2816, + "step": 422500 + }, + { + "epoch": 0.79, + "learning_rate": 0.00022060035889388601, + "loss": 4.2743, + "step": 423000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0002205065058902145, + "loss": 4.2944, + "step": 423500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022041265288654297, + "loss": 4.2991, + "step": 424000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022031879988287142, + "loss": 4.2912, + "step": 424500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022022494687919992, + "loss": 4.2999, + "step": 425000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022013109387552837, + "loss": 4.2885, + "step": 425500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00022003724087185682, + "loss": 4.2875, + "step": 426000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021994338786818533, + "loss": 4.3016, + "step": 426500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021984953486451378, + "loss": 4.2757, + "step": 427000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021975568186084226, + "loss": 4.2645, + "step": 427500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00021966182885717073, + "loss": 4.2858, + "step": 428000 + }, + { + "epoch": 0.8, + "learning_rate": 0.0002195679758534992, + "loss": 4.2878, + "step": 428500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021947412284982766, + "loss": 4.271, + "step": 429000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021938026984615617, + "loss": 4.3062, + "step": 429500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021928641684248462, + "loss": 4.2894, + "step": 430000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021919256383881307, + "loss": 4.2885, + "step": 430500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021909871083514154, + "loss": 4.2785, + "step": 431000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021900485783147002, + "loss": 4.2897, + "step": 431500 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002189110048277985, + "loss": 4.2903, + "step": 432000 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021881715182412695, + "loss": 4.2806, + "step": 432500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021872329882045543, + "loss": 4.2764, + "step": 433000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0002186294458167839, + "loss": 4.2873, + "step": 433500 + }, + { + "epoch": 0.81, + "learning_rate": 0.00021853559281311235, + "loss": 4.2777, + "step": 434000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021844173980944086, + "loss": 4.2766, + "step": 434500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002183478868057693, + "loss": 4.2807, + "step": 435000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021825403380209776, + "loss": 4.2786, + "step": 435500 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021816018079842626, + "loss": 4.2924, + "step": 436000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021806632779475471, + "loss": 4.2792, + "step": 436500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002179724747910832, + "loss": 4.284, + "step": 437000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021787862178741167, + "loss": 4.2709, + "step": 437500 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021778476878374015, + "loss": 4.2788, + "step": 438000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002176909157800686, + "loss": 4.2804, + "step": 438500 + }, + { + "epoch": 0.82, + "learning_rate": 0.0002175970627763971, + "loss": 4.2787, + "step": 439000 + }, + { + "epoch": 0.82, + "learning_rate": 0.00021750320977272555, + "loss": 4.2891, + "step": 439500 + }, + { + "epoch": 0.83, + "learning_rate": 0.000217409356769054, + "loss": 4.2787, + "step": 440000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002173155037653825, + "loss": 4.2757, + "step": 440500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021722165076171096, + "loss": 4.2793, + "step": 441000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021712779775803943, + "loss": 4.2771, + "step": 441500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021703394475436788, + "loss": 4.2874, + "step": 442000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002169400917506964, + "loss": 4.2867, + "step": 442500 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021684623874702484, + "loss": 4.293, + "step": 443000 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002167523857433533, + "loss": 4.2633, + "step": 443500 + }, + { + "epoch": 0.83, + "learning_rate": 0.0002166585327396818, + "loss": 4.2578, + "step": 444000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00021656467973601024, + "loss": 4.2501, + "step": 444500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021647082673233872, + "loss": 4.2755, + "step": 445000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002163769737286672, + "loss": 4.2611, + "step": 445500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021628312072499565, + "loss": 4.2594, + "step": 446000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021618926772132413, + "loss": 4.2783, + "step": 446500 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002160954147176526, + "loss": 4.2789, + "step": 447000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021600156171398108, + "loss": 4.2734, + "step": 447500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021590770871030953, + "loss": 4.2652, + "step": 448000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021581385570663804, + "loss": 4.2554, + "step": 448500 + }, + { + "epoch": 0.84, + "learning_rate": 0.0002157200027029665, + "loss": 4.2737, + "step": 449000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021562614969929494, + "loss": 4.2768, + "step": 449500 + }, + { + "epoch": 0.84, + "learning_rate": 0.00021553229669562344, + "loss": 4.2797, + "step": 450000 + }, + { + "epoch": 0.85, + "learning_rate": 0.0002154384436919519, + "loss": 4.2645, + "step": 450500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021534459068828037, + "loss": 4.2993, + "step": 451000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021525073768460885, + "loss": 4.2764, + "step": 451500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021515688468093732, + "loss": 4.2728, + "step": 452000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021506303167726577, + "loss": 4.2797, + "step": 452500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021496917867359428, + "loss": 4.2753, + "step": 453000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021487532566992273, + "loss": 4.2689, + "step": 453500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021478147266625118, + "loss": 4.2779, + "step": 454000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021468761966257966, + "loss": 4.2649, + "step": 454500 + }, + { + "epoch": 0.85, + "learning_rate": 0.00021459376665890813, + "loss": 4.2652, + "step": 455000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002144999136552366, + "loss": 4.2896, + "step": 455500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021440606065156506, + "loss": 4.2937, + "step": 456000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021431220764789354, + "loss": 4.2737, + "step": 456500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021421835464422202, + "loss": 4.2747, + "step": 457000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021412450164055047, + "loss": 4.2596, + "step": 457500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021403064863687897, + "loss": 4.2499, + "step": 458000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021393679563320742, + "loss": 4.2678, + "step": 458500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021384294262953587, + "loss": 4.2618, + "step": 459000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021374908962586438, + "loss": 4.2678, + "step": 459500 + }, + { + "epoch": 0.86, + "learning_rate": 0.00021365523662219283, + "loss": 4.2614, + "step": 460000 + }, + { + "epoch": 0.86, + "learning_rate": 0.0002135613836185213, + "loss": 4.2823, + "step": 460500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021346753061484978, + "loss": 4.2742, + "step": 461000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021337367761117826, + "loss": 4.2699, + "step": 461500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002132798246075067, + "loss": 4.2681, + "step": 462000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021318597160383521, + "loss": 4.2648, + "step": 462500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021309211860016366, + "loss": 4.277, + "step": 463000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021299826559649212, + "loss": 4.2745, + "step": 463500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021290441259282062, + "loss": 4.2811, + "step": 464000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021281055958914907, + "loss": 4.2692, + "step": 464500 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021271670658547755, + "loss": 4.2891, + "step": 465000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00021262285358180602, + "loss": 4.2878, + "step": 465500 + }, + { + "epoch": 0.87, + "learning_rate": 0.0002125290005781345, + "loss": 4.265, + "step": 466000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021243514757446295, + "loss": 4.2714, + "step": 466500 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002123412945707914, + "loss": 4.2806, + "step": 467000 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002122474415671199, + "loss": 4.2564, + "step": 467500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021215358856344836, + "loss": 4.2518, + "step": 468000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021205973555977684, + "loss": 4.282, + "step": 468500 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002119658825561053, + "loss": 4.2601, + "step": 469000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021187202955243376, + "loss": 4.2715, + "step": 469500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021177817654876224, + "loss": 4.2638, + "step": 470000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00021168432354509072, + "loss": 4.2539, + "step": 470500 + }, + { + "epoch": 0.88, + "learning_rate": 0.0002115904705414192, + "loss": 4.27, + "step": 471000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021149661753774765, + "loss": 4.2664, + "step": 471500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021140276453407615, + "loss": 4.2567, + "step": 472000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002113089115304046, + "loss": 4.271, + "step": 472500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021121505852673305, + "loss": 4.2487, + "step": 473000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021112120552306156, + "loss": 4.2477, + "step": 473500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021102735251939, + "loss": 4.2482, + "step": 474000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021093349951571848, + "loss": 4.2735, + "step": 474500 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021083964651204696, + "loss": 4.2625, + "step": 475000 + }, + { + "epoch": 0.89, + "learning_rate": 0.00021074579350837544, + "loss": 4.2746, + "step": 475500 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002106519405047039, + "loss": 4.2719, + "step": 476000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0002105580875010324, + "loss": 4.2586, + "step": 476500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021046423449736084, + "loss": 4.2766, + "step": 477000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002103703814936893, + "loss": 4.2502, + "step": 477500 + }, + { + "epoch": 0.9, + "learning_rate": 0.0002102765284900178, + "loss": 4.2608, + "step": 478000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021018267548634625, + "loss": 4.2689, + "step": 478500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00021008882248267473, + "loss": 4.2725, + "step": 479000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020999496947900318, + "loss": 4.2519, + "step": 479500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020990111647533165, + "loss": 4.2506, + "step": 480000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020980726347166013, + "loss": 4.2739, + "step": 480500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020971341046798858, + "loss": 4.2557, + "step": 481000 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020961955746431709, + "loss": 4.2609, + "step": 481500 + }, + { + "epoch": 0.9, + "learning_rate": 0.00020952570446064554, + "loss": 4.2623, + "step": 482000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020943185145697399, + "loss": 4.276, + "step": 482500 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002093379984533025, + "loss": 4.2485, + "step": 483000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020924414544963094, + "loss": 4.2676, + "step": 483500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020915029244595942, + "loss": 4.2527, + "step": 484000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0002090564394422879, + "loss": 4.2803, + "step": 484500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020896258643861637, + "loss": 4.2584, + "step": 485000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020886873343494482, + "loss": 4.2635, + "step": 485500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020877488043127333, + "loss": 4.2583, + "step": 486000 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020868102742760178, + "loss": 4.2783, + "step": 486500 + }, + { + "epoch": 0.91, + "learning_rate": 0.00020858717442393023, + "loss": 4.2579, + "step": 487000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020849332142025873, + "loss": 4.2727, + "step": 487500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020839946841658718, + "loss": 4.2704, + "step": 488000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020830561541291566, + "loss": 4.2684, + "step": 488500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020821176240924414, + "loss": 4.2596, + "step": 489000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020811790940557262, + "loss": 4.2519, + "step": 489500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020802405640190107, + "loss": 4.2665, + "step": 490000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020793020339822952, + "loss": 4.2481, + "step": 490500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020783635039455802, + "loss": 4.2664, + "step": 491000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020774249739088647, + "loss": 4.2443, + "step": 491500 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020764864438721495, + "loss": 4.2506, + "step": 492000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00020755479138354343, + "loss": 4.2729, + "step": 492500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020746093837987188, + "loss": 4.2578, + "step": 493000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020736708537620035, + "loss": 4.2583, + "step": 493500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020727323237252883, + "loss": 4.2613, + "step": 494000 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002071793793688573, + "loss": 4.2652, + "step": 494500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020708552636518576, + "loss": 4.2533, + "step": 495000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020699167336151426, + "loss": 4.2594, + "step": 495500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020689782035784271, + "loss": 4.2733, + "step": 496000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020680396735417116, + "loss": 4.2422, + "step": 496500 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020671011435049967, + "loss": 4.2414, + "step": 497000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00020661626134682812, + "loss": 4.2533, + "step": 497500 + }, + { + "epoch": 0.93, + "learning_rate": 0.0002065224083431566, + "loss": 4.2763, + "step": 498000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020642855533948507, + "loss": 4.2528, + "step": 498500 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020633470233581355, + "loss": 4.2631, + "step": 499000 + }, + { + "epoch": 0.94, + "learning_rate": 0.000206240849332142, + "loss": 4.245, + "step": 499500 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002061469963284705, + "loss": 4.2573, + "step": 500000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020605314332479896, + "loss": 4.2552, + "step": 500500 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002059592903211274, + "loss": 4.265, + "step": 501000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002058654373174559, + "loss": 4.2606, + "step": 501500 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020577158431378436, + "loss": 4.2641, + "step": 502000 + }, + { + "epoch": 0.94, + "learning_rate": 0.00020567773131011284, + "loss": 4.2382, + "step": 502500 + }, + { + "epoch": 0.94, + "learning_rate": 0.0002055838783064413, + "loss": 4.2601, + "step": 503000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020549002530276977, + "loss": 4.2627, + "step": 503500 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020539617229909824, + "loss": 4.2453, + "step": 504000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002053023192954267, + "loss": 4.2559, + "step": 504500 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002052084662917552, + "loss": 4.2667, + "step": 505000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020511461328808365, + "loss": 4.2508, + "step": 505500 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002050207602844121, + "loss": 4.2531, + "step": 506000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002049269072807406, + "loss": 4.2831, + "step": 506500 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020483305427706905, + "loss": 4.2682, + "step": 507000 + }, + { + "epoch": 0.95, + "learning_rate": 0.00020473920127339753, + "loss": 4.2628, + "step": 507500 + }, + { + "epoch": 0.95, + "learning_rate": 0.000204645348269726, + "loss": 4.2403, + "step": 508000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0002045514952660545, + "loss": 4.2521, + "step": 508500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020445764226238294, + "loss": 4.2387, + "step": 509000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020436378925871144, + "loss": 4.2724, + "step": 509500 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002042699362550399, + "loss": 4.2577, + "step": 510000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020417608325136834, + "loss": 4.2621, + "step": 510500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020408223024769685, + "loss": 4.248, + "step": 511000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0002039883772440253, + "loss": 4.2503, + "step": 511500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020389452424035377, + "loss": 4.2477, + "step": 512000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020380067123668225, + "loss": 4.2293, + "step": 512500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020370681823301073, + "loss": 4.2378, + "step": 513000 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020361296522933918, + "loss": 4.2499, + "step": 513500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00020351911222566766, + "loss": 4.2397, + "step": 514000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020342525922199613, + "loss": 4.2635, + "step": 514500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020333140621832458, + "loss": 4.24, + "step": 515000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020323755321465303, + "loss": 4.2453, + "step": 515500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020314370021098154, + "loss": 4.2573, + "step": 516000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020304984720731, + "loss": 4.249, + "step": 516500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020295599420363847, + "loss": 4.2683, + "step": 517000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020286214119996694, + "loss": 4.2641, + "step": 517500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020276828819629542, + "loss": 4.255, + "step": 518000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020267443519262387, + "loss": 4.2394, + "step": 518500 + }, + { + "epoch": 0.97, + "learning_rate": 0.00020258058218895238, + "loss": 4.2414, + "step": 519000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020248672918528083, + "loss": 4.2488, + "step": 519500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020239287618160928, + "loss": 4.2479, + "step": 520000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020229902317793778, + "loss": 4.2705, + "step": 520500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020220517017426623, + "loss": 4.2416, + "step": 521000 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002021113171705947, + "loss": 4.2642, + "step": 521500 + }, + { + "epoch": 0.98, + "learning_rate": 0.0002020174641669232, + "loss": 4.2613, + "step": 522000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020192361116325166, + "loss": 4.2429, + "step": 522500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020182975815958011, + "loss": 4.2542, + "step": 523000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020173590515590862, + "loss": 4.244, + "step": 523500 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020164205215223707, + "loss": 4.2483, + "step": 524000 + }, + { + "epoch": 0.98, + "learning_rate": 0.00020154819914856552, + "loss": 4.2476, + "step": 524500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020145434614489402, + "loss": 4.2525, + "step": 525000 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020136049314122247, + "loss": 4.2519, + "step": 525500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020126664013755092, + "loss": 4.2441, + "step": 526000 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020117278713387943, + "loss": 4.2348, + "step": 526500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020107893413020788, + "loss": 4.2528, + "step": 527000 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020098508112653636, + "loss": 4.2333, + "step": 527500 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002008912281228648, + "loss": 4.2449, + "step": 528000 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002007973751191933, + "loss": 4.2469, + "step": 528500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020070352211552176, + "loss": 4.2464, + "step": 529000 + }, + { + "epoch": 0.99, + "learning_rate": 0.0002006096691118502, + "loss": 4.2355, + "step": 529500 + }, + { + "epoch": 0.99, + "learning_rate": 0.00020051581610817872, + "loss": 4.2236, + "step": 530000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020042196310450717, + "loss": 4.2422, + "step": 530500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020032811010083564, + "loss": 4.2556, + "step": 531000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020023425709716412, + "loss": 4.2515, + "step": 531500 + }, + { + "epoch": 1.0, + "learning_rate": 0.0002001404040934926, + "loss": 4.2271, + "step": 532000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00020004655108982105, + "loss": 4.2359, + "step": 532500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019995269808614955, + "loss": 4.2297, + "step": 533000 + }, + { + "epoch": 1.0, + "learning_rate": 0.000199858845082478, + "loss": 4.2488, + "step": 533500 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019976499207880646, + "loss": 4.2258, + "step": 534000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00019967113907513496, + "loss": 4.2224, + "step": 534500 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001995772860714634, + "loss": 4.2312, + "step": 535000 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001994834330677919, + "loss": 4.2321, + "step": 535500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019938958006412036, + "loss": 4.2091, + "step": 536000 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019929572706044882, + "loss": 4.224, + "step": 536500 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001992018740567773, + "loss": 4.2276, + "step": 537000 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019910802105310577, + "loss": 4.259, + "step": 537500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019901416804943425, + "loss": 4.2215, + "step": 538000 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001989203150457627, + "loss": 4.2088, + "step": 538500 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019882646204209115, + "loss": 4.2335, + "step": 539000 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019873260903841965, + "loss": 4.2255, + "step": 539500 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001986387560347481, + "loss": 4.2327, + "step": 540000 + }, + { + "epoch": 1.01, + "learning_rate": 0.00019854490303107658, + "loss": 4.2239, + "step": 540500 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019845105002740506, + "loss": 4.2246, + "step": 541000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019835719702373354, + "loss": 4.2427, + "step": 541500 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019826334402006199, + "loss": 4.2361, + "step": 542000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001981694910163905, + "loss": 4.2247, + "step": 542500 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019807563801271894, + "loss": 4.2346, + "step": 543000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001979817850090474, + "loss": 4.2269, + "step": 543500 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001978879320053759, + "loss": 4.2321, + "step": 544000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019779407900170435, + "loss": 4.2198, + "step": 544500 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019770022599803282, + "loss": 4.2388, + "step": 545000 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001976063729943613, + "loss": 4.23, + "step": 545500 + }, + { + "epoch": 1.02, + "learning_rate": 0.00019751251999068978, + "loss": 4.2329, + "step": 546000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019741866698701823, + "loss": 4.2312, + "step": 546500 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001973248139833467, + "loss": 4.2313, + "step": 547000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019723096097967518, + "loss": 4.2346, + "step": 547500 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019713710797600363, + "loss": 4.243, + "step": 548000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019704325497233214, + "loss": 4.2174, + "step": 548500 + }, + { + "epoch": 1.03, + "learning_rate": 0.0001969494019686606, + "loss": 4.2352, + "step": 549000 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019685554896498904, + "loss": 4.2285, + "step": 549500 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019676169596131754, + "loss": 4.2227, + "step": 550000 + }, + { + "epoch": 1.03, + "learning_rate": 0.000196667842957646, + "loss": 4.215, + "step": 550500 + }, + { + "epoch": 1.03, + "learning_rate": 0.00019657398995397447, + "loss": 4.2237, + "step": 551000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019648013695030292, + "loss": 4.2057, + "step": 551500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019638628394663143, + "loss": 4.2184, + "step": 552000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019629243094295988, + "loss": 4.2099, + "step": 552500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019619857793928833, + "loss": 4.2225, + "step": 553000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019610472493561683, + "loss": 4.2369, + "step": 553500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019601087193194528, + "loss": 4.2086, + "step": 554000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019591701892827376, + "loss": 4.2253, + "step": 554500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019582316592460224, + "loss": 4.2251, + "step": 555000 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001957293129209307, + "loss": 4.2226, + "step": 555500 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019563545991725916, + "loss": 4.2374, + "step": 556000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00019554160691358767, + "loss": 4.2233, + "step": 556500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019544775390991612, + "loss": 4.228, + "step": 557000 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019535390090624457, + "loss": 4.2256, + "step": 557500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019526004790257307, + "loss": 4.2303, + "step": 558000 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019516619489890152, + "loss": 4.2313, + "step": 558500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019507234189523, + "loss": 4.2416, + "step": 559000 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019497848889155848, + "loss": 4.2075, + "step": 559500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019488463588788693, + "loss": 4.2282, + "step": 560000 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001947907828842154, + "loss": 4.2386, + "step": 560500 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019469692988054388, + "loss": 4.1995, + "step": 561000 + }, + { + "epoch": 1.05, + "learning_rate": 0.00019460307687687236, + "loss": 4.2351, + "step": 561500 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001945092238732008, + "loss": 4.2391, + "step": 562000 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019441537086952932, + "loss": 4.2291, + "step": 562500 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019432151786585777, + "loss": 4.2197, + "step": 563000 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019422766486218622, + "loss": 4.2284, + "step": 563500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001941338118585147, + "loss": 4.24, + "step": 564000 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019403995885484317, + "loss": 4.2367, + "step": 564500 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019394610585117165, + "loss": 4.2241, + "step": 565000 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001938522528475001, + "loss": 4.2227, + "step": 565500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001937583998438286, + "loss": 4.2124, + "step": 566000 + }, + { + "epoch": 1.06, + "learning_rate": 0.00019366454684015705, + "loss": 4.2236, + "step": 566500 + }, + { + "epoch": 1.06, + "learning_rate": 0.0001935706938364855, + "loss": 4.2193, + "step": 567000 + }, + { + "epoch": 1.07, + "learning_rate": 0.000193476840832814, + "loss": 4.2251, + "step": 567500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019338298782914246, + "loss": 4.2165, + "step": 568000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019328913482547094, + "loss": 4.2178, + "step": 568500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019319528182179941, + "loss": 4.2291, + "step": 569000 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001931014288181279, + "loss": 4.2278, + "step": 569500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019300757581445634, + "loss": 4.2115, + "step": 570000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019291372281078482, + "loss": 4.2327, + "step": 570500 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001928198698071133, + "loss": 4.2181, + "step": 571000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019272601680344175, + "loss": 4.2258, + "step": 571500 + }, + { + "epoch": 1.07, + "learning_rate": 0.00019263216379977025, + "loss": 4.2262, + "step": 572000 + }, + { + "epoch": 1.07, + "learning_rate": 0.0001925383107960987, + "loss": 4.2313, + "step": 572500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019244445779242715, + "loss": 4.2121, + "step": 573000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019235060478875566, + "loss": 4.2348, + "step": 573500 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001922567517850841, + "loss": 4.2235, + "step": 574000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019216289878141258, + "loss": 4.2236, + "step": 574500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019206904577774103, + "loss": 4.2327, + "step": 575000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019197519277406954, + "loss": 4.2335, + "step": 575500 + }, + { + "epoch": 1.08, + "learning_rate": 0.000191881339770398, + "loss": 4.2289, + "step": 576000 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019178748676672644, + "loss": 4.2433, + "step": 576500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019169363376305494, + "loss": 4.2198, + "step": 577000 + }, + { + "epoch": 1.08, + "learning_rate": 0.0001915997807593834, + "loss": 4.2281, + "step": 577500 + }, + { + "epoch": 1.08, + "learning_rate": 0.00019150592775571187, + "loss": 4.2201, + "step": 578000 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019141207475204035, + "loss": 4.2255, + "step": 578500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019131822174836883, + "loss": 4.2203, + "step": 579000 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019122436874469728, + "loss": 4.2217, + "step": 579500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019113051574102578, + "loss": 4.223, + "step": 580000 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019103666273735423, + "loss": 4.2309, + "step": 580500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019094280973368268, + "loss": 4.2395, + "step": 581000 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019084895673001119, + "loss": 4.2214, + "step": 581500 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019075510372633964, + "loss": 4.2269, + "step": 582000 + }, + { + "epoch": 1.09, + "learning_rate": 0.00019066125072266811, + "loss": 4.1937, + "step": 582500 + }, + { + "epoch": 1.09, + "learning_rate": 0.0001905673977189966, + "loss": 4.2311, + "step": 583000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019047354471532504, + "loss": 4.2116, + "step": 583500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019037969171165352, + "loss": 4.2534, + "step": 584000 + }, + { + "epoch": 1.1, + "learning_rate": 0.000190285838707982, + "loss": 4.2268, + "step": 584500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019019198570431047, + "loss": 4.2081, + "step": 585000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019009813270063892, + "loss": 4.2207, + "step": 585500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00019000427969696743, + "loss": 4.2161, + "step": 586000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018991042669329588, + "loss": 4.2327, + "step": 586500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018981657368962433, + "loss": 4.2175, + "step": 587000 + }, + { + "epoch": 1.1, + "learning_rate": 0.0001897227206859528, + "loss": 4.2299, + "step": 587500 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018962886768228128, + "loss": 4.2199, + "step": 588000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00018953501467860976, + "loss": 4.2339, + "step": 588500 + }, + { + "epoch": 1.11, + "learning_rate": 0.0001894411616749382, + "loss": 4.2331, + "step": 589000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018934730867126672, + "loss": 4.2203, + "step": 589500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018925345566759517, + "loss": 4.2066, + "step": 590000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018915960266392362, + "loss": 4.2226, + "step": 590500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018906574966025212, + "loss": 4.2288, + "step": 591000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018897189665658057, + "loss": 4.2285, + "step": 591500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018887804365290905, + "loss": 4.1987, + "step": 592000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018878419064923753, + "loss": 4.2278, + "step": 592500 + }, + { + "epoch": 1.11, + "learning_rate": 0.000188690337645566, + "loss": 4.209, + "step": 593000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018859648464189445, + "loss": 4.233, + "step": 593500 + }, + { + "epoch": 1.11, + "learning_rate": 0.00018850263163822293, + "loss": 4.2264, + "step": 594000 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001884087786345514, + "loss": 4.2198, + "step": 594500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018831492563087986, + "loss": 4.2152, + "step": 595000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018822107262720836, + "loss": 4.2281, + "step": 595500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018812721962353681, + "loss": 4.215, + "step": 596000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018803336661986526, + "loss": 4.238, + "step": 596500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018793951361619377, + "loss": 4.2216, + "step": 597000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018784566061252222, + "loss": 4.2376, + "step": 597500 + }, + { + "epoch": 1.12, + "learning_rate": 0.0001877518076088507, + "loss": 4.2052, + "step": 598000 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018765795460517917, + "loss": 4.2181, + "step": 598500 + }, + { + "epoch": 1.12, + "learning_rate": 0.00018756410160150765, + "loss": 4.2172, + "step": 599000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001874702485978361, + "loss": 4.2144, + "step": 599500 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018737639559416455, + "loss": 4.2155, + "step": 600000 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018728254259049306, + "loss": 4.2061, + "step": 600500 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001871886895868215, + "loss": 4.2325, + "step": 601000 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018709483658314998, + "loss": 4.203, + "step": 601500 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018700098357947846, + "loss": 4.2066, + "step": 602000 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018690713057580694, + "loss": 4.2303, + "step": 602500 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001868132775721354, + "loss": 4.2242, + "step": 603000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001867194245684639, + "loss": 4.2095, + "step": 603500 + }, + { + "epoch": 1.13, + "learning_rate": 0.00018662557156479234, + "loss": 4.2156, + "step": 604000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0001865317185611208, + "loss": 4.2042, + "step": 604500 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001864378655574493, + "loss": 4.2106, + "step": 605000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018634401255377775, + "loss": 4.208, + "step": 605500 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018625015955010623, + "loss": 4.2211, + "step": 606000 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001861563065464347, + "loss": 4.2134, + "step": 606500 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018606245354276316, + "loss": 4.225, + "step": 607000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018596860053909163, + "loss": 4.2293, + "step": 607500 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001858747475354201, + "loss": 4.2237, + "step": 608000 + }, + { + "epoch": 1.14, + "learning_rate": 0.0001857808945317486, + "loss": 4.2054, + "step": 608500 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018568704152807704, + "loss": 4.2071, + "step": 609000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00018559318852440554, + "loss": 4.2144, + "step": 609500 + }, + { + "epoch": 1.15, + "learning_rate": 0.000185499335520734, + "loss": 4.2135, + "step": 610000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018540548251706244, + "loss": 4.2218, + "step": 610500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018531162951339095, + "loss": 4.2164, + "step": 611000 + }, + { + "epoch": 1.15, + "learning_rate": 0.0001852177765097194, + "loss": 4.224, + "step": 611500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018512392350604788, + "loss": 4.2092, + "step": 612000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018503007050237633, + "loss": 4.2103, + "step": 612500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018493621749870483, + "loss": 4.2111, + "step": 613000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018484236449503328, + "loss": 4.2203, + "step": 613500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018474851149136173, + "loss": 4.2308, + "step": 614000 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018465465848769024, + "loss": 4.2132, + "step": 614500 + }, + { + "epoch": 1.15, + "learning_rate": 0.00018456080548401869, + "loss": 4.2204, + "step": 615000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018446695248034716, + "loss": 4.2039, + "step": 615500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018437309947667564, + "loss": 4.2223, + "step": 616000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018427924647300412, + "loss": 4.2024, + "step": 616500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018418539346933257, + "loss": 4.1935, + "step": 617000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018409154046566105, + "loss": 4.214, + "step": 617500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018399768746198952, + "loss": 4.226, + "step": 618000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018390383445831797, + "loss": 4.2093, + "step": 618500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018380998145464648, + "loss": 4.2317, + "step": 619000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018371612845097493, + "loss": 4.2188, + "step": 619500 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018362227544730338, + "loss": 4.2172, + "step": 620000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00018352842244363188, + "loss": 4.2123, + "step": 620500 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018343456943996033, + "loss": 4.2063, + "step": 621000 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001833407164362888, + "loss": 4.2137, + "step": 621500 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001832468634326173, + "loss": 4.2104, + "step": 622000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018315301042894577, + "loss": 4.2196, + "step": 622500 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018305915742527422, + "loss": 4.2209, + "step": 623000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018296530442160267, + "loss": 4.2221, + "step": 623500 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018287145141793117, + "loss": 4.1967, + "step": 624000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018277759841425962, + "loss": 4.2119, + "step": 624500 + }, + { + "epoch": 1.17, + "learning_rate": 0.0001826837454105881, + "loss": 4.2446, + "step": 625000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00018258989240691658, + "loss": 4.2099, + "step": 625500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018249603940324505, + "loss": 4.2067, + "step": 626000 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001824021863995735, + "loss": 4.2227, + "step": 626500 + }, + { + "epoch": 1.18, + "learning_rate": 0.000182308333395902, + "loss": 4.2201, + "step": 627000 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018221448039223046, + "loss": 4.209, + "step": 627500 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001821206273885589, + "loss": 4.2072, + "step": 628000 + }, + { + "epoch": 1.18, + "learning_rate": 0.0001820267743848874, + "loss": 4.1922, + "step": 628500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018193292138121586, + "loss": 4.2127, + "step": 629000 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018183906837754434, + "loss": 4.2231, + "step": 629500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018174521537387282, + "loss": 4.2032, + "step": 630000 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018165136237020127, + "loss": 4.2178, + "step": 630500 + }, + { + "epoch": 1.18, + "learning_rate": 0.00018155750936652975, + "loss": 4.2204, + "step": 631000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018146365636285822, + "loss": 4.2148, + "step": 631500 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001813698033591867, + "loss": 4.2049, + "step": 632000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018127595035551515, + "loss": 4.2093, + "step": 632500 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018118209735184366, + "loss": 4.2281, + "step": 633000 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001810882443481721, + "loss": 4.1951, + "step": 633500 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018099439134450056, + "loss": 4.2293, + "step": 634000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018090053834082906, + "loss": 4.1989, + "step": 634500 + }, + { + "epoch": 1.19, + "learning_rate": 0.0001808066853371575, + "loss": 4.2071, + "step": 635000 + }, + { + "epoch": 1.19, + "learning_rate": 0.000180712832333486, + "loss": 4.2136, + "step": 635500 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018061897932981444, + "loss": 4.2002, + "step": 636000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00018052512632614294, + "loss": 4.213, + "step": 636500 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001804312733224714, + "loss": 4.1991, + "step": 637000 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018033742031879984, + "loss": 4.2209, + "step": 637500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018024356731512835, + "loss": 4.2063, + "step": 638000 + }, + { + "epoch": 1.2, + "learning_rate": 0.0001801497143114568, + "loss": 4.182, + "step": 638500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00018005586130778528, + "loss": 4.2142, + "step": 639000 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017996200830411375, + "loss": 4.2075, + "step": 639500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017986815530044223, + "loss": 4.2225, + "step": 640000 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017977430229677068, + "loss": 4.1956, + "step": 640500 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017968044929309916, + "loss": 4.2198, + "step": 641000 + }, + { + "epoch": 1.2, + "learning_rate": 0.00017958659628942764, + "loss": 4.2175, + "step": 641500 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001794927432857561, + "loss": 4.2163, + "step": 642000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001793988902820846, + "loss": 4.2066, + "step": 642500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017930503727841304, + "loss": 4.1946, + "step": 643000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001792111842747415, + "loss": 4.203, + "step": 643500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017911733127107, + "loss": 4.2222, + "step": 644000 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017902347826739845, + "loss": 4.2074, + "step": 644500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017892962526372692, + "loss": 4.2008, + "step": 645000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0001788357722600554, + "loss": 4.1977, + "step": 645500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017874191925638388, + "loss": 4.2094, + "step": 646000 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017864806625271233, + "loss": 4.1833, + "step": 646500 + }, + { + "epoch": 1.21, + "learning_rate": 0.00017855421324904083, + "loss": 4.2017, + "step": 647000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017846036024536928, + "loss": 4.1936, + "step": 647500 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017836650724169773, + "loss": 4.1945, + "step": 648000 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001782726542380262, + "loss": 4.2016, + "step": 648500 + }, + { + "epoch": 1.22, + "learning_rate": 0.0001781788012343547, + "loss": 4.2021, + "step": 649000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017808494823068317, + "loss": 4.2121, + "step": 649500 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017799109522701162, + "loss": 4.2016, + "step": 650000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017789724222334012, + "loss": 4.1895, + "step": 650500 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017780338921966857, + "loss": 4.2204, + "step": 651000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017770953621599702, + "loss": 4.1967, + "step": 651500 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017761568321232553, + "loss": 4.1896, + "step": 652000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00017752183020865398, + "loss": 4.2053, + "step": 652500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017742797720498245, + "loss": 4.217, + "step": 653000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017733412420131093, + "loss": 4.2119, + "step": 653500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017724027119763938, + "loss": 4.1935, + "step": 654000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017714641819396786, + "loss": 4.2168, + "step": 654500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017705256519029634, + "loss": 4.2104, + "step": 655000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017695871218662481, + "loss": 4.2074, + "step": 655500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017686485918295326, + "loss": 4.1953, + "step": 656000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017677100617928177, + "loss": 4.2014, + "step": 656500 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017667715317561022, + "loss": 4.2013, + "step": 657000 + }, + { + "epoch": 1.23, + "learning_rate": 0.00017658330017193867, + "loss": 4.1982, + "step": 657500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017648944716826717, + "loss": 4.1911, + "step": 658000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017639559416459562, + "loss": 4.2104, + "step": 658500 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001763017411609241, + "loss": 4.2123, + "step": 659000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017620788815725258, + "loss": 4.2062, + "step": 659500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017611403515358106, + "loss": 4.2148, + "step": 660000 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001760201821499095, + "loss": 4.1897, + "step": 660500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017592632914623796, + "loss": 4.1842, + "step": 661000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017583247614256646, + "loss": 4.1976, + "step": 661500 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001757386231388949, + "loss": 4.1973, + "step": 662000 + }, + { + "epoch": 1.24, + "learning_rate": 0.0001756447701352234, + "loss": 4.1975, + "step": 662500 + }, + { + "epoch": 1.24, + "learning_rate": 0.00017555091713155187, + "loss": 4.1829, + "step": 663000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017545706412788034, + "loss": 4.2162, + "step": 663500 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001753632111242088, + "loss": 4.1734, + "step": 664000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017526935812053727, + "loss": 4.2041, + "step": 664500 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017517550511686575, + "loss": 4.1929, + "step": 665000 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001750816521131942, + "loss": 4.2058, + "step": 665500 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001749877991095227, + "loss": 4.1985, + "step": 666000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017489394610585115, + "loss": 4.2067, + "step": 666500 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001748000931021796, + "loss": 4.1944, + "step": 667000 + }, + { + "epoch": 1.25, + "learning_rate": 0.0001747062400985081, + "loss": 4.2133, + "step": 667500 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017461238709483656, + "loss": 4.1892, + "step": 668000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00017451853409116504, + "loss": 4.2097, + "step": 668500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017442468108749351, + "loss": 4.2034, + "step": 669000 + }, + { + "epoch": 1.26, + "learning_rate": 0.000174330828083822, + "loss": 4.1851, + "step": 669500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017423697508015044, + "loss": 4.212, + "step": 670000 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017414312207647895, + "loss": 4.2136, + "step": 670500 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001740492690728074, + "loss": 4.1989, + "step": 671000 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017395541606913585, + "loss": 4.2107, + "step": 671500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017386156306546432, + "loss": 4.1927, + "step": 672000 + }, + { + "epoch": 1.26, + "learning_rate": 0.0001737677100617928, + "loss": 4.1823, + "step": 672500 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017367385705812128, + "loss": 4.1967, + "step": 673000 + }, + { + "epoch": 1.26, + "learning_rate": 0.00017358000405444973, + "loss": 4.1905, + "step": 673500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017348615105077823, + "loss": 4.2168, + "step": 674000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017339229804710668, + "loss": 4.1814, + "step": 674500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017329844504343514, + "loss": 4.2118, + "step": 675000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017320459203976364, + "loss": 4.2121, + "step": 675500 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001731107390360921, + "loss": 4.192, + "step": 676000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017301688603242057, + "loss": 4.1885, + "step": 676500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017292303302874904, + "loss": 4.2103, + "step": 677000 + }, + { + "epoch": 1.27, + "learning_rate": 0.0001728291800250775, + "loss": 4.1964, + "step": 677500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017273532702140597, + "loss": 4.1836, + "step": 678000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017264147401773445, + "loss": 4.2167, + "step": 678500 + }, + { + "epoch": 1.27, + "learning_rate": 0.00017254762101406293, + "loss": 4.1908, + "step": 679000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017245376801039138, + "loss": 4.2083, + "step": 679500 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017235991500671988, + "loss": 4.2022, + "step": 680000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017226606200304833, + "loss": 4.1904, + "step": 680500 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017217220899937678, + "loss": 4.2128, + "step": 681000 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001720783559957053, + "loss": 4.2046, + "step": 681500 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017198450299203374, + "loss": 4.2045, + "step": 682000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017189064998836222, + "loss": 4.2121, + "step": 682500 + }, + { + "epoch": 1.28, + "learning_rate": 0.0001717967969846907, + "loss": 4.1857, + "step": 683000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017170294398101917, + "loss": 4.2036, + "step": 683500 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017160909097734762, + "loss": 4.2028, + "step": 684000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00017151523797367607, + "loss": 4.1933, + "step": 684500 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017142138497000458, + "loss": 4.1952, + "step": 685000 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017132753196633303, + "loss": 4.1993, + "step": 685500 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001712336789626615, + "loss": 4.187, + "step": 686000 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017113982595898998, + "loss": 4.2031, + "step": 686500 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017104597295531846, + "loss": 4.198, + "step": 687000 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001709521199516469, + "loss": 4.2016, + "step": 687500 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017085826694797539, + "loss": 4.2017, + "step": 688000 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017076441394430386, + "loss": 4.1881, + "step": 688500 + }, + { + "epoch": 1.29, + "learning_rate": 0.0001706705609406323, + "loss": 4.1924, + "step": 689000 + }, + { + "epoch": 1.29, + "learning_rate": 0.00017057670793696082, + "loss": 4.1897, + "step": 689500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017048285493328927, + "loss": 4.1903, + "step": 690000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017038900192961772, + "loss": 4.1983, + "step": 690500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017029514892594622, + "loss": 4.199, + "step": 691000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017020129592227467, + "loss": 4.216, + "step": 691500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017010744291860315, + "loss": 4.1864, + "step": 692000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00017001358991493163, + "loss": 4.1839, + "step": 692500 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001699197369112601, + "loss": 4.2082, + "step": 693000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016982588390758856, + "loss": 4.1988, + "step": 693500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016973203090391706, + "loss": 4.2104, + "step": 694000 + }, + { + "epoch": 1.3, + "learning_rate": 0.0001696381779002455, + "loss": 4.1891, + "step": 694500 + }, + { + "epoch": 1.3, + "learning_rate": 0.00016954432489657396, + "loss": 4.1901, + "step": 695000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016945047189290247, + "loss": 4.1977, + "step": 695500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016935661888923092, + "loss": 4.1946, + "step": 696000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001692627658855594, + "loss": 4.1983, + "step": 696500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016916891288188784, + "loss": 4.1952, + "step": 697000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016907505987821635, + "loss": 4.2005, + "step": 697500 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001689812068745448, + "loss": 4.1933, + "step": 698000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016888735387087325, + "loss": 4.2106, + "step": 698500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016879350086720175, + "loss": 4.1811, + "step": 699000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0001686996478635302, + "loss": 4.1948, + "step": 699500 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016860579485985865, + "loss": 4.2029, + "step": 700000 + }, + { + "epoch": 1.31, + "learning_rate": 0.00016851194185618716, + "loss": 4.1943, + "step": 700500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001684180888525156, + "loss": 4.185, + "step": 701000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016832423584884409, + "loss": 4.1702, + "step": 701500 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016823038284517256, + "loss": 4.1864, + "step": 702000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016813652984150104, + "loss": 4.1979, + "step": 702500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001680426768378295, + "loss": 4.1825, + "step": 703000 + }, + { + "epoch": 1.32, + "learning_rate": 0.000167948823834158, + "loss": 4.2068, + "step": 703500 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016785497083048645, + "loss": 4.1958, + "step": 704000 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001677611178268149, + "loss": 4.1965, + "step": 704500 + }, + { + "epoch": 1.32, + "learning_rate": 0.0001676672648231434, + "loss": 4.1962, + "step": 705000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00016757341181947185, + "loss": 4.1855, + "step": 705500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016747955881580033, + "loss": 4.1735, + "step": 706000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001673857058121288, + "loss": 4.1977, + "step": 706500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016729185280845728, + "loss": 4.1997, + "step": 707000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016719799980478573, + "loss": 4.1971, + "step": 707500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016710414680111424, + "loss": 4.1821, + "step": 708000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001670102937974427, + "loss": 4.1996, + "step": 708500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016691644079377114, + "loss": 4.1816, + "step": 709000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016682258779009962, + "loss": 4.1945, + "step": 709500 + }, + { + "epoch": 1.33, + "learning_rate": 0.0001667287347864281, + "loss": 4.2, + "step": 710000 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016663488178275654, + "loss": 4.1884, + "step": 710500 + }, + { + "epoch": 1.33, + "learning_rate": 0.00016654102877908502, + "loss": 4.2082, + "step": 711000 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001664471757754135, + "loss": 4.1925, + "step": 711500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016635332277174198, + "loss": 4.1997, + "step": 712000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016625946976807043, + "loss": 4.178, + "step": 712500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016616561676439893, + "loss": 4.1777, + "step": 713000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016607176376072738, + "loss": 4.1983, + "step": 713500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016597791075705583, + "loss": 4.1841, + "step": 714000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016588405775338434, + "loss": 4.192, + "step": 714500 + }, + { + "epoch": 1.34, + "learning_rate": 0.0001657902047497128, + "loss": 4.1998, + "step": 715000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016569635174604126, + "loss": 4.1924, + "step": 715500 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016560249874236974, + "loss": 4.1828, + "step": 716000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00016550864573869822, + "loss": 4.1877, + "step": 716500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016541479273502667, + "loss": 4.182, + "step": 717000 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016532093973135517, + "loss": 4.1934, + "step": 717500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016522708672768362, + "loss": 4.21, + "step": 718000 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016513323372401207, + "loss": 4.1916, + "step": 718500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016503938072034058, + "loss": 4.1703, + "step": 719000 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016494552771666903, + "loss": 4.1835, + "step": 719500 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001648516747129975, + "loss": 4.1846, + "step": 720000 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016475782170932596, + "loss": 4.1917, + "step": 720500 + }, + { + "epoch": 1.35, + "learning_rate": 0.00016466396870565443, + "loss": 4.1941, + "step": 721000 + }, + { + "epoch": 1.35, + "learning_rate": 0.0001645701157019829, + "loss": 4.1991, + "step": 721500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016447626269831136, + "loss": 4.1985, + "step": 722000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016438240969463987, + "loss": 4.193, + "step": 722500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016428855669096832, + "loss": 4.1767, + "step": 723000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016419470368729677, + "loss": 4.1779, + "step": 723500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016410085068362527, + "loss": 4.2041, + "step": 724000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016400699767995372, + "loss": 4.1821, + "step": 724500 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001639131446762822, + "loss": 4.2033, + "step": 725000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016381929167261068, + "loss": 4.1786, + "step": 725500 + }, + { + "epoch": 1.36, + "learning_rate": 0.00016372543866893915, + "loss": 4.1741, + "step": 726000 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001636315856652676, + "loss": 4.2011, + "step": 726500 + }, + { + "epoch": 1.36, + "learning_rate": 0.0001635377326615961, + "loss": 4.1889, + "step": 727000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016344387965792456, + "loss": 4.1788, + "step": 727500 + }, + { + "epoch": 1.37, + "learning_rate": 0.000163350026654253, + "loss": 4.1782, + "step": 728000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016325617365058151, + "loss": 4.1972, + "step": 728500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016316232064690996, + "loss": 4.1815, + "step": 729000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016306846764323844, + "loss": 4.1941, + "step": 729500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016297461463956692, + "loss": 4.2002, + "step": 730000 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001628807616358954, + "loss": 4.1908, + "step": 730500 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016278690863222385, + "loss": 4.1763, + "step": 731000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016269305562855232, + "loss": 4.1936, + "step": 731500 + }, + { + "epoch": 1.37, + "learning_rate": 0.0001625992026248808, + "loss": 4.1784, + "step": 732000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00016250534962120925, + "loss": 4.1885, + "step": 732500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016241149661753773, + "loss": 4.1771, + "step": 733000 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001623176436138662, + "loss": 4.191, + "step": 733500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016222379061019466, + "loss": 4.1733, + "step": 734000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016212993760652313, + "loss": 4.1905, + "step": 734500 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001620360846028516, + "loss": 4.1791, + "step": 735000 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001619422315991801, + "loss": 4.1753, + "step": 735500 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016184837859550854, + "loss": 4.1822, + "step": 736000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016175452559183704, + "loss": 4.1787, + "step": 736500 + }, + { + "epoch": 1.38, + "learning_rate": 0.0001616606725881655, + "loss": 4.1932, + "step": 737000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00016156681958449394, + "loss": 4.1827, + "step": 737500 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016147296658082245, + "loss": 4.1915, + "step": 738000 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001613791135771509, + "loss": 4.1811, + "step": 738500 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016128526057347938, + "loss": 4.1798, + "step": 739000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016119140756980785, + "loss": 4.1899, + "step": 739500 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016109755456613633, + "loss": 4.2125, + "step": 740000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016100370156246478, + "loss": 4.1874, + "step": 740500 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001609098485587933, + "loss": 4.1687, + "step": 741000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016081599555512174, + "loss": 4.1913, + "step": 741500 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001607221425514502, + "loss": 4.1759, + "step": 742000 + }, + { + "epoch": 1.39, + "learning_rate": 0.0001606282895477787, + "loss": 4.1903, + "step": 742500 + }, + { + "epoch": 1.39, + "learning_rate": 0.00016053443654410714, + "loss": 4.1879, + "step": 743000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016044058354043562, + "loss": 4.1911, + "step": 743500 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001603467305367641, + "loss": 4.1772, + "step": 744000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016025287753309255, + "loss": 4.2081, + "step": 744500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016015902452942102, + "loss": 4.1775, + "step": 745000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00016006517152574948, + "loss": 4.2105, + "step": 745500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00015997131852207798, + "loss": 4.193, + "step": 746000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00015987746551840643, + "loss": 4.1761, + "step": 746500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00015978361251473488, + "loss": 4.1868, + "step": 747000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00015968975951106338, + "loss": 4.171, + "step": 747500 + }, + { + "epoch": 1.4, + "learning_rate": 0.00015959590650739184, + "loss": 4.1779, + "step": 748000 + }, + { + "epoch": 1.4, + "learning_rate": 0.0001595020535037203, + "loss": 4.1982, + "step": 748500 + }, + { + "epoch": 1.41, + "learning_rate": 0.0001594082005000488, + "loss": 4.186, + "step": 749000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015931434749637727, + "loss": 4.1693, + "step": 749500 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015922049449270572, + "loss": 4.1854, + "step": 750000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015912664148903422, + "loss": 4.1853, + "step": 750500 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015903278848536267, + "loss": 4.1884, + "step": 751000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015893893548169112, + "loss": 4.1863, + "step": 751500 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015884508247801963, + "loss": 4.192, + "step": 752000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015875122947434808, + "loss": 4.1714, + "step": 752500 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015865737647067656, + "loss": 4.1813, + "step": 753000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00015856352346700503, + "loss": 4.1751, + "step": 753500 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001584696704633335, + "loss": 4.1841, + "step": 754000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015837581745966196, + "loss": 4.1959, + "step": 754500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015828196445599044, + "loss": 4.1777, + "step": 755000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015818811145231892, + "loss": 4.1776, + "step": 755500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015809425844864737, + "loss": 4.1805, + "step": 756000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015800040544497584, + "loss": 4.1792, + "step": 756500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015790655244130432, + "loss": 4.1975, + "step": 757000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015781269943763277, + "loss": 4.1828, + "step": 757500 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015771884643396125, + "loss": 4.182, + "step": 758000 + }, + { + "epoch": 1.42, + "learning_rate": 0.00015762499343028973, + "loss": 4.1866, + "step": 758500 + }, + { + "epoch": 1.42, + "learning_rate": 0.0001575311404266182, + "loss": 4.1729, + "step": 759000 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015743728742294665, + "loss": 4.1691, + "step": 759500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015734343441927516, + "loss": 4.1738, + "step": 760000 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001572495814156036, + "loss": 4.186, + "step": 760500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015715572841193206, + "loss": 4.1774, + "step": 761000 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015706187540826056, + "loss": 4.1862, + "step": 761500 + }, + { + "epoch": 1.43, + "learning_rate": 0.000156968022404589, + "loss": 4.1813, + "step": 762000 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001568741694009175, + "loss": 4.1995, + "step": 762500 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015678031639724597, + "loss": 4.1895, + "step": 763000 + }, + { + "epoch": 1.43, + "learning_rate": 0.00015668646339357445, + "loss": 4.1856, + "step": 763500 + }, + { + "epoch": 1.43, + "learning_rate": 0.0001565926103899029, + "loss": 4.1886, + "step": 764000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001564987573862314, + "loss": 4.185, + "step": 764500 + }, + { + "epoch": 1.44, + "learning_rate": 0.00015640490438255985, + "loss": 4.1698, + "step": 765000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001563110513788883, + "loss": 4.1734, + "step": 765500 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001562171983752168, + "loss": 4.1807, + "step": 766000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00015612334537154526, + "loss": 4.1649, + "step": 766500 + }, + { + "epoch": 1.44, + "learning_rate": 0.00015602949236787373, + "loss": 4.1836, + "step": 767000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001559356393642022, + "loss": 4.1665, + "step": 767500 + }, + { + "epoch": 1.44, + "learning_rate": 0.00015584178636053066, + "loss": 4.1743, + "step": 768000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00015574793335685914, + "loss": 4.1779, + "step": 768500 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001556540803531876, + "loss": 4.1824, + "step": 769000 + }, + { + "epoch": 1.44, + "learning_rate": 0.0001555602273495161, + "loss": 4.19, + "step": 769500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015546637434584454, + "loss": 4.1761, + "step": 770000 + }, + { + "epoch": 1.45, + "learning_rate": 0.000155372521342173, + "loss": 4.1614, + "step": 770500 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001552786683385015, + "loss": 4.1856, + "step": 771000 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015518481533482995, + "loss": 4.1783, + "step": 771500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015509096233115843, + "loss": 4.1799, + "step": 772000 + }, + { + "epoch": 1.45, + "learning_rate": 0.0001549971093274869, + "loss": 4.1716, + "step": 772500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015490325632381538, + "loss": 4.1699, + "step": 773000 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015480940332014383, + "loss": 4.181, + "step": 773500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015471555031647234, + "loss": 4.1692, + "step": 774000 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015462169731280079, + "loss": 4.1682, + "step": 774500 + }, + { + "epoch": 1.45, + "learning_rate": 0.00015452784430912924, + "loss": 4.1863, + "step": 775000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015443399130545774, + "loss": 4.1728, + "step": 775500 + }, + { + "epoch": 1.46, + "learning_rate": 0.0001543401383017862, + "loss": 4.1685, + "step": 776000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015424628529811467, + "loss": 4.1935, + "step": 776500 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015415243229444315, + "loss": 4.1843, + "step": 777000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015405857929077162, + "loss": 4.1879, + "step": 777500 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015396472628710007, + "loss": 4.1884, + "step": 778000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015387087328342855, + "loss": 4.1629, + "step": 778500 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015377702027975703, + "loss": 4.1926, + "step": 779000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015368316727608548, + "loss": 4.174, + "step": 779500 + }, + { + "epoch": 1.46, + "learning_rate": 0.00015358931427241398, + "loss": 4.1655, + "step": 780000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015349546126874243, + "loss": 4.1828, + "step": 780500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015340160826507088, + "loss": 4.1699, + "step": 781000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015330775526139936, + "loss": 4.1737, + "step": 781500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015321390225772784, + "loss": 4.1799, + "step": 782000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015312004925405632, + "loss": 4.1621, + "step": 782500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015302619625038477, + "loss": 4.1555, + "step": 783000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015293234324671327, + "loss": 4.1955, + "step": 783500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015283849024304172, + "loss": 4.1794, + "step": 784000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015274463723937017, + "loss": 4.1778, + "step": 784500 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015265078423569868, + "loss": 4.1704, + "step": 785000 + }, + { + "epoch": 1.47, + "learning_rate": 0.00015255693123202713, + "loss": 4.1734, + "step": 785500 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001524630782283556, + "loss": 4.1817, + "step": 786000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015236922522468408, + "loss": 4.172, + "step": 786500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015227537222101256, + "loss": 4.168, + "step": 787000 + }, + { + "epoch": 1.48, + "learning_rate": 0.000152181519217341, + "loss": 4.1887, + "step": 787500 + }, + { + "epoch": 1.48, + "learning_rate": 0.0001520876662136695, + "loss": 4.1689, + "step": 788000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015199381320999796, + "loss": 4.1787, + "step": 788500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015189996020632641, + "loss": 4.1706, + "step": 789000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015180610720265492, + "loss": 4.1888, + "step": 789500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015171225419898337, + "loss": 4.1842, + "step": 790000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015161840119531185, + "loss": 4.1832, + "step": 790500 + }, + { + "epoch": 1.48, + "learning_rate": 0.00015152454819164032, + "loss": 4.165, + "step": 791000 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015143069518796877, + "loss": 4.1736, + "step": 791500 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015133684218429725, + "loss": 4.1654, + "step": 792000 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015124298918062573, + "loss": 4.1936, + "step": 792500 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001511491361769542, + "loss": 4.1598, + "step": 793000 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015105528317328266, + "loss": 4.1692, + "step": 793500 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001509614301696111, + "loss": 4.1788, + "step": 794000 + }, + { + "epoch": 1.49, + "learning_rate": 0.0001508675771659396, + "loss": 4.1794, + "step": 794500 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015077372416226806, + "loss": 4.1873, + "step": 795000 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015067987115859654, + "loss": 4.1755, + "step": 795500 + }, + { + "epoch": 1.49, + "learning_rate": 0.00015058601815492502, + "loss": 4.1757, + "step": 796000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001504921651512535, + "loss": 4.1948, + "step": 796500 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015039831214758194, + "loss": 4.1691, + "step": 797000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015030445914391045, + "loss": 4.1865, + "step": 797500 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001502106061402389, + "loss": 4.1511, + "step": 798000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015011675313656735, + "loss": 4.1769, + "step": 798500 + }, + { + "epoch": 1.5, + "learning_rate": 0.00015002290013289585, + "loss": 4.1835, + "step": 799000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0001499290471292243, + "loss": 4.1644, + "step": 799500 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014983519412555278, + "loss": 4.1547, + "step": 800000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014974134112188126, + "loss": 4.1744, + "step": 800500 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014964748811820974, + "loss": 4.1791, + "step": 801000 + }, + { + "epoch": 1.5, + "learning_rate": 0.00014955363511453821, + "loss": 4.1763, + "step": 801500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014945978211086666, + "loss": 4.1831, + "step": 802000 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014936592910719514, + "loss": 4.1795, + "step": 802500 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001492720761035236, + "loss": 4.1889, + "step": 803000 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014917822309985207, + "loss": 4.1814, + "step": 803500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014908437009618055, + "loss": 4.1669, + "step": 804000 + }, + { + "epoch": 1.51, + "learning_rate": 0.000148990517092509, + "loss": 4.1695, + "step": 804500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014889666408883747, + "loss": 4.1616, + "step": 805000 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014880281108516595, + "loss": 4.1777, + "step": 805500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014870895808149443, + "loss": 4.1715, + "step": 806000 + }, + { + "epoch": 1.51, + "learning_rate": 0.0001486151050778229, + "loss": 4.181, + "step": 806500 + }, + { + "epoch": 1.51, + "learning_rate": 0.00014852125207415138, + "loss": 4.1551, + "step": 807000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014842739907047983, + "loss": 4.1772, + "step": 807500 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001483335460668083, + "loss": 4.1738, + "step": 808000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014823969306313676, + "loss": 4.1522, + "step": 808500 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014814584005946524, + "loss": 4.1626, + "step": 809000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014805198705579372, + "loss": 4.164, + "step": 809500 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001479581340521222, + "loss": 4.1861, + "step": 810000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014786428104845067, + "loss": 4.161, + "step": 810500 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014777042804477915, + "loss": 4.1602, + "step": 811000 + }, + { + "epoch": 1.52, + "learning_rate": 0.0001476765750411076, + "loss": 4.1881, + "step": 811500 + }, + { + "epoch": 1.52, + "learning_rate": 0.00014758272203743608, + "loss": 4.1601, + "step": 812000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014748886903376455, + "loss": 4.1681, + "step": 812500 + }, + { + "epoch": 1.53, + "learning_rate": 0.000147395016030093, + "loss": 4.1703, + "step": 813000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014730116302642148, + "loss": 4.1818, + "step": 813500 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014720731002274996, + "loss": 4.154, + "step": 814000 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001471134570190784, + "loss": 4.1879, + "step": 814500 + }, + { + "epoch": 1.53, + "learning_rate": 0.0001470196040154069, + "loss": 4.1641, + "step": 815000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014692575101173536, + "loss": 4.1616, + "step": 815500 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014683189800806384, + "loss": 4.18, + "step": 816000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014673804500439232, + "loss": 4.1826, + "step": 816500 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014664419200072077, + "loss": 4.164, + "step": 817000 + }, + { + "epoch": 1.53, + "learning_rate": 0.00014655033899704925, + "loss": 4.1907, + "step": 817500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014645648599337772, + "loss": 4.1632, + "step": 818000 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014636263298970618, + "loss": 4.1794, + "step": 818500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014626877998603465, + "loss": 4.1945, + "step": 819000 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014617492698236313, + "loss": 4.1681, + "step": 819500 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001460810739786916, + "loss": 4.1524, + "step": 820000 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014598722097502008, + "loss": 4.1832, + "step": 820500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014589336797134854, + "loss": 4.1577, + "step": 821000 + }, + { + "epoch": 1.54, + "learning_rate": 0.000145799514967677, + "loss": 4.1752, + "step": 821500 + }, + { + "epoch": 1.54, + "learning_rate": 0.0001457056619640055, + "loss": 4.1663, + "step": 822000 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014561180896033394, + "loss": 4.1744, + "step": 822500 + }, + { + "epoch": 1.54, + "learning_rate": 0.00014551795595666242, + "loss": 4.1754, + "step": 823000 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001454241029529909, + "loss": 4.1776, + "step": 823500 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014533024994931937, + "loss": 4.195, + "step": 824000 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014523639694564785, + "loss": 4.1709, + "step": 824500 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001451425439419763, + "loss": 4.176, + "step": 825000 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014504869093830478, + "loss": 4.1478, + "step": 825500 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014495483793463326, + "loss": 4.1835, + "step": 826000 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001448609849309617, + "loss": 4.1651, + "step": 826500 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014476713192729018, + "loss": 4.1713, + "step": 827000 + }, + { + "epoch": 1.55, + "learning_rate": 0.00014467327892361866, + "loss": 4.1648, + "step": 827500 + }, + { + "epoch": 1.55, + "learning_rate": 0.0001445794259199471, + "loss": 4.151, + "step": 828000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001444855729162756, + "loss": 4.1574, + "step": 828500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014439171991260407, + "loss": 4.1701, + "step": 829000 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014429786690893254, + "loss": 4.1571, + "step": 829500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014420401390526102, + "loss": 4.1607, + "step": 830000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001441101609015895, + "loss": 4.164, + "step": 830500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014401630789791795, + "loss": 4.1665, + "step": 831000 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014392245489424643, + "loss": 4.1664, + "step": 831500 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001438286018905749, + "loss": 4.167, + "step": 832000 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014373474888690335, + "loss": 4.1545, + "step": 832500 + }, + { + "epoch": 1.56, + "learning_rate": 0.00014364089588323183, + "loss": 4.1721, + "step": 833000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0001435470428795603, + "loss": 4.166, + "step": 833500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014345318987588879, + "loss": 4.1592, + "step": 834000 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014335933687221726, + "loss": 4.1648, + "step": 834500 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001432654838685457, + "loss": 4.1613, + "step": 835000 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001431716308648742, + "loss": 4.1674, + "step": 835500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014307777786120267, + "loss": 4.1785, + "step": 836000 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014298392485753112, + "loss": 4.1788, + "step": 836500 + }, + { + "epoch": 1.57, + "learning_rate": 0.0001428900718538596, + "loss": 4.1522, + "step": 837000 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014279621885018807, + "loss": 4.1723, + "step": 837500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014270236584651652, + "loss": 4.1717, + "step": 838000 + }, + { + "epoch": 1.57, + "learning_rate": 0.000142608512842845, + "loss": 4.1589, + "step": 838500 + }, + { + "epoch": 1.57, + "learning_rate": 0.00014251465983917348, + "loss": 4.1685, + "step": 839000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014242080683550196, + "loss": 4.1608, + "step": 839500 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014232695383183043, + "loss": 4.157, + "step": 840000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014223310082815888, + "loss": 4.1499, + "step": 840500 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014213924782448736, + "loss": 4.1432, + "step": 841000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014204539482081584, + "loss": 4.1689, + "step": 841500 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001419515418171443, + "loss": 4.1645, + "step": 842000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014185768881347277, + "loss": 4.1648, + "step": 842500 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014176383580980124, + "loss": 4.1825, + "step": 843000 + }, + { + "epoch": 1.58, + "learning_rate": 0.00014166998280612972, + "loss": 4.1707, + "step": 843500 + }, + { + "epoch": 1.58, + "learning_rate": 0.0001415761298024582, + "loss": 4.1767, + "step": 844000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014148227679878665, + "loss": 4.1638, + "step": 844500 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014138842379511513, + "loss": 4.1606, + "step": 845000 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001412945707914436, + "loss": 4.1743, + "step": 845500 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014120071778777205, + "loss": 4.1715, + "step": 846000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014110686478410053, + "loss": 4.16, + "step": 846500 + }, + { + "epoch": 1.59, + "learning_rate": 0.000141013011780429, + "loss": 4.1645, + "step": 847000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014091915877675749, + "loss": 4.1855, + "step": 847500 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014082530577308596, + "loss": 4.1694, + "step": 848000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014073145276941441, + "loss": 4.1787, + "step": 848500 + }, + { + "epoch": 1.59, + "learning_rate": 0.0001406375997657429, + "loss": 4.153, + "step": 849000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00014054374676207137, + "loss": 4.1593, + "step": 849500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00014044989375839985, + "loss": 4.1566, + "step": 850000 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001403560407547283, + "loss": 4.1693, + "step": 850500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00014026218775105677, + "loss": 4.1543, + "step": 851000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00014016833474738522, + "loss": 4.1569, + "step": 851500 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001400744817437137, + "loss": 4.1569, + "step": 852000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013998062874004218, + "loss": 4.1807, + "step": 852500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013988677573637066, + "loss": 4.1469, + "step": 853000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013979292273269913, + "loss": 4.1802, + "step": 853500 + }, + { + "epoch": 1.6, + "learning_rate": 0.0001396990697290276, + "loss": 4.1752, + "step": 854000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013960521672535606, + "loss": 4.1669, + "step": 854500 + }, + { + "epoch": 1.6, + "learning_rate": 0.00013951136372168454, + "loss": 4.1645, + "step": 855000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013941751071801302, + "loss": 4.1575, + "step": 855500 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013932365771434147, + "loss": 4.1702, + "step": 856000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013922980471066994, + "loss": 4.142, + "step": 856500 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013913595170699842, + "loss": 4.149, + "step": 857000 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001390420987033269, + "loss": 4.144, + "step": 857500 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013894824569965538, + "loss": 4.1731, + "step": 858000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013885439269598383, + "loss": 4.1451, + "step": 858500 + }, + { + "epoch": 1.61, + "learning_rate": 0.0001387605396923123, + "loss": 4.1602, + "step": 859000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013866668668864078, + "loss": 4.1705, + "step": 859500 + }, + { + "epoch": 1.61, + "learning_rate": 0.00013857283368496923, + "loss": 4.1884, + "step": 860000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001384789806812977, + "loss": 4.1839, + "step": 860500 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013838512767762619, + "loss": 4.1571, + "step": 861000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013829127467395464, + "loss": 4.1432, + "step": 861500 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013819742167028311, + "loss": 4.1425, + "step": 862000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001381035686666116, + "loss": 4.1456, + "step": 862500 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013800971566294007, + "loss": 4.1528, + "step": 863000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013791586265926855, + "loss": 4.1685, + "step": 863500 + }, + { + "epoch": 1.62, + "learning_rate": 0.000137822009655597, + "loss": 4.1539, + "step": 864000 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013772815665192547, + "loss": 4.171, + "step": 864500 + }, + { + "epoch": 1.62, + "learning_rate": 0.00013763430364825395, + "loss": 4.1472, + "step": 865000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0001375404506445824, + "loss": 4.1517, + "step": 865500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013744659764091088, + "loss": 4.1509, + "step": 866000 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013735274463723936, + "loss": 4.1623, + "step": 866500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013725889163356783, + "loss": 4.1633, + "step": 867000 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001371650386298963, + "loss": 4.1658, + "step": 867500 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001370711856262248, + "loss": 4.1715, + "step": 868000 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013697733262255324, + "loss": 4.1552, + "step": 868500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013688347961888172, + "loss": 4.1719, + "step": 869000 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013678962661521017, + "loss": 4.1733, + "step": 869500 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013669577361153864, + "loss": 4.1662, + "step": 870000 + }, + { + "epoch": 1.63, + "learning_rate": 0.00013660192060786712, + "loss": 4.1711, + "step": 870500 + }, + { + "epoch": 1.63, + "learning_rate": 0.0001365080676041956, + "loss": 4.1749, + "step": 871000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013641421460052408, + "loss": 4.1627, + "step": 871500 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013632036159685253, + "loss": 4.1471, + "step": 872000 + }, + { + "epoch": 1.64, + "learning_rate": 0.000136226508593181, + "loss": 4.1572, + "step": 872500 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013613265558950948, + "loss": 4.1741, + "step": 873000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013603880258583796, + "loss": 4.1635, + "step": 873500 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001359449495821664, + "loss": 4.1609, + "step": 874000 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001358510965784949, + "loss": 4.1593, + "step": 874500 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013575724357482334, + "loss": 4.1554, + "step": 875000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00013566339057115181, + "loss": 4.1531, + "step": 875500 + }, + { + "epoch": 1.64, + "learning_rate": 0.0001355695375674803, + "loss": 4.1634, + "step": 876000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013547568456380877, + "loss": 4.1664, + "step": 876500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013538183156013725, + "loss": 4.143, + "step": 877000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013528797855646572, + "loss": 4.1369, + "step": 877500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013519412555279417, + "loss": 4.149, + "step": 878000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013510027254912265, + "loss": 4.1502, + "step": 878500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013500641954545113, + "loss": 4.1547, + "step": 879000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013491256654177958, + "loss": 4.1675, + "step": 879500 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013481871353810806, + "loss": 4.1688, + "step": 880000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00013472486053443653, + "loss": 4.1762, + "step": 880500 + }, + { + "epoch": 1.65, + "learning_rate": 0.000134631007530765, + "loss": 4.1329, + "step": 881000 + }, + { + "epoch": 1.65, + "learning_rate": 0.0001345371545270935, + "loss": 4.1668, + "step": 881500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013444330152342194, + "loss": 4.1542, + "step": 882000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013434944851975042, + "loss": 4.1452, + "step": 882500 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001342555955160789, + "loss": 4.1658, + "step": 883000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013416174251240734, + "loss": 4.1588, + "step": 883500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013406788950873582, + "loss": 4.1566, + "step": 884000 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001339740365050643, + "loss": 4.1473, + "step": 884500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013388018350139275, + "loss": 4.1476, + "step": 885000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013378633049772123, + "loss": 4.1725, + "step": 885500 + }, + { + "epoch": 1.66, + "learning_rate": 0.0001336924774940497, + "loss": 4.1553, + "step": 886000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013359862449037818, + "loss": 4.161, + "step": 886500 + }, + { + "epoch": 1.66, + "learning_rate": 0.00013350477148670666, + "loss": 4.1336, + "step": 887000 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001334109184830351, + "loss": 4.1471, + "step": 887500 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001333170654793636, + "loss": 4.1533, + "step": 888000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013322321247569206, + "loss": 4.1537, + "step": 888500 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013312935947202052, + "loss": 4.1666, + "step": 889000 + }, + { + "epoch": 1.67, + "learning_rate": 0.000133035506468349, + "loss": 4.1584, + "step": 889500 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013294165346467747, + "loss": 4.1578, + "step": 890000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013284780046100595, + "loss": 4.1589, + "step": 890500 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013275394745733442, + "loss": 4.1773, + "step": 891000 + }, + { + "epoch": 1.67, + "learning_rate": 0.0001326600944536629, + "loss": 4.1608, + "step": 891500 + }, + { + "epoch": 1.67, + "learning_rate": 0.00013256624144999135, + "loss": 4.1466, + "step": 892000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013247238844631983, + "loss": 4.1565, + "step": 892500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013237853544264828, + "loss": 4.1605, + "step": 893000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013228468243897676, + "loss": 4.1482, + "step": 893500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013219082943530524, + "loss": 4.1617, + "step": 894000 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001320969764316337, + "loss": 4.1526, + "step": 894500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013200312342796216, + "loss": 4.1671, + "step": 895000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013190927042429064, + "loss": 4.1766, + "step": 895500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013181541742061912, + "loss": 4.1628, + "step": 896000 + }, + { + "epoch": 1.68, + "learning_rate": 0.0001317215644169476, + "loss": 4.1754, + "step": 896500 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013162771141327607, + "loss": 4.1571, + "step": 897000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00013153385840960452, + "loss": 4.1459, + "step": 897500 + }, + { + "epoch": 1.69, + "learning_rate": 0.000131440005405933, + "loss": 4.1665, + "step": 898000 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013134615240226148, + "loss": 4.1266, + "step": 898500 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013125229939858993, + "loss": 4.1453, + "step": 899000 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001311584463949184, + "loss": 4.1517, + "step": 899500 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013106459339124688, + "loss": 4.1438, + "step": 900000 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013097074038757536, + "loss": 4.1525, + "step": 900500 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013087688738390384, + "loss": 4.1584, + "step": 901000 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001307830343802323, + "loss": 4.1575, + "step": 901500 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013068918137656077, + "loss": 4.146, + "step": 902000 + }, + { + "epoch": 1.69, + "learning_rate": 0.00013059532837288924, + "loss": 4.1607, + "step": 902500 + }, + { + "epoch": 1.69, + "learning_rate": 0.0001305014753692177, + "loss": 4.1482, + "step": 903000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013040762236554617, + "loss": 4.1457, + "step": 903500 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013031376936187465, + "loss": 4.1477, + "step": 904000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013021991635820313, + "loss": 4.1523, + "step": 904500 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001301260633545316, + "loss": 4.1349, + "step": 905000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00013003221035086005, + "loss": 4.147, + "step": 905500 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012993835734718853, + "loss": 4.1542, + "step": 906000 + }, + { + "epoch": 1.7, + "learning_rate": 0.000129844504343517, + "loss": 4.1514, + "step": 906500 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012975065133984546, + "loss": 4.1542, + "step": 907000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00012965679833617394, + "loss": 4.1556, + "step": 907500 + }, + { + "epoch": 1.7, + "learning_rate": 0.0001295629453325024, + "loss": 4.1406, + "step": 908000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012946909232883086, + "loss": 4.1554, + "step": 908500 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012937523932515934, + "loss": 4.1343, + "step": 909000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012928138632148782, + "loss": 4.1464, + "step": 909500 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001291875333178163, + "loss": 4.1572, + "step": 910000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012909368031414477, + "loss": 4.1512, + "step": 910500 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012899982731047322, + "loss": 4.1333, + "step": 911000 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001289059743068017, + "loss": 4.1432, + "step": 911500 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012881212130313018, + "loss": 4.152, + "step": 912000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012871826829945863, + "loss": 4.1375, + "step": 912500 + }, + { + "epoch": 1.71, + "learning_rate": 0.0001286244152957871, + "loss": 4.1629, + "step": 913000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00012853056229211558, + "loss": 4.1557, + "step": 913500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012843670928844406, + "loss": 4.149, + "step": 914000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012834285628477254, + "loss": 4.1455, + "step": 914500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012824900328110102, + "loss": 4.1546, + "step": 915000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012815515027742947, + "loss": 4.1494, + "step": 915500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012806129727375794, + "loss": 4.1478, + "step": 916000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012796744427008642, + "loss": 4.1756, + "step": 916500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012787359126641487, + "loss": 4.1373, + "step": 917000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012777973826274335, + "loss": 4.1653, + "step": 917500 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012768588525907183, + "loss": 4.1449, + "step": 918000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00012759203225540028, + "loss": 4.1569, + "step": 918500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012749817925172875, + "loss": 4.1697, + "step": 919000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012740432624805723, + "loss": 4.1371, + "step": 919500 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001273104732443857, + "loss": 4.1391, + "step": 920000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012721662024071419, + "loss": 4.1325, + "step": 920500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012712276723704264, + "loss": 4.133, + "step": 921000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012702891423337111, + "loss": 4.1564, + "step": 921500 + }, + { + "epoch": 1.73, + "learning_rate": 0.0001269350612296996, + "loss": 4.1415, + "step": 922000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012684120822602804, + "loss": 4.1426, + "step": 922500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012674735522235652, + "loss": 4.1483, + "step": 923000 + }, + { + "epoch": 1.73, + "learning_rate": 0.000126653502218685, + "loss": 4.1296, + "step": 923500 + }, + { + "epoch": 1.73, + "learning_rate": 0.00012655964921501347, + "loss": 4.1324, + "step": 924000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012646579621134195, + "loss": 4.164, + "step": 924500 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001263719432076704, + "loss": 4.1306, + "step": 925000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012627809020399888, + "loss": 4.1526, + "step": 925500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012618423720032736, + "loss": 4.1502, + "step": 926000 + }, + { + "epoch": 1.74, + "learning_rate": 0.0001260903841966558, + "loss": 4.1489, + "step": 926500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012599653119298428, + "loss": 4.1272, + "step": 927000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012590267818931276, + "loss": 4.1326, + "step": 927500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012580882518564124, + "loss": 4.1592, + "step": 928000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012571497218196972, + "loss": 4.1425, + "step": 928500 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012562111917829817, + "loss": 4.1498, + "step": 929000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00012552726617462664, + "loss": 4.145, + "step": 929500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012543341317095512, + "loss": 4.1473, + "step": 930000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012533956016728357, + "loss": 4.1499, + "step": 930500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012524570716361205, + "loss": 4.1531, + "step": 931000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012515185415994053, + "loss": 4.1434, + "step": 931500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012505800115626898, + "loss": 4.1528, + "step": 932000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012496414815259745, + "loss": 4.1428, + "step": 932500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012487029514892593, + "loss": 4.1576, + "step": 933000 + }, + { + "epoch": 1.75, + "learning_rate": 0.0001247764421452544, + "loss": 4.1558, + "step": 933500 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012468258914158289, + "loss": 4.1521, + "step": 934000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00012458873613791136, + "loss": 4.1473, + "step": 934500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012449488313423981, + "loss": 4.1403, + "step": 935000 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001244010301305683, + "loss": 4.1466, + "step": 935500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012430717712689674, + "loss": 4.1479, + "step": 936000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012421332412322522, + "loss": 4.154, + "step": 936500 + }, + { + "epoch": 1.76, + "learning_rate": 0.0001241194711195537, + "loss": 4.1392, + "step": 937000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012402561811588217, + "loss": 4.1386, + "step": 937500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012393176511221065, + "loss": 4.1508, + "step": 938000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012383791210853913, + "loss": 4.1375, + "step": 938500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012374405910486758, + "loss": 4.157, + "step": 939000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012365020610119606, + "loss": 4.1461, + "step": 939500 + }, + { + "epoch": 1.76, + "learning_rate": 0.00012355635309752453, + "loss": 4.156, + "step": 940000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012346250009385298, + "loss": 4.1311, + "step": 940500 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012336864709018146, + "loss": 4.1446, + "step": 941000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012327479408650994, + "loss": 4.1588, + "step": 941500 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001231809410828384, + "loss": 4.1286, + "step": 942000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012308708807916687, + "loss": 4.1618, + "step": 942500 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012299323507549534, + "loss": 4.1153, + "step": 943000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012289938207182382, + "loss": 4.1735, + "step": 943500 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001228055290681523, + "loss": 4.1568, + "step": 944000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012271167606448075, + "loss": 4.1195, + "step": 944500 + }, + { + "epoch": 1.77, + "learning_rate": 0.00012261782306080923, + "loss": 4.1557, + "step": 945000 + }, + { + "epoch": 1.77, + "learning_rate": 0.0001225239700571377, + "loss": 4.1538, + "step": 945500 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012243011705346615, + "loss": 4.1532, + "step": 946000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012233626404979463, + "loss": 4.1455, + "step": 946500 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001222424110461231, + "loss": 4.1525, + "step": 947000 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001221485580424516, + "loss": 4.1732, + "step": 947500 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012205470503878005, + "loss": 4.1564, + "step": 948000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012196085203510851, + "loss": 4.1306, + "step": 948500 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012186699903143699, + "loss": 4.1487, + "step": 949000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012177314602776547, + "loss": 4.1409, + "step": 949500 + }, + { + "epoch": 1.78, + "learning_rate": 0.00012167929302409393, + "loss": 4.1265, + "step": 950000 + }, + { + "epoch": 1.78, + "learning_rate": 0.0001215854400204224, + "loss": 4.1365, + "step": 950500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012149158701675087, + "loss": 4.1518, + "step": 951000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012139773401307934, + "loss": 4.1391, + "step": 951500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012130388100940782, + "loss": 4.1373, + "step": 952000 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001212100280057363, + "loss": 4.1258, + "step": 952500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012111617500206476, + "loss": 4.1463, + "step": 953000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012102232199839323, + "loss": 4.1345, + "step": 953500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012092846899472168, + "loss": 4.1452, + "step": 954000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012083461599105016, + "loss": 4.1288, + "step": 954500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012074076298737864, + "loss": 4.1593, + "step": 955000 + }, + { + "epoch": 1.79, + "learning_rate": 0.0001206469099837071, + "loss": 4.1404, + "step": 955500 + }, + { + "epoch": 1.79, + "learning_rate": 0.00012055305698003558, + "loss": 4.1375, + "step": 956000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012045920397636406, + "loss": 4.1524, + "step": 956500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012036535097269251, + "loss": 4.1575, + "step": 957000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012027149796902099, + "loss": 4.1392, + "step": 957500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012017764496534946, + "loss": 4.1581, + "step": 958000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00012008379196167793, + "loss": 4.136, + "step": 958500 + }, + { + "epoch": 1.8, + "learning_rate": 0.0001199899389580064, + "loss": 4.137, + "step": 959000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011989608595433487, + "loss": 4.1558, + "step": 959500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011980223295066335, + "loss": 4.154, + "step": 960000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011970837994699182, + "loss": 4.1409, + "step": 960500 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011961452694332027, + "loss": 4.1322, + "step": 961000 + }, + { + "epoch": 1.8, + "learning_rate": 0.00011952067393964875, + "loss": 4.1709, + "step": 961500 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011942682093597723, + "loss": 4.1564, + "step": 962000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011933296793230569, + "loss": 4.1596, + "step": 962500 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011923911492863417, + "loss": 4.1356, + "step": 963000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011914526192496265, + "loss": 4.1275, + "step": 963500 + }, + { + "epoch": 1.81, + "learning_rate": 0.0001190514089212911, + "loss": 4.1554, + "step": 964000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011895755591761958, + "loss": 4.1327, + "step": 964500 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011886370291394805, + "loss": 4.1317, + "step": 965000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011876984991027652, + "loss": 4.1325, + "step": 965500 + }, + { + "epoch": 1.81, + "learning_rate": 0.000118675996906605, + "loss": 4.1607, + "step": 966000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00011858214390293346, + "loss": 4.1461, + "step": 966500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011848829089926194, + "loss": 4.1407, + "step": 967000 + }, + { + "epoch": 1.82, + "learning_rate": 0.0001183944378955904, + "loss": 4.1523, + "step": 967500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011830058489191886, + "loss": 4.1324, + "step": 968000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011820673188824734, + "loss": 4.1391, + "step": 968500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011811287888457582, + "loss": 4.1511, + "step": 969000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011801902588090428, + "loss": 4.1539, + "step": 969500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011792517287723276, + "loss": 4.1431, + "step": 970000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011783131987356124, + "loss": 4.1339, + "step": 970500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011773746686988969, + "loss": 4.145, + "step": 971000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011764361386621816, + "loss": 4.1357, + "step": 971500 + }, + { + "epoch": 1.82, + "learning_rate": 0.00011754976086254663, + "loss": 4.1461, + "step": 972000 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001174559078588751, + "loss": 4.1355, + "step": 972500 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011736205485520358, + "loss": 4.1309, + "step": 973000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011726820185153203, + "loss": 4.1298, + "step": 973500 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011717434884786051, + "loss": 4.1269, + "step": 974000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011708049584418899, + "loss": 4.1468, + "step": 974500 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011698664284051745, + "loss": 4.1476, + "step": 975000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011689278983684593, + "loss": 4.1415, + "step": 975500 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001167989368331744, + "loss": 4.1372, + "step": 976000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011670508382950287, + "loss": 4.1227, + "step": 976500 + }, + { + "epoch": 1.83, + "learning_rate": 0.00011661123082583135, + "loss": 4.1526, + "step": 977000 + }, + { + "epoch": 1.83, + "learning_rate": 0.0001165173778221598, + "loss": 4.1404, + "step": 977500 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011642352481848828, + "loss": 4.1542, + "step": 978000 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011632967181481675, + "loss": 4.1278, + "step": 978500 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011623581881114522, + "loss": 4.121, + "step": 979000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001161419658074737, + "loss": 4.1483, + "step": 979500 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011604811280380217, + "loss": 4.135, + "step": 980000 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011595425980013062, + "loss": 4.1195, + "step": 980500 + }, + { + "epoch": 1.84, + "learning_rate": 0.0001158604067964591, + "loss": 4.1515, + "step": 981000 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011576655379278758, + "loss": 4.1385, + "step": 981500 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011567270078911604, + "loss": 4.1471, + "step": 982000 + }, + { + "epoch": 1.84, + "learning_rate": 0.00011557884778544452, + "loss": 4.1415, + "step": 982500 + }, + { + "epoch": 1.85, + "learning_rate": 0.000115484994781773, + "loss": 4.1453, + "step": 983000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011539114177810146, + "loss": 4.1373, + "step": 983500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011529728877442992, + "loss": 4.1174, + "step": 984000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011520343577075839, + "loss": 4.1484, + "step": 984500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011510958276708686, + "loss": 4.1352, + "step": 985000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011501572976341534, + "loss": 4.1491, + "step": 985500 + }, + { + "epoch": 1.85, + "learning_rate": 0.0001149218767597438, + "loss": 4.1569, + "step": 986000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011482802375607228, + "loss": 4.1185, + "step": 986500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011473417075240076, + "loss": 4.1379, + "step": 987000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011464031774872921, + "loss": 4.1233, + "step": 987500 + }, + { + "epoch": 1.85, + "learning_rate": 0.00011454646474505769, + "loss": 4.124, + "step": 988000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011445261174138617, + "loss": 4.1467, + "step": 988500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011435875873771463, + "loss": 4.159, + "step": 989000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011426490573404311, + "loss": 4.138, + "step": 989500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011417105273037157, + "loss": 4.1624, + "step": 990000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011407719972670003, + "loss": 4.1434, + "step": 990500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011398334672302851, + "loss": 4.1239, + "step": 991000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011388949371935698, + "loss": 4.1452, + "step": 991500 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011379564071568545, + "loss": 4.1212, + "step": 992000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011370178771201393, + "loss": 4.1346, + "step": 992500 + }, + { + "epoch": 1.86, + "learning_rate": 0.0001136079347083424, + "loss": 4.1433, + "step": 993000 + }, + { + "epoch": 1.86, + "learning_rate": 0.00011351408170467087, + "loss": 4.1496, + "step": 993500 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011342022870099935, + "loss": 4.1362, + "step": 994000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001133263756973278, + "loss": 4.1491, + "step": 994500 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011323252269365628, + "loss": 4.1326, + "step": 995000 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011313866968998475, + "loss": 4.1242, + "step": 995500 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011304481668631322, + "loss": 4.1487, + "step": 996000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001129509636826417, + "loss": 4.1224, + "step": 996500 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011285711067897015, + "loss": 4.1172, + "step": 997000 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011276325767529862, + "loss": 4.132, + "step": 997500 + }, + { + "epoch": 1.87, + "learning_rate": 0.0001126694046716271, + "loss": 4.1388, + "step": 998000 + }, + { + "epoch": 1.87, + "learning_rate": 0.00011257555166795557, + "loss": 4.1333, + "step": 998500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011248169866428404, + "loss": 4.142, + "step": 999000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011238784566061252, + "loss": 4.1385, + "step": 999500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011229399265694098, + "loss": 4.1118, + "step": 1000000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011220013965326946, + "loss": 4.1553, + "step": 1000500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011210628664959793, + "loss": 4.1319, + "step": 1001000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011201243364592639, + "loss": 4.1237, + "step": 1001500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011191858064225487, + "loss": 4.1383, + "step": 1002000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011182472763858333, + "loss": 4.1455, + "step": 1002500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011173087463491181, + "loss": 4.1282, + "step": 1003000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011163702163124029, + "loss": 4.1575, + "step": 1003500 + }, + { + "epoch": 1.88, + "learning_rate": 0.00011154316862756874, + "loss": 4.1647, + "step": 1004000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011144931562389721, + "loss": 4.1119, + "step": 1004500 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011135546262022569, + "loss": 4.1236, + "step": 1005000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011126160961655415, + "loss": 4.1447, + "step": 1005500 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011116775661288263, + "loss": 4.1236, + "step": 1006000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011107390360921111, + "loss": 4.1291, + "step": 1006500 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011098005060553957, + "loss": 4.1438, + "step": 1007000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011088619760186804, + "loss": 4.1411, + "step": 1007500 + }, + { + "epoch": 1.89, + "learning_rate": 0.0001107923445981965, + "loss": 4.1428, + "step": 1008000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011069849159452498, + "loss": 4.1509, + "step": 1008500 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011060463859085346, + "loss": 4.1412, + "step": 1009000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00011051078558718192, + "loss": 4.1482, + "step": 1009500 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001104169325835104, + "loss": 4.1383, + "step": 1010000 + }, + { + "epoch": 1.9, + "learning_rate": 0.00011032307957983887, + "loss": 4.1465, + "step": 1010500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00011022922657616732, + "loss": 4.1444, + "step": 1011000 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001101353735724958, + "loss": 4.1404, + "step": 1011500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00011004152056882428, + "loss": 4.1512, + "step": 1012000 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010994766756515274, + "loss": 4.1348, + "step": 1012500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010985381456148122, + "loss": 4.1288, + "step": 1013000 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001097599615578097, + "loss": 4.1211, + "step": 1013500 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010966610855413815, + "loss": 4.1403, + "step": 1014000 + }, + { + "epoch": 1.9, + "learning_rate": 0.00010957225555046663, + "loss": 4.1321, + "step": 1014500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010947840254679509, + "loss": 4.1273, + "step": 1015000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010938454954312357, + "loss": 4.1445, + "step": 1015500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010929069653945204, + "loss": 4.15, + "step": 1016000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010919684353578051, + "loss": 4.128, + "step": 1016500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010910299053210899, + "loss": 4.1393, + "step": 1017000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010900913752843746, + "loss": 4.1423, + "step": 1017500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010891528452476591, + "loss": 4.1301, + "step": 1018000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010882143152109439, + "loss": 4.1201, + "step": 1018500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010872757851742287, + "loss": 4.1287, + "step": 1019000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010863372551375133, + "loss": 4.1387, + "step": 1019500 + }, + { + "epoch": 1.91, + "learning_rate": 0.00010853987251007981, + "loss": 4.1147, + "step": 1020000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010844601950640826, + "loss": 4.1294, + "step": 1020500 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010835216650273674, + "loss": 4.13, + "step": 1021000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010825831349906521, + "loss": 4.1342, + "step": 1021500 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010816446049539368, + "loss": 4.1351, + "step": 1022000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010807060749172216, + "loss": 4.1385, + "step": 1022500 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010797675448805063, + "loss": 4.1422, + "step": 1023000 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001078829014843791, + "loss": 4.1245, + "step": 1023500 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010778904848070757, + "loss": 4.1172, + "step": 1024000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010769519547703604, + "loss": 4.1479, + "step": 1024500 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001076013424733645, + "loss": 4.1247, + "step": 1025000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00010750748946969298, + "loss": 4.1183, + "step": 1025500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010741363646602144, + "loss": 4.1272, + "step": 1026000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010731978346234992, + "loss": 4.1143, + "step": 1026500 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001072259304586784, + "loss": 4.1418, + "step": 1027000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010713207745500685, + "loss": 4.1353, + "step": 1027500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010703822445133533, + "loss": 4.1316, + "step": 1028000 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001069443714476638, + "loss": 4.1255, + "step": 1028500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010685051844399227, + "loss": 4.119, + "step": 1029000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010675666544032074, + "loss": 4.1291, + "step": 1029500 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010666281243664922, + "loss": 4.1499, + "step": 1030000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00010656895943297769, + "loss": 4.1192, + "step": 1030500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010647510642930615, + "loss": 4.1316, + "step": 1031000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010638125342563463, + "loss": 4.1289, + "step": 1031500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010628740042196309, + "loss": 4.1488, + "step": 1032000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010619354741829157, + "loss": 4.1376, + "step": 1032500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010609969441462003, + "loss": 4.1437, + "step": 1033000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010600584141094851, + "loss": 4.134, + "step": 1033500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010591198840727699, + "loss": 4.1261, + "step": 1034000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010581813540360544, + "loss": 4.1293, + "step": 1034500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010572428239993392, + "loss": 4.1303, + "step": 1035000 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010563042939626239, + "loss": 4.1335, + "step": 1035500 + }, + { + "epoch": 1.94, + "learning_rate": 0.00010553657639259086, + "loss": 4.1341, + "step": 1036000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010544272338891933, + "loss": 4.1472, + "step": 1036500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010534887038524781, + "loss": 4.1489, + "step": 1037000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010525501738157626, + "loss": 4.1246, + "step": 1037500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010516116437790474, + "loss": 4.1247, + "step": 1038000 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001050673113742332, + "loss": 4.133, + "step": 1038500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010497345837056168, + "loss": 4.1452, + "step": 1039000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010487960536689016, + "loss": 4.1345, + "step": 1039500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010478575236321862, + "loss": 4.1238, + "step": 1040000 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001046918993595471, + "loss": 4.1177, + "step": 1040500 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010459804635587558, + "loss": 4.1273, + "step": 1041000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00010450419335220403, + "loss": 4.1347, + "step": 1041500 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001044103403485325, + "loss": 4.1266, + "step": 1042000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010431648734486098, + "loss": 4.1178, + "step": 1042500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010422263434118945, + "loss": 4.1322, + "step": 1043000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010412878133751792, + "loss": 4.147, + "step": 1043500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010403492833384637, + "loss": 4.1121, + "step": 1044000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010394107533017485, + "loss": 4.1315, + "step": 1044500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010384722232650333, + "loss": 4.1236, + "step": 1045000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010375336932283179, + "loss": 4.1159, + "step": 1045500 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010365951631916027, + "loss": 4.1236, + "step": 1046000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00010356566331548875, + "loss": 4.1284, + "step": 1046500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010347181031181721, + "loss": 4.135, + "step": 1047000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010337795730814569, + "loss": 4.1498, + "step": 1047500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010328410430447415, + "loss": 4.1342, + "step": 1048000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010319025130080262, + "loss": 4.1236, + "step": 1048500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010309639829713109, + "loss": 4.1221, + "step": 1049000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010300254529345957, + "loss": 4.1553, + "step": 1049500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010290869228978803, + "loss": 4.1391, + "step": 1050000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010281483928611651, + "loss": 4.1206, + "step": 1050500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010272098628244496, + "loss": 4.1113, + "step": 1051000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010262713327877344, + "loss": 4.1332, + "step": 1051500 + }, + { + "epoch": 1.97, + "learning_rate": 0.00010253328027510192, + "loss": 4.1186, + "step": 1052000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010243942727143038, + "loss": 4.1325, + "step": 1052500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010234557426775886, + "loss": 4.1181, + "step": 1053000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010225172126408734, + "loss": 4.1301, + "step": 1053500 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001021578682604158, + "loss": 4.1245, + "step": 1054000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010206401525674426, + "loss": 4.128, + "step": 1054500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010197016225307274, + "loss": 4.1392, + "step": 1055000 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001018763092494012, + "loss": 4.1226, + "step": 1055500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010178245624572968, + "loss": 4.1193, + "step": 1056000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010168860324205815, + "loss": 4.126, + "step": 1056500 + }, + { + "epoch": 1.98, + "learning_rate": 0.00010159475023838662, + "loss": 4.1291, + "step": 1057000 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001015008972347151, + "loss": 4.1259, + "step": 1057500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010140704423104355, + "loss": 4.1266, + "step": 1058000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010131319122737203, + "loss": 4.1414, + "step": 1058500 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001012193382237005, + "loss": 4.1284, + "step": 1059000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010112548522002897, + "loss": 4.1388, + "step": 1059500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010103163221635745, + "loss": 4.1371, + "step": 1060000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010093777921268592, + "loss": 4.1161, + "step": 1060500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010084392620901437, + "loss": 4.128, + "step": 1061000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010075007320534285, + "loss": 4.1318, + "step": 1061500 + }, + { + "epoch": 1.99, + "learning_rate": 0.00010065622020167133, + "loss": 4.113, + "step": 1062000 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001005623671979998, + "loss": 4.1257, + "step": 1062500 + }, + { + "epoch": 2.0, + "learning_rate": 0.00010046851419432827, + "loss": 4.1213, + "step": 1063000 + }, + { + "epoch": 2.0, + "learning_rate": 0.00010037466119065673, + "loss": 4.1073, + "step": 1063500 + }, + { + "epoch": 2.0, + "learning_rate": 0.00010028080818698521, + "loss": 4.1265, + "step": 1064000 + }, + { + "epoch": 2.0, + "learning_rate": 0.00010018695518331369, + "loss": 4.115, + "step": 1064500 + }, + { + "epoch": 2.0, + "learning_rate": 0.00010009310217964214, + "loss": 4.1257, + "step": 1065000 + }, + { + "epoch": 2.0, + "learning_rate": 9.999924917597062e-05, + "loss": 4.1176, + "step": 1065500 + }, + { + "epoch": 2.0, + "learning_rate": 9.99053961722991e-05, + "loss": 4.1204, + "step": 1066000 + }, + { + "epoch": 2.0, + "learning_rate": 9.981154316862756e-05, + "loss": 4.1265, + "step": 1066500 + }, + { + "epoch": 2.0, + "learning_rate": 9.971769016495604e-05, + "loss": 4.1058, + "step": 1067000 + }, + { + "epoch": 2.0, + "learning_rate": 9.962383716128451e-05, + "loss": 4.105, + "step": 1067500 + }, + { + "epoch": 2.0, + "learning_rate": 9.952998415761296e-05, + "loss": 4.1223, + "step": 1068000 + }, + { + "epoch": 2.01, + "learning_rate": 9.943613115394144e-05, + "loss": 4.086, + "step": 1068500 + }, + { + "epoch": 2.01, + "learning_rate": 9.93422781502699e-05, + "loss": 4.1134, + "step": 1069000 + }, + { + "epoch": 2.01, + "learning_rate": 9.924842514659838e-05, + "loss": 4.1087, + "step": 1069500 + }, + { + "epoch": 2.01, + "learning_rate": 9.915457214292686e-05, + "loss": 4.1157, + "step": 1070000 + }, + { + "epoch": 2.01, + "learning_rate": 9.906071913925532e-05, + "loss": 4.1426, + "step": 1070500 + }, + { + "epoch": 2.01, + "learning_rate": 9.896686613558379e-05, + "loss": 4.1268, + "step": 1071000 + }, + { + "epoch": 2.01, + "learning_rate": 9.887301313191227e-05, + "loss": 4.1173, + "step": 1071500 + }, + { + "epoch": 2.01, + "learning_rate": 9.877916012824073e-05, + "loss": 4.1073, + "step": 1072000 + }, + { + "epoch": 2.01, + "learning_rate": 9.86853071245692e-05, + "loss": 4.1115, + "step": 1072500 + }, + { + "epoch": 2.01, + "learning_rate": 9.859145412089768e-05, + "loss": 4.1194, + "step": 1073000 + }, + { + "epoch": 2.02, + "learning_rate": 9.849760111722615e-05, + "loss": 4.1053, + "step": 1073500 + }, + { + "epoch": 2.02, + "learning_rate": 9.840374811355463e-05, + "loss": 4.0859, + "step": 1074000 + }, + { + "epoch": 2.02, + "learning_rate": 9.830989510988308e-05, + "loss": 4.1224, + "step": 1074500 + }, + { + "epoch": 2.02, + "learning_rate": 9.821604210621155e-05, + "loss": 4.123, + "step": 1075000 + }, + { + "epoch": 2.02, + "learning_rate": 9.812218910254003e-05, + "loss": 4.1193, + "step": 1075500 + }, + { + "epoch": 2.02, + "learning_rate": 9.80283360988685e-05, + "loss": 4.1113, + "step": 1076000 + }, + { + "epoch": 2.02, + "learning_rate": 9.793448309519697e-05, + "loss": 4.1098, + "step": 1076500 + }, + { + "epoch": 2.02, + "learning_rate": 9.784063009152545e-05, + "loss": 4.101, + "step": 1077000 + }, + { + "epoch": 2.02, + "learning_rate": 9.77467770878539e-05, + "loss": 4.1108, + "step": 1077500 + }, + { + "epoch": 2.02, + "learning_rate": 9.765292408418238e-05, + "loss": 4.1113, + "step": 1078000 + }, + { + "epoch": 2.02, + "learning_rate": 9.755907108051085e-05, + "loss": 4.1069, + "step": 1078500 + }, + { + "epoch": 2.03, + "learning_rate": 9.746521807683932e-05, + "loss": 4.107, + "step": 1079000 + }, + { + "epoch": 2.03, + "learning_rate": 9.73713650731678e-05, + "loss": 4.1067, + "step": 1079500 + }, + { + "epoch": 2.03, + "learning_rate": 9.727751206949627e-05, + "loss": 4.1144, + "step": 1080000 + }, + { + "epoch": 2.03, + "learning_rate": 9.718365906582474e-05, + "loss": 4.1251, + "step": 1080500 + }, + { + "epoch": 2.03, + "learning_rate": 9.708980606215321e-05, + "loss": 4.1126, + "step": 1081000 + }, + { + "epoch": 2.03, + "learning_rate": 9.699595305848166e-05, + "loss": 4.124, + "step": 1081500 + }, + { + "epoch": 2.03, + "learning_rate": 9.690210005481014e-05, + "loss": 4.1106, + "step": 1082000 + }, + { + "epoch": 2.03, + "learning_rate": 9.680824705113862e-05, + "loss": 4.1016, + "step": 1082500 + }, + { + "epoch": 2.03, + "learning_rate": 9.671439404746708e-05, + "loss": 4.085, + "step": 1083000 + }, + { + "epoch": 2.03, + "learning_rate": 9.662054104379556e-05, + "loss": 4.0947, + "step": 1083500 + }, + { + "epoch": 2.03, + "learning_rate": 9.652668804012404e-05, + "loss": 4.1078, + "step": 1084000 + }, + { + "epoch": 2.04, + "learning_rate": 9.643283503645249e-05, + "loss": 4.1102, + "step": 1084500 + }, + { + "epoch": 2.04, + "learning_rate": 9.633898203278097e-05, + "loss": 4.1315, + "step": 1085000 + }, + { + "epoch": 2.04, + "learning_rate": 9.624512902910944e-05, + "loss": 4.1096, + "step": 1085500 + }, + { + "epoch": 2.04, + "learning_rate": 9.615127602543791e-05, + "loss": 4.1068, + "step": 1086000 + }, + { + "epoch": 2.04, + "learning_rate": 9.605742302176638e-05, + "loss": 4.1076, + "step": 1086500 + }, + { + "epoch": 2.04, + "learning_rate": 9.596357001809485e-05, + "loss": 4.0988, + "step": 1087000 + }, + { + "epoch": 2.04, + "learning_rate": 9.586971701442333e-05, + "loss": 4.1168, + "step": 1087500 + }, + { + "epoch": 2.04, + "learning_rate": 9.577586401075179e-05, + "loss": 4.1267, + "step": 1088000 + }, + { + "epoch": 2.04, + "learning_rate": 9.568201100708025e-05, + "loss": 4.105, + "step": 1088500 + }, + { + "epoch": 2.04, + "learning_rate": 9.558815800340873e-05, + "loss": 4.1078, + "step": 1089000 + }, + { + "epoch": 2.05, + "learning_rate": 9.549430499973721e-05, + "loss": 4.1318, + "step": 1089500 + }, + { + "epoch": 2.05, + "learning_rate": 9.540045199606567e-05, + "loss": 4.1029, + "step": 1090000 + }, + { + "epoch": 2.05, + "learning_rate": 9.530659899239415e-05, + "loss": 4.1197, + "step": 1090500 + }, + { + "epoch": 2.05, + "learning_rate": 9.521274598872263e-05, + "loss": 4.1039, + "step": 1091000 + }, + { + "epoch": 2.05, + "learning_rate": 9.511889298505108e-05, + "loss": 4.1121, + "step": 1091500 + }, + { + "epoch": 2.05, + "learning_rate": 9.502503998137955e-05, + "loss": 4.1012, + "step": 1092000 + }, + { + "epoch": 2.05, + "learning_rate": 9.493118697770802e-05, + "loss": 4.1103, + "step": 1092500 + }, + { + "epoch": 2.05, + "learning_rate": 9.48373339740365e-05, + "loss": 4.0982, + "step": 1093000 + }, + { + "epoch": 2.05, + "learning_rate": 9.474348097036497e-05, + "loss": 4.1139, + "step": 1093500 + }, + { + "epoch": 2.05, + "learning_rate": 9.464962796669344e-05, + "loss": 4.1062, + "step": 1094000 + }, + { + "epoch": 2.05, + "learning_rate": 9.45557749630219e-05, + "loss": 4.1152, + "step": 1094500 + }, + { + "epoch": 2.06, + "learning_rate": 9.446192195935038e-05, + "loss": 4.11, + "step": 1095000 + }, + { + "epoch": 2.06, + "learning_rate": 9.436806895567884e-05, + "loss": 4.1151, + "step": 1095500 + }, + { + "epoch": 2.06, + "learning_rate": 9.427421595200732e-05, + "loss": 4.1043, + "step": 1096000 + }, + { + "epoch": 2.06, + "learning_rate": 9.41803629483358e-05, + "loss": 4.1052, + "step": 1096500 + }, + { + "epoch": 2.06, + "learning_rate": 9.408650994466426e-05, + "loss": 4.1109, + "step": 1097000 + }, + { + "epoch": 2.06, + "learning_rate": 9.399265694099274e-05, + "loss": 4.0984, + "step": 1097500 + }, + { + "epoch": 2.06, + "learning_rate": 9.389880393732122e-05, + "loss": 4.1029, + "step": 1098000 + }, + { + "epoch": 2.06, + "learning_rate": 9.380495093364967e-05, + "loss": 4.1077, + "step": 1098500 + }, + { + "epoch": 2.06, + "learning_rate": 9.371109792997814e-05, + "loss": 4.1014, + "step": 1099000 + }, + { + "epoch": 2.06, + "learning_rate": 9.361724492630661e-05, + "loss": 4.1199, + "step": 1099500 + }, + { + "epoch": 2.06, + "learning_rate": 9.352339192263508e-05, + "loss": 4.1035, + "step": 1100000 + }, + { + "epoch": 2.07, + "learning_rate": 9.342953891896356e-05, + "loss": 4.0974, + "step": 1100500 + }, + { + "epoch": 2.07, + "learning_rate": 9.333568591529201e-05, + "loss": 4.1303, + "step": 1101000 + }, + { + "epoch": 2.07, + "learning_rate": 9.324183291162049e-05, + "loss": 4.1143, + "step": 1101500 + }, + { + "epoch": 2.07, + "learning_rate": 9.314797990794897e-05, + "loss": 4.1117, + "step": 1102000 + }, + { + "epoch": 2.07, + "learning_rate": 9.305412690427743e-05, + "loss": 4.0956, + "step": 1102500 + }, + { + "epoch": 2.07, + "learning_rate": 9.296027390060591e-05, + "loss": 4.1063, + "step": 1103000 + }, + { + "epoch": 2.07, + "learning_rate": 9.286642089693439e-05, + "loss": 4.1058, + "step": 1103500 + }, + { + "epoch": 2.07, + "learning_rate": 9.277256789326285e-05, + "loss": 4.1128, + "step": 1104000 + }, + { + "epoch": 2.07, + "learning_rate": 9.267871488959133e-05, + "loss": 4.1244, + "step": 1104500 + }, + { + "epoch": 2.07, + "learning_rate": 9.258486188591978e-05, + "loss": 4.12, + "step": 1105000 + }, + { + "epoch": 2.08, + "learning_rate": 9.249100888224826e-05, + "loss": 4.1167, + "step": 1105500 + }, + { + "epoch": 2.08, + "learning_rate": 9.239715587857673e-05, + "loss": 4.1086, + "step": 1106000 + }, + { + "epoch": 2.08, + "learning_rate": 9.23033028749052e-05, + "loss": 4.1212, + "step": 1106500 + }, + { + "epoch": 2.08, + "learning_rate": 9.220944987123367e-05, + "loss": 4.1176, + "step": 1107000 + }, + { + "epoch": 2.08, + "learning_rate": 9.211559686756215e-05, + "loss": 4.1025, + "step": 1107500 + }, + { + "epoch": 2.08, + "learning_rate": 9.20217438638906e-05, + "loss": 4.104, + "step": 1108000 + }, + { + "epoch": 2.08, + "learning_rate": 9.192789086021908e-05, + "loss": 4.1013, + "step": 1108500 + }, + { + "epoch": 2.08, + "learning_rate": 9.183403785654756e-05, + "loss": 4.1294, + "step": 1109000 + }, + { + "epoch": 2.08, + "learning_rate": 9.174018485287602e-05, + "loss": 4.1124, + "step": 1109500 + }, + { + "epoch": 2.08, + "learning_rate": 9.16463318492045e-05, + "loss": 4.1205, + "step": 1110000 + }, + { + "epoch": 2.08, + "learning_rate": 9.155247884553296e-05, + "loss": 4.1046, + "step": 1110500 + }, + { + "epoch": 2.09, + "learning_rate": 9.145862584186144e-05, + "loss": 4.1032, + "step": 1111000 + }, + { + "epoch": 2.09, + "learning_rate": 9.13647728381899e-05, + "loss": 4.0988, + "step": 1111500 + }, + { + "epoch": 2.09, + "learning_rate": 9.127091983451837e-05, + "loss": 4.129, + "step": 1112000 + }, + { + "epoch": 2.09, + "learning_rate": 9.117706683084684e-05, + "loss": 4.1121, + "step": 1112500 + }, + { + "epoch": 2.09, + "learning_rate": 9.108321382717532e-05, + "loss": 4.1229, + "step": 1113000 + }, + { + "epoch": 2.09, + "learning_rate": 9.098936082350379e-05, + "loss": 4.1127, + "step": 1113500 + }, + { + "epoch": 2.09, + "learning_rate": 9.089550781983226e-05, + "loss": 4.1041, + "step": 1114000 + }, + { + "epoch": 2.09, + "learning_rate": 9.080165481616074e-05, + "loss": 4.1169, + "step": 1114500 + }, + { + "epoch": 2.09, + "learning_rate": 9.070780181248919e-05, + "loss": 4.1025, + "step": 1115000 + }, + { + "epoch": 2.09, + "learning_rate": 9.061394880881767e-05, + "loss": 4.0978, + "step": 1115500 + }, + { + "epoch": 2.09, + "learning_rate": 9.052009580514615e-05, + "loss": 4.0989, + "step": 1116000 + }, + { + "epoch": 2.1, + "learning_rate": 9.042624280147461e-05, + "loss": 4.1119, + "step": 1116500 + }, + { + "epoch": 2.1, + "learning_rate": 9.033238979780309e-05, + "loss": 4.1034, + "step": 1117000 + }, + { + "epoch": 2.1, + "learning_rate": 9.023853679413155e-05, + "loss": 4.0963, + "step": 1117500 + }, + { + "epoch": 2.1, + "learning_rate": 9.014468379046001e-05, + "loss": 4.1093, + "step": 1118000 + }, + { + "epoch": 2.1, + "learning_rate": 9.005083078678849e-05, + "loss": 4.0885, + "step": 1118500 + }, + { + "epoch": 2.1, + "learning_rate": 8.995697778311696e-05, + "loss": 4.0972, + "step": 1119000 + }, + { + "epoch": 2.1, + "learning_rate": 8.986312477944543e-05, + "loss": 4.0909, + "step": 1119500 + }, + { + "epoch": 2.1, + "learning_rate": 8.976927177577391e-05, + "loss": 4.1121, + "step": 1120000 + }, + { + "epoch": 2.1, + "learning_rate": 8.967541877210237e-05, + "loss": 4.0916, + "step": 1120500 + }, + { + "epoch": 2.1, + "learning_rate": 8.958156576843085e-05, + "loss": 4.0999, + "step": 1121000 + }, + { + "epoch": 2.11, + "learning_rate": 8.948771276475933e-05, + "loss": 4.1013, + "step": 1121500 + }, + { + "epoch": 2.11, + "learning_rate": 8.939385976108778e-05, + "loss": 4.1082, + "step": 1122000 + }, + { + "epoch": 2.11, + "learning_rate": 8.930000675741626e-05, + "loss": 4.1003, + "step": 1122500 + }, + { + "epoch": 2.11, + "learning_rate": 8.920615375374472e-05, + "loss": 4.1027, + "step": 1123000 + }, + { + "epoch": 2.11, + "learning_rate": 8.91123007500732e-05, + "loss": 4.1203, + "step": 1123500 + }, + { + "epoch": 2.11, + "learning_rate": 8.901844774640168e-05, + "loss": 4.1085, + "step": 1124000 + }, + { + "epoch": 2.11, + "learning_rate": 8.892459474273013e-05, + "loss": 4.1124, + "step": 1124500 + }, + { + "epoch": 2.11, + "learning_rate": 8.88307417390586e-05, + "loss": 4.1267, + "step": 1125000 + }, + { + "epoch": 2.11, + "learning_rate": 8.873688873538708e-05, + "loss": 4.1079, + "step": 1125500 + }, + { + "epoch": 2.11, + "learning_rate": 8.864303573171554e-05, + "loss": 4.1296, + "step": 1126000 + }, + { + "epoch": 2.11, + "learning_rate": 8.854918272804402e-05, + "loss": 4.1093, + "step": 1126500 + }, + { + "epoch": 2.12, + "learning_rate": 8.84553297243725e-05, + "loss": 4.1036, + "step": 1127000 + }, + { + "epoch": 2.12, + "learning_rate": 8.836147672070096e-05, + "loss": 4.1202, + "step": 1127500 + }, + { + "epoch": 2.12, + "learning_rate": 8.826762371702944e-05, + "loss": 4.1145, + "step": 1128000 + }, + { + "epoch": 2.12, + "learning_rate": 8.81737707133579e-05, + "loss": 4.1051, + "step": 1128500 + }, + { + "epoch": 2.12, + "learning_rate": 8.807991770968637e-05, + "loss": 4.0903, + "step": 1129000 + }, + { + "epoch": 2.12, + "learning_rate": 8.798606470601485e-05, + "loss": 4.1086, + "step": 1129500 + }, + { + "epoch": 2.12, + "learning_rate": 8.789221170234331e-05, + "loss": 4.1134, + "step": 1130000 + }, + { + "epoch": 2.12, + "learning_rate": 8.779835869867179e-05, + "loss": 4.1093, + "step": 1130500 + }, + { + "epoch": 2.12, + "learning_rate": 8.770450569500026e-05, + "loss": 4.0988, + "step": 1131000 + }, + { + "epoch": 2.12, + "learning_rate": 8.761065269132871e-05, + "loss": 4.1104, + "step": 1131500 + }, + { + "epoch": 2.12, + "learning_rate": 8.751679968765719e-05, + "loss": 4.1203, + "step": 1132000 + }, + { + "epoch": 2.13, + "learning_rate": 8.742294668398567e-05, + "loss": 4.0959, + "step": 1132500 + }, + { + "epoch": 2.13, + "learning_rate": 8.732909368031413e-05, + "loss": 4.1168, + "step": 1133000 + }, + { + "epoch": 2.13, + "learning_rate": 8.723524067664261e-05, + "loss": 4.1101, + "step": 1133500 + }, + { + "epoch": 2.13, + "learning_rate": 8.714138767297109e-05, + "loss": 4.1177, + "step": 1134000 + }, + { + "epoch": 2.13, + "learning_rate": 8.704753466929955e-05, + "loss": 4.0992, + "step": 1134500 + }, + { + "epoch": 2.13, + "learning_rate": 8.695368166562802e-05, + "loss": 4.1014, + "step": 1135000 + }, + { + "epoch": 2.13, + "learning_rate": 8.685982866195648e-05, + "loss": 4.096, + "step": 1135500 + }, + { + "epoch": 2.13, + "learning_rate": 8.676597565828496e-05, + "loss": 4.0882, + "step": 1136000 + }, + { + "epoch": 2.13, + "learning_rate": 8.667212265461343e-05, + "loss": 4.1013, + "step": 1136500 + }, + { + "epoch": 2.13, + "learning_rate": 8.65782696509419e-05, + "loss": 4.1161, + "step": 1137000 + }, + { + "epoch": 2.14, + "learning_rate": 8.648441664727038e-05, + "loss": 4.0952, + "step": 1137500 + }, + { + "epoch": 2.14, + "learning_rate": 8.639056364359885e-05, + "loss": 4.1192, + "step": 1138000 + }, + { + "epoch": 2.14, + "learning_rate": 8.62967106399273e-05, + "loss": 4.1057, + "step": 1138500 + }, + { + "epoch": 2.14, + "learning_rate": 8.620285763625578e-05, + "loss": 4.0951, + "step": 1139000 + }, + { + "epoch": 2.14, + "learning_rate": 8.610900463258426e-05, + "loss": 4.1038, + "step": 1139500 + }, + { + "epoch": 2.14, + "learning_rate": 8.601515162891272e-05, + "loss": 4.0837, + "step": 1140000 + }, + { + "epoch": 2.14, + "learning_rate": 8.59212986252412e-05, + "loss": 4.1054, + "step": 1140500 + }, + { + "epoch": 2.14, + "learning_rate": 8.582744562156966e-05, + "loss": 4.1175, + "step": 1141000 + }, + { + "epoch": 2.14, + "learning_rate": 8.573359261789813e-05, + "loss": 4.0941, + "step": 1141500 + }, + { + "epoch": 2.14, + "learning_rate": 8.56397396142266e-05, + "loss": 4.0994, + "step": 1142000 + }, + { + "epoch": 2.14, + "learning_rate": 8.554588661055507e-05, + "loss": 4.1103, + "step": 1142500 + }, + { + "epoch": 2.15, + "learning_rate": 8.545203360688355e-05, + "loss": 4.1026, + "step": 1143000 + }, + { + "epoch": 2.15, + "learning_rate": 8.535818060321202e-05, + "loss": 4.1127, + "step": 1143500 + }, + { + "epoch": 2.15, + "learning_rate": 8.526432759954049e-05, + "loss": 4.1059, + "step": 1144000 + }, + { + "epoch": 2.15, + "learning_rate": 8.517047459586897e-05, + "loss": 4.1182, + "step": 1144500 + }, + { + "epoch": 2.15, + "learning_rate": 8.507662159219744e-05, + "loss": 4.1071, + "step": 1145000 + }, + { + "epoch": 2.15, + "learning_rate": 8.498276858852589e-05, + "loss": 4.1002, + "step": 1145500 + }, + { + "epoch": 2.15, + "learning_rate": 8.488891558485437e-05, + "loss": 4.1182, + "step": 1146000 + }, + { + "epoch": 2.15, + "learning_rate": 8.479506258118285e-05, + "loss": 4.1034, + "step": 1146500 + }, + { + "epoch": 2.15, + "learning_rate": 8.470120957751131e-05, + "loss": 4.1135, + "step": 1147000 + }, + { + "epoch": 2.15, + "learning_rate": 8.460735657383979e-05, + "loss": 4.1225, + "step": 1147500 + }, + { + "epoch": 2.15, + "learning_rate": 8.451350357016824e-05, + "loss": 4.1207, + "step": 1148000 + }, + { + "epoch": 2.16, + "learning_rate": 8.441965056649672e-05, + "loss": 4.1078, + "step": 1148500 + }, + { + "epoch": 2.16, + "learning_rate": 8.43257975628252e-05, + "loss": 4.0904, + "step": 1149000 + }, + { + "epoch": 2.16, + "learning_rate": 8.423194455915366e-05, + "loss": 4.1124, + "step": 1149500 + }, + { + "epoch": 2.16, + "learning_rate": 8.413809155548214e-05, + "loss": 4.1007, + "step": 1150000 + }, + { + "epoch": 2.16, + "learning_rate": 8.404423855181061e-05, + "loss": 4.0937, + "step": 1150500 + }, + { + "epoch": 2.16, + "learning_rate": 8.395038554813908e-05, + "loss": 4.1043, + "step": 1151000 + }, + { + "epoch": 2.16, + "learning_rate": 8.385653254446755e-05, + "loss": 4.1088, + "step": 1151500 + }, + { + "epoch": 2.16, + "learning_rate": 8.376267954079602e-05, + "loss": 4.1029, + "step": 1152000 + }, + { + "epoch": 2.16, + "learning_rate": 8.366882653712448e-05, + "loss": 4.1002, + "step": 1152500 + }, + { + "epoch": 2.16, + "learning_rate": 8.357497353345296e-05, + "loss": 4.111, + "step": 1153000 + }, + { + "epoch": 2.17, + "learning_rate": 8.348112052978142e-05, + "loss": 4.1253, + "step": 1153500 + }, + { + "epoch": 2.17, + "learning_rate": 8.33872675261099e-05, + "loss": 4.1056, + "step": 1154000 + }, + { + "epoch": 2.17, + "learning_rate": 8.329341452243838e-05, + "loss": 4.0936, + "step": 1154500 + }, + { + "epoch": 2.17, + "learning_rate": 8.319956151876683e-05, + "loss": 4.1043, + "step": 1155000 + }, + { + "epoch": 2.17, + "learning_rate": 8.31057085150953e-05, + "loss": 4.0881, + "step": 1155500 + }, + { + "epoch": 2.17, + "learning_rate": 8.301185551142378e-05, + "loss": 4.0945, + "step": 1156000 + }, + { + "epoch": 2.17, + "learning_rate": 8.291800250775225e-05, + "loss": 4.1038, + "step": 1156500 + }, + { + "epoch": 2.17, + "learning_rate": 8.282414950408072e-05, + "loss": 4.1109, + "step": 1157000 + }, + { + "epoch": 2.17, + "learning_rate": 8.27302965004092e-05, + "loss": 4.1039, + "step": 1157500 + }, + { + "epoch": 2.17, + "learning_rate": 8.263644349673765e-05, + "loss": 4.1109, + "step": 1158000 + }, + { + "epoch": 2.17, + "learning_rate": 8.254259049306613e-05, + "loss": 4.104, + "step": 1158500 + }, + { + "epoch": 2.18, + "learning_rate": 8.24487374893946e-05, + "loss": 4.1112, + "step": 1159000 + }, + { + "epoch": 2.18, + "learning_rate": 8.235488448572307e-05, + "loss": 4.1087, + "step": 1159500 + }, + { + "epoch": 2.18, + "learning_rate": 8.226103148205155e-05, + "loss": 4.123, + "step": 1160000 + }, + { + "epoch": 2.18, + "learning_rate": 8.216717847838001e-05, + "loss": 4.1012, + "step": 1160500 + }, + { + "epoch": 2.18, + "learning_rate": 8.207332547470849e-05, + "loss": 4.119, + "step": 1161000 + }, + { + "epoch": 2.18, + "learning_rate": 8.197947247103697e-05, + "loss": 4.1136, + "step": 1161500 + }, + { + "epoch": 2.18, + "learning_rate": 8.188561946736542e-05, + "loss": 4.1048, + "step": 1162000 + }, + { + "epoch": 2.18, + "learning_rate": 8.17917664636939e-05, + "loss": 4.0936, + "step": 1162500 + }, + { + "epoch": 2.18, + "learning_rate": 8.169791346002237e-05, + "loss": 4.1135, + "step": 1163000 + }, + { + "epoch": 2.18, + "learning_rate": 8.160406045635084e-05, + "loss": 4.1141, + "step": 1163500 + }, + { + "epoch": 2.18, + "learning_rate": 8.151020745267931e-05, + "loss": 4.1368, + "step": 1164000 + }, + { + "epoch": 2.19, + "learning_rate": 8.141635444900779e-05, + "loss": 4.0932, + "step": 1164500 + }, + { + "epoch": 2.19, + "learning_rate": 8.132250144533624e-05, + "loss": 4.1007, + "step": 1165000 + }, + { + "epoch": 2.19, + "learning_rate": 8.122864844166472e-05, + "loss": 4.0984, + "step": 1165500 + }, + { + "epoch": 2.19, + "learning_rate": 8.113479543799318e-05, + "loss": 4.1073, + "step": 1166000 + }, + { + "epoch": 2.19, + "learning_rate": 8.104094243432166e-05, + "loss": 4.1027, + "step": 1166500 + }, + { + "epoch": 2.19, + "learning_rate": 8.094708943065014e-05, + "loss": 4.0946, + "step": 1167000 + }, + { + "epoch": 2.19, + "learning_rate": 8.08532364269786e-05, + "loss": 4.108, + "step": 1167500 + }, + { + "epoch": 2.19, + "learning_rate": 8.075938342330708e-05, + "loss": 4.1134, + "step": 1168000 + }, + { + "epoch": 2.19, + "learning_rate": 8.066553041963554e-05, + "loss": 4.1216, + "step": 1168500 + }, + { + "epoch": 2.19, + "learning_rate": 8.0571677415964e-05, + "loss": 4.0972, + "step": 1169000 + }, + { + "epoch": 2.2, + "learning_rate": 8.047782441229248e-05, + "loss": 4.1026, + "step": 1169500 + }, + { + "epoch": 2.2, + "learning_rate": 8.038397140862096e-05, + "loss": 4.0985, + "step": 1170000 + }, + { + "epoch": 2.2, + "learning_rate": 8.029011840494942e-05, + "loss": 4.0966, + "step": 1170500 + }, + { + "epoch": 2.2, + "learning_rate": 8.01962654012779e-05, + "loss": 4.1163, + "step": 1171000 + }, + { + "epoch": 2.2, + "learning_rate": 8.010241239760635e-05, + "loss": 4.0955, + "step": 1171500 + }, + { + "epoch": 2.2, + "learning_rate": 8.000855939393483e-05, + "loss": 4.1089, + "step": 1172000 + }, + { + "epoch": 2.2, + "learning_rate": 7.991470639026331e-05, + "loss": 4.1012, + "step": 1172500 + }, + { + "epoch": 2.2, + "learning_rate": 7.982085338659177e-05, + "loss": 4.102, + "step": 1173000 + }, + { + "epoch": 2.2, + "learning_rate": 7.972700038292025e-05, + "loss": 4.1096, + "step": 1173500 + }, + { + "epoch": 2.2, + "learning_rate": 7.963314737924873e-05, + "loss": 4.0962, + "step": 1174000 + }, + { + "epoch": 2.2, + "learning_rate": 7.953929437557719e-05, + "loss": 4.107, + "step": 1174500 + }, + { + "epoch": 2.21, + "learning_rate": 7.944544137190565e-05, + "loss": 4.0989, + "step": 1175000 + }, + { + "epoch": 2.21, + "learning_rate": 7.935158836823413e-05, + "loss": 4.106, + "step": 1175500 + }, + { + "epoch": 2.21, + "learning_rate": 7.92577353645626e-05, + "loss": 4.1022, + "step": 1176000 + }, + { + "epoch": 2.21, + "learning_rate": 7.916388236089107e-05, + "loss": 4.1093, + "step": 1176500 + }, + { + "epoch": 2.21, + "learning_rate": 7.907002935721955e-05, + "loss": 4.0966, + "step": 1177000 + }, + { + "epoch": 2.21, + "learning_rate": 7.897617635354801e-05, + "loss": 4.1013, + "step": 1177500 + }, + { + "epoch": 2.21, + "learning_rate": 7.888232334987649e-05, + "loss": 4.107, + "step": 1178000 + }, + { + "epoch": 2.21, + "learning_rate": 7.878847034620494e-05, + "loss": 4.1036, + "step": 1178500 + }, + { + "epoch": 2.21, + "learning_rate": 7.869461734253342e-05, + "loss": 4.1103, + "step": 1179000 + }, + { + "epoch": 2.21, + "learning_rate": 7.86007643388619e-05, + "loss": 4.1004, + "step": 1179500 + }, + { + "epoch": 2.21, + "learning_rate": 7.850691133519036e-05, + "loss": 4.1094, + "step": 1180000 + }, + { + "epoch": 2.22, + "learning_rate": 7.841305833151884e-05, + "loss": 4.0902, + "step": 1180500 + }, + { + "epoch": 2.22, + "learning_rate": 7.831920532784732e-05, + "loss": 4.1044, + "step": 1181000 + }, + { + "epoch": 2.22, + "learning_rate": 7.822535232417577e-05, + "loss": 4.102, + "step": 1181500 + }, + { + "epoch": 2.22, + "learning_rate": 7.813149932050424e-05, + "loss": 4.1137, + "step": 1182000 + }, + { + "epoch": 2.22, + "learning_rate": 7.803764631683272e-05, + "loss": 4.1135, + "step": 1182500 + }, + { + "epoch": 2.22, + "learning_rate": 7.794379331316118e-05, + "loss": 4.1144, + "step": 1183000 + }, + { + "epoch": 2.22, + "learning_rate": 7.784994030948966e-05, + "loss": 4.1082, + "step": 1183500 + }, + { + "epoch": 2.22, + "learning_rate": 7.775608730581813e-05, + "loss": 4.1202, + "step": 1184000 + }, + { + "epoch": 2.22, + "learning_rate": 7.76622343021466e-05, + "loss": 4.1067, + "step": 1184500 + }, + { + "epoch": 2.22, + "learning_rate": 7.756838129847508e-05, + "loss": 4.0964, + "step": 1185000 + }, + { + "epoch": 2.23, + "learning_rate": 7.747452829480353e-05, + "loss": 4.1089, + "step": 1185500 + }, + { + "epoch": 2.23, + "learning_rate": 7.738067529113201e-05, + "loss": 4.1162, + "step": 1186000 + }, + { + "epoch": 2.23, + "learning_rate": 7.728682228746049e-05, + "loss": 4.112, + "step": 1186500 + }, + { + "epoch": 2.23, + "learning_rate": 7.719296928378895e-05, + "loss": 4.1055, + "step": 1187000 + }, + { + "epoch": 2.23, + "learning_rate": 7.709911628011743e-05, + "loss": 4.1009, + "step": 1187500 + }, + { + "epoch": 2.23, + "learning_rate": 7.70052632764459e-05, + "loss": 4.0818, + "step": 1188000 + }, + { + "epoch": 2.23, + "learning_rate": 7.691141027277435e-05, + "loss": 4.1135, + "step": 1188500 + }, + { + "epoch": 2.23, + "learning_rate": 7.681755726910283e-05, + "loss": 4.0935, + "step": 1189000 + }, + { + "epoch": 2.23, + "learning_rate": 7.67237042654313e-05, + "loss": 4.1257, + "step": 1189500 + }, + { + "epoch": 2.23, + "learning_rate": 7.662985126175977e-05, + "loss": 4.1024, + "step": 1190000 + }, + { + "epoch": 2.23, + "learning_rate": 7.653599825808825e-05, + "loss": 4.0994, + "step": 1190500 + }, + { + "epoch": 2.24, + "learning_rate": 7.644214525441671e-05, + "loss": 4.1024, + "step": 1191000 + }, + { + "epoch": 2.24, + "learning_rate": 7.634829225074519e-05, + "loss": 4.1263, + "step": 1191500 + }, + { + "epoch": 2.24, + "learning_rate": 7.625443924707366e-05, + "loss": 4.1153, + "step": 1192000 + }, + { + "epoch": 2.24, + "learning_rate": 7.616058624340212e-05, + "loss": 4.0918, + "step": 1192500 + }, + { + "epoch": 2.24, + "learning_rate": 7.60667332397306e-05, + "loss": 4.1032, + "step": 1193000 + }, + { + "epoch": 2.24, + "learning_rate": 7.597288023605907e-05, + "loss": 4.1051, + "step": 1193500 + }, + { + "epoch": 2.24, + "learning_rate": 7.587902723238754e-05, + "loss": 4.1022, + "step": 1194000 + }, + { + "epoch": 2.24, + "learning_rate": 7.578517422871602e-05, + "loss": 4.106, + "step": 1194500 + }, + { + "epoch": 2.24, + "learning_rate": 7.569132122504449e-05, + "loss": 4.1124, + "step": 1195000 + }, + { + "epoch": 2.24, + "learning_rate": 7.559746822137294e-05, + "loss": 4.0926, + "step": 1195500 + }, + { + "epoch": 2.24, + "learning_rate": 7.550361521770142e-05, + "loss": 4.1068, + "step": 1196000 + }, + { + "epoch": 2.25, + "learning_rate": 7.540976221402988e-05, + "loss": 4.1004, + "step": 1196500 + }, + { + "epoch": 2.25, + "learning_rate": 7.531590921035836e-05, + "loss": 4.0918, + "step": 1197000 + }, + { + "epoch": 2.25, + "learning_rate": 7.522205620668684e-05, + "loss": 4.0949, + "step": 1197500 + }, + { + "epoch": 2.25, + "learning_rate": 7.51282032030153e-05, + "loss": 4.0948, + "step": 1198000 + }, + { + "epoch": 2.25, + "learning_rate": 7.503435019934377e-05, + "loss": 4.0955, + "step": 1198500 + }, + { + "epoch": 2.25, + "learning_rate": 7.494049719567224e-05, + "loss": 4.0958, + "step": 1199000 + }, + { + "epoch": 2.25, + "learning_rate": 7.484664419200072e-05, + "loss": 4.1228, + "step": 1199500 + }, + { + "epoch": 2.25, + "learning_rate": 7.475279118832919e-05, + "loss": 4.094, + "step": 1200000 + }, + { + "epoch": 2.25, + "learning_rate": 7.465893818465765e-05, + "loss": 4.1212, + "step": 1200500 + }, + { + "epoch": 2.25, + "learning_rate": 7.456508518098613e-05, + "loss": 4.1101, + "step": 1201000 + }, + { + "epoch": 2.26, + "learning_rate": 7.44712321773146e-05, + "loss": 4.1151, + "step": 1201500 + }, + { + "epoch": 2.26, + "learning_rate": 7.437737917364307e-05, + "loss": 4.1059, + "step": 1202000 + }, + { + "epoch": 2.26, + "learning_rate": 7.428352616997153e-05, + "loss": 4.1077, + "step": 1202500 + }, + { + "epoch": 2.26, + "learning_rate": 7.418967316630001e-05, + "loss": 4.0735, + "step": 1203000 + }, + { + "epoch": 2.26, + "learning_rate": 7.409582016262847e-05, + "loss": 4.1093, + "step": 1203500 + }, + { + "epoch": 2.26, + "learning_rate": 7.400196715895695e-05, + "loss": 4.0987, + "step": 1204000 + }, + { + "epoch": 2.26, + "learning_rate": 7.390811415528541e-05, + "loss": 4.093, + "step": 1204500 + }, + { + "epoch": 2.26, + "learning_rate": 7.381426115161389e-05, + "loss": 4.0789, + "step": 1205000 + }, + { + "epoch": 2.26, + "learning_rate": 7.372040814794236e-05, + "loss": 4.0952, + "step": 1205500 + }, + { + "epoch": 2.26, + "learning_rate": 7.362655514427083e-05, + "loss": 4.1015, + "step": 1206000 + }, + { + "epoch": 2.26, + "learning_rate": 7.35327021405993e-05, + "loss": 4.1063, + "step": 1206500 + }, + { + "epoch": 2.27, + "learning_rate": 7.343884913692777e-05, + "loss": 4.0828, + "step": 1207000 + }, + { + "epoch": 2.27, + "learning_rate": 7.334499613325624e-05, + "loss": 4.0942, + "step": 1207500 + }, + { + "epoch": 2.27, + "learning_rate": 7.325114312958472e-05, + "loss": 4.1159, + "step": 1208000 + }, + { + "epoch": 2.27, + "learning_rate": 7.31572901259132e-05, + "loss": 4.1063, + "step": 1208500 + }, + { + "epoch": 2.27, + "learning_rate": 7.306343712224166e-05, + "loss": 4.0955, + "step": 1209000 + }, + { + "epoch": 2.27, + "learning_rate": 7.296958411857012e-05, + "loss": 4.1058, + "step": 1209500 + }, + { + "epoch": 2.27, + "learning_rate": 7.287573111489859e-05, + "loss": 4.112, + "step": 1210000 + }, + { + "epoch": 2.27, + "learning_rate": 7.278187811122706e-05, + "loss": 4.0972, + "step": 1210500 + }, + { + "epoch": 2.27, + "learning_rate": 7.268802510755554e-05, + "loss": 4.1061, + "step": 1211000 + }, + { + "epoch": 2.27, + "learning_rate": 7.2594172103884e-05, + "loss": 4.094, + "step": 1211500 + }, + { + "epoch": 2.27, + "learning_rate": 7.250031910021247e-05, + "loss": 4.1078, + "step": 1212000 + }, + { + "epoch": 2.28, + "learning_rate": 7.240646609654095e-05, + "loss": 4.1167, + "step": 1212500 + }, + { + "epoch": 2.28, + "learning_rate": 7.231261309286942e-05, + "loss": 4.0967, + "step": 1213000 + }, + { + "epoch": 2.28, + "learning_rate": 7.221876008919789e-05, + "loss": 4.1071, + "step": 1213500 + }, + { + "epoch": 2.28, + "learning_rate": 7.212490708552636e-05, + "loss": 4.1017, + "step": 1214000 + }, + { + "epoch": 2.28, + "learning_rate": 7.203105408185483e-05, + "loss": 4.1092, + "step": 1214500 + }, + { + "epoch": 2.28, + "learning_rate": 7.19372010781833e-05, + "loss": 4.0823, + "step": 1215000 + }, + { + "epoch": 2.28, + "learning_rate": 7.184334807451177e-05, + "loss": 4.1099, + "step": 1215500 + }, + { + "epoch": 2.28, + "learning_rate": 7.174949507084025e-05, + "loss": 4.093, + "step": 1216000 + }, + { + "epoch": 2.28, + "learning_rate": 7.165564206716871e-05, + "loss": 4.089, + "step": 1216500 + }, + { + "epoch": 2.28, + "learning_rate": 7.156178906349717e-05, + "loss": 4.08, + "step": 1217000 + }, + { + "epoch": 2.29, + "learning_rate": 7.146793605982565e-05, + "loss": 4.1275, + "step": 1217500 + }, + { + "epoch": 2.29, + "learning_rate": 7.137408305615413e-05, + "loss": 4.1133, + "step": 1218000 + }, + { + "epoch": 2.29, + "learning_rate": 7.128023005248259e-05, + "loss": 4.0937, + "step": 1218500 + }, + { + "epoch": 2.29, + "learning_rate": 7.118637704881106e-05, + "loss": 4.111, + "step": 1219000 + }, + { + "epoch": 2.29, + "learning_rate": 7.109252404513953e-05, + "loss": 4.1103, + "step": 1219500 + }, + { + "epoch": 2.29, + "learning_rate": 7.099867104146801e-05, + "loss": 4.1054, + "step": 1220000 + }, + { + "epoch": 2.29, + "learning_rate": 7.090481803779648e-05, + "loss": 4.0776, + "step": 1220500 + }, + { + "epoch": 2.29, + "learning_rate": 7.081096503412494e-05, + "loss": 4.0992, + "step": 1221000 + }, + { + "epoch": 2.29, + "learning_rate": 7.071711203045342e-05, + "loss": 4.0908, + "step": 1221500 + }, + { + "epoch": 2.29, + "learning_rate": 7.062325902678188e-05, + "loss": 4.0832, + "step": 1222000 + }, + { + "epoch": 2.29, + "learning_rate": 7.052940602311036e-05, + "loss": 4.1159, + "step": 1222500 + }, + { + "epoch": 2.3, + "learning_rate": 7.043555301943884e-05, + "loss": 4.0935, + "step": 1223000 + }, + { + "epoch": 2.3, + "learning_rate": 7.03417000157673e-05, + "loss": 4.0953, + "step": 1223500 + }, + { + "epoch": 2.3, + "learning_rate": 7.024784701209576e-05, + "loss": 4.1198, + "step": 1224000 + }, + { + "epoch": 2.3, + "learning_rate": 7.015399400842424e-05, + "loss": 4.078, + "step": 1224500 + }, + { + "epoch": 2.3, + "learning_rate": 7.006014100475272e-05, + "loss": 4.0921, + "step": 1225000 + }, + { + "epoch": 2.3, + "learning_rate": 6.996628800108118e-05, + "loss": 4.0981, + "step": 1225500 + }, + { + "epoch": 2.3, + "learning_rate": 6.987243499740965e-05, + "loss": 4.0983, + "step": 1226000 + }, + { + "epoch": 2.3, + "learning_rate": 6.977858199373812e-05, + "loss": 4.0922, + "step": 1226500 + }, + { + "epoch": 2.3, + "learning_rate": 6.968472899006659e-05, + "loss": 4.1017, + "step": 1227000 + }, + { + "epoch": 2.3, + "learning_rate": 6.959087598639506e-05, + "loss": 4.1147, + "step": 1227500 + }, + { + "epoch": 2.31, + "learning_rate": 6.949702298272353e-05, + "loss": 4.0916, + "step": 1228000 + }, + { + "epoch": 2.31, + "learning_rate": 6.9403169979052e-05, + "loss": 4.0987, + "step": 1228500 + }, + { + "epoch": 2.31, + "learning_rate": 6.930931697538047e-05, + "loss": 4.1038, + "step": 1229000 + }, + { + "epoch": 2.31, + "learning_rate": 6.921546397170895e-05, + "loss": 4.1036, + "step": 1229500 + }, + { + "epoch": 2.31, + "learning_rate": 6.912161096803742e-05, + "loss": 4.1167, + "step": 1230000 + }, + { + "epoch": 2.31, + "learning_rate": 6.902775796436589e-05, + "loss": 4.0901, + "step": 1230500 + }, + { + "epoch": 2.31, + "learning_rate": 6.893390496069435e-05, + "loss": 4.0888, + "step": 1231000 + }, + { + "epoch": 2.31, + "learning_rate": 6.884005195702283e-05, + "loss": 4.078, + "step": 1231500 + }, + { + "epoch": 2.31, + "learning_rate": 6.874619895335131e-05, + "loss": 4.0876, + "step": 1232000 + }, + { + "epoch": 2.31, + "learning_rate": 6.865234594967977e-05, + "loss": 4.0953, + "step": 1232500 + }, + { + "epoch": 2.31, + "learning_rate": 6.855849294600823e-05, + "loss": 4.1031, + "step": 1233000 + }, + { + "epoch": 2.32, + "learning_rate": 6.84646399423367e-05, + "loss": 4.0961, + "step": 1233500 + }, + { + "epoch": 2.32, + "learning_rate": 6.837078693866518e-05, + "loss": 4.0956, + "step": 1234000 + }, + { + "epoch": 2.32, + "learning_rate": 6.827693393499365e-05, + "loss": 4.1023, + "step": 1234500 + }, + { + "epoch": 2.32, + "learning_rate": 6.818308093132212e-05, + "loss": 4.0994, + "step": 1235000 + }, + { + "epoch": 2.32, + "learning_rate": 6.80892279276506e-05, + "loss": 4.1028, + "step": 1235500 + }, + { + "epoch": 2.32, + "learning_rate": 6.799537492397906e-05, + "loss": 4.0757, + "step": 1236000 + }, + { + "epoch": 2.32, + "learning_rate": 6.790152192030754e-05, + "loss": 4.1009, + "step": 1236500 + }, + { + "epoch": 2.32, + "learning_rate": 6.7807668916636e-05, + "loss": 4.0908, + "step": 1237000 + }, + { + "epoch": 2.32, + "learning_rate": 6.771381591296448e-05, + "loss": 4.1133, + "step": 1237500 + }, + { + "epoch": 2.32, + "learning_rate": 6.761996290929294e-05, + "loss": 4.1037, + "step": 1238000 + }, + { + "epoch": 2.32, + "learning_rate": 6.752610990562142e-05, + "loss": 4.0813, + "step": 1238500 + }, + { + "epoch": 2.33, + "learning_rate": 6.743225690194988e-05, + "loss": 4.1041, + "step": 1239000 + }, + { + "epoch": 2.33, + "learning_rate": 6.733840389827836e-05, + "loss": 4.1025, + "step": 1239500 + }, + { + "epoch": 2.33, + "learning_rate": 6.724455089460682e-05, + "loss": 4.0822, + "step": 1240000 + }, + { + "epoch": 2.33, + "learning_rate": 6.715069789093529e-05, + "loss": 4.1142, + "step": 1240500 + }, + { + "epoch": 2.33, + "learning_rate": 6.705684488726376e-05, + "loss": 4.0983, + "step": 1241000 + }, + { + "epoch": 2.33, + "learning_rate": 6.696299188359224e-05, + "loss": 4.0867, + "step": 1241500 + }, + { + "epoch": 2.33, + "learning_rate": 6.68691388799207e-05, + "loss": 4.0851, + "step": 1242000 + }, + { + "epoch": 2.33, + "learning_rate": 6.677528587624917e-05, + "loss": 4.1189, + "step": 1242500 + }, + { + "epoch": 2.33, + "learning_rate": 6.668143287257765e-05, + "loss": 4.0714, + "step": 1243000 + }, + { + "epoch": 2.33, + "learning_rate": 6.658757986890612e-05, + "loss": 4.0986, + "step": 1243500 + }, + { + "epoch": 2.34, + "learning_rate": 6.649372686523459e-05, + "loss": 4.1201, + "step": 1244000 + }, + { + "epoch": 2.34, + "learning_rate": 6.639987386156307e-05, + "loss": 4.1051, + "step": 1244500 + }, + { + "epoch": 2.34, + "learning_rate": 6.630602085789153e-05, + "loss": 4.0986, + "step": 1245000 + }, + { + "epoch": 2.34, + "learning_rate": 6.621216785422e-05, + "loss": 4.1061, + "step": 1245500 + }, + { + "epoch": 2.34, + "learning_rate": 6.611831485054847e-05, + "loss": 4.1017, + "step": 1246000 + }, + { + "epoch": 2.34, + "learning_rate": 6.602446184687695e-05, + "loss": 4.0975, + "step": 1246500 + }, + { + "epoch": 2.34, + "learning_rate": 6.593060884320541e-05, + "loss": 4.0997, + "step": 1247000 + }, + { + "epoch": 2.34, + "learning_rate": 6.583675583953388e-05, + "loss": 4.1023, + "step": 1247500 + }, + { + "epoch": 2.34, + "learning_rate": 6.574290283586235e-05, + "loss": 4.0847, + "step": 1248000 + }, + { + "epoch": 2.34, + "learning_rate": 6.564904983219083e-05, + "loss": 4.0966, + "step": 1248500 + }, + { + "epoch": 2.34, + "learning_rate": 6.55551968285193e-05, + "loss": 4.0887, + "step": 1249000 + }, + { + "epoch": 2.35, + "learning_rate": 6.546134382484776e-05, + "loss": 4.0982, + "step": 1249500 + }, + { + "epoch": 2.35, + "learning_rate": 6.536749082117624e-05, + "loss": 4.0799, + "step": 1250000 + }, + { + "epoch": 2.35, + "learning_rate": 6.52736378175047e-05, + "loss": 4.092, + "step": 1250500 + }, + { + "epoch": 2.35, + "learning_rate": 6.517978481383318e-05, + "loss": 4.0915, + "step": 1251000 + }, + { + "epoch": 2.35, + "learning_rate": 6.508593181016164e-05, + "loss": 4.0926, + "step": 1251500 + }, + { + "epoch": 2.35, + "learning_rate": 6.499207880649012e-05, + "loss": 4.0992, + "step": 1252000 + }, + { + "epoch": 2.35, + "learning_rate": 6.489822580281858e-05, + "loss": 4.1042, + "step": 1252500 + }, + { + "epoch": 2.35, + "learning_rate": 6.480437279914706e-05, + "loss": 4.0993, + "step": 1253000 + }, + { + "epoch": 2.35, + "learning_rate": 6.471051979547554e-05, + "loss": 4.0977, + "step": 1253500 + }, + { + "epoch": 2.35, + "learning_rate": 6.4616666791804e-05, + "loss": 4.1063, + "step": 1254000 + }, + { + "epoch": 2.35, + "learning_rate": 6.452281378813247e-05, + "loss": 4.1063, + "step": 1254500 + }, + { + "epoch": 2.36, + "learning_rate": 6.442896078446094e-05, + "loss": 4.0871, + "step": 1255000 + }, + { + "epoch": 2.36, + "learning_rate": 6.43351077807894e-05, + "loss": 4.0933, + "step": 1255500 + }, + { + "epoch": 2.36, + "learning_rate": 6.424125477711788e-05, + "loss": 4.0835, + "step": 1256000 + }, + { + "epoch": 2.36, + "learning_rate": 6.414740177344635e-05, + "loss": 4.1049, + "step": 1256500 + }, + { + "epoch": 2.36, + "learning_rate": 6.405354876977483e-05, + "loss": 4.0941, + "step": 1257000 + }, + { + "epoch": 2.36, + "learning_rate": 6.395969576610329e-05, + "loss": 4.1, + "step": 1257500 + }, + { + "epoch": 2.36, + "learning_rate": 6.386584276243177e-05, + "loss": 4.102, + "step": 1258000 + }, + { + "epoch": 2.36, + "learning_rate": 6.377198975876023e-05, + "loss": 4.0943, + "step": 1258500 + }, + { + "epoch": 2.36, + "learning_rate": 6.367813675508871e-05, + "loss": 4.1028, + "step": 1259000 + }, + { + "epoch": 2.36, + "learning_rate": 6.358428375141717e-05, + "loss": 4.0894, + "step": 1259500 + }, + { + "epoch": 2.37, + "learning_rate": 6.349043074774565e-05, + "loss": 4.0958, + "step": 1260000 + }, + { + "epoch": 2.37, + "learning_rate": 6.339657774407411e-05, + "loss": 4.1097, + "step": 1260500 + }, + { + "epoch": 2.37, + "learning_rate": 6.330272474040259e-05, + "loss": 4.1162, + "step": 1261000 + }, + { + "epoch": 2.37, + "learning_rate": 6.320887173673105e-05, + "loss": 4.0978, + "step": 1261500 + }, + { + "epoch": 2.37, + "learning_rate": 6.311501873305952e-05, + "loss": 4.0811, + "step": 1262000 + }, + { + "epoch": 2.37, + "learning_rate": 6.3021165729388e-05, + "loss": 4.0884, + "step": 1262500 + }, + { + "epoch": 2.37, + "learning_rate": 6.292731272571647e-05, + "loss": 4.0902, + "step": 1263000 + }, + { + "epoch": 2.37, + "learning_rate": 6.283345972204494e-05, + "loss": 4.0878, + "step": 1263500 + }, + { + "epoch": 2.37, + "learning_rate": 6.27396067183734e-05, + "loss": 4.0935, + "step": 1264000 + }, + { + "epoch": 2.37, + "learning_rate": 6.264575371470188e-05, + "loss": 4.1081, + "step": 1264500 + }, + { + "epoch": 2.37, + "learning_rate": 6.255190071103036e-05, + "loss": 4.112, + "step": 1265000 + }, + { + "epoch": 2.38, + "learning_rate": 6.245804770735882e-05, + "loss": 4.0864, + "step": 1265500 + }, + { + "epoch": 2.38, + "learning_rate": 6.23641947036873e-05, + "loss": 4.1276, + "step": 1266000 + }, + { + "epoch": 2.38, + "learning_rate": 6.227034170001576e-05, + "loss": 4.0768, + "step": 1266500 + }, + { + "epoch": 2.38, + "learning_rate": 6.217648869634424e-05, + "loss": 4.095, + "step": 1267000 + }, + { + "epoch": 2.38, + "learning_rate": 6.20826356926727e-05, + "loss": 4.1089, + "step": 1267500 + }, + { + "epoch": 2.38, + "learning_rate": 6.198878268900118e-05, + "loss": 4.092, + "step": 1268000 + }, + { + "epoch": 2.38, + "learning_rate": 6.189492968532964e-05, + "loss": 4.07, + "step": 1268500 + }, + { + "epoch": 2.38, + "learning_rate": 6.180107668165811e-05, + "loss": 4.1085, + "step": 1269000 + }, + { + "epoch": 2.38, + "learning_rate": 6.170722367798658e-05, + "loss": 4.0897, + "step": 1269500 + }, + { + "epoch": 2.38, + "learning_rate": 6.161337067431506e-05, + "loss": 4.1174, + "step": 1270000 + }, + { + "epoch": 2.38, + "learning_rate": 6.151951767064353e-05, + "loss": 4.0826, + "step": 1270500 + }, + { + "epoch": 2.39, + "learning_rate": 6.142566466697199e-05, + "loss": 4.0797, + "step": 1271000 + }, + { + "epoch": 2.39, + "learning_rate": 6.133181166330047e-05, + "loss": 4.0879, + "step": 1271500 + }, + { + "epoch": 2.39, + "learning_rate": 6.123795865962894e-05, + "loss": 4.1003, + "step": 1272000 + }, + { + "epoch": 2.39, + "learning_rate": 6.114410565595741e-05, + "loss": 4.0919, + "step": 1272500 + }, + { + "epoch": 2.39, + "learning_rate": 6.105025265228587e-05, + "loss": 4.1016, + "step": 1273000 + }, + { + "epoch": 2.39, + "learning_rate": 6.095639964861435e-05, + "loss": 4.0906, + "step": 1273500 + }, + { + "epoch": 2.39, + "learning_rate": 6.086254664494282e-05, + "loss": 4.1043, + "step": 1274000 + }, + { + "epoch": 2.39, + "learning_rate": 6.076869364127129e-05, + "loss": 4.0874, + "step": 1274500 + }, + { + "epoch": 2.39, + "learning_rate": 6.067484063759976e-05, + "loss": 4.0982, + "step": 1275000 + }, + { + "epoch": 2.39, + "learning_rate": 6.058098763392823e-05, + "loss": 4.0894, + "step": 1275500 + }, + { + "epoch": 2.4, + "learning_rate": 6.04871346302567e-05, + "loss": 4.0958, + "step": 1276000 + }, + { + "epoch": 2.4, + "learning_rate": 6.039328162658517e-05, + "loss": 4.0812, + "step": 1276500 + }, + { + "epoch": 2.4, + "learning_rate": 6.0299428622913644e-05, + "loss": 4.0904, + "step": 1277000 + }, + { + "epoch": 2.4, + "learning_rate": 6.0205575619242115e-05, + "loss": 4.0993, + "step": 1277500 + }, + { + "epoch": 2.4, + "learning_rate": 6.011172261557058e-05, + "loss": 4.0997, + "step": 1278000 + }, + { + "epoch": 2.4, + "learning_rate": 6.001786961189905e-05, + "loss": 4.1008, + "step": 1278500 + }, + { + "epoch": 2.4, + "learning_rate": 5.992401660822753e-05, + "loss": 4.1016, + "step": 1279000 + }, + { + "epoch": 2.4, + "learning_rate": 5.9830163604556e-05, + "loss": 4.0974, + "step": 1279500 + }, + { + "epoch": 2.4, + "learning_rate": 5.973631060088446e-05, + "loss": 4.0807, + "step": 1280000 + }, + { + "epoch": 2.4, + "learning_rate": 5.964245759721294e-05, + "loss": 4.0798, + "step": 1280500 + }, + { + "epoch": 2.4, + "learning_rate": 5.954860459354141e-05, + "loss": 4.0915, + "step": 1281000 + }, + { + "epoch": 2.41, + "learning_rate": 5.945475158986987e-05, + "loss": 4.077, + "step": 1281500 + }, + { + "epoch": 2.41, + "learning_rate": 5.9360898586198344e-05, + "loss": 4.1096, + "step": 1282000 + }, + { + "epoch": 2.41, + "learning_rate": 5.926704558252682e-05, + "loss": 4.0865, + "step": 1282500 + }, + { + "epoch": 2.41, + "learning_rate": 5.917319257885529e-05, + "loss": 4.083, + "step": 1283000 + }, + { + "epoch": 2.41, + "learning_rate": 5.9079339575183756e-05, + "loss": 4.0864, + "step": 1283500 + }, + { + "epoch": 2.41, + "learning_rate": 5.898548657151223e-05, + "loss": 4.0902, + "step": 1284000 + }, + { + "epoch": 2.41, + "learning_rate": 5.8891633567840704e-05, + "loss": 4.0884, + "step": 1284500 + }, + { + "epoch": 2.41, + "learning_rate": 5.879778056416917e-05, + "loss": 4.1099, + "step": 1285000 + }, + { + "epoch": 2.41, + "learning_rate": 5.870392756049764e-05, + "loss": 4.0829, + "step": 1285500 + }, + { + "epoch": 2.41, + "learning_rate": 5.8610074556826116e-05, + "loss": 4.0975, + "step": 1286000 + }, + { + "epoch": 2.41, + "learning_rate": 5.851622155315458e-05, + "loss": 4.1111, + "step": 1286500 + }, + { + "epoch": 2.42, + "learning_rate": 5.842236854948305e-05, + "loss": 4.103, + "step": 1287000 + }, + { + "epoch": 2.42, + "learning_rate": 5.832851554581153e-05, + "loss": 4.0883, + "step": 1287500 + }, + { + "epoch": 2.42, + "learning_rate": 5.823466254214e-05, + "loss": 4.0897, + "step": 1288000 + }, + { + "epoch": 2.42, + "learning_rate": 5.814080953846846e-05, + "loss": 4.0964, + "step": 1288500 + }, + { + "epoch": 2.42, + "learning_rate": 5.804695653479693e-05, + "loss": 4.0823, + "step": 1289000 + }, + { + "epoch": 2.42, + "learning_rate": 5.795310353112541e-05, + "loss": 4.0833, + "step": 1289500 + }, + { + "epoch": 2.42, + "learning_rate": 5.7859250527453874e-05, + "loss": 4.0981, + "step": 1290000 + }, + { + "epoch": 2.42, + "learning_rate": 5.7765397523782345e-05, + "loss": 4.0781, + "step": 1290500 + }, + { + "epoch": 2.42, + "learning_rate": 5.7671544520110815e-05, + "loss": 4.0967, + "step": 1291000 + }, + { + "epoch": 2.42, + "learning_rate": 5.7577691516439286e-05, + "loss": 4.0889, + "step": 1291500 + }, + { + "epoch": 2.43, + "learning_rate": 5.748383851276776e-05, + "loss": 4.0886, + "step": 1292000 + }, + { + "epoch": 2.43, + "learning_rate": 5.738998550909623e-05, + "loss": 4.0745, + "step": 1292500 + }, + { + "epoch": 2.43, + "learning_rate": 5.7296132505424705e-05, + "loss": 4.1027, + "step": 1293000 + }, + { + "epoch": 2.43, + "learning_rate": 5.720227950175317e-05, + "loss": 4.0981, + "step": 1293500 + }, + { + "epoch": 2.43, + "learning_rate": 5.710842649808164e-05, + "loss": 4.0851, + "step": 1294000 + }, + { + "epoch": 2.43, + "learning_rate": 5.701457349441011e-05, + "loss": 4.0905, + "step": 1294500 + }, + { + "epoch": 2.43, + "learning_rate": 5.692072049073858e-05, + "loss": 4.0839, + "step": 1295000 + }, + { + "epoch": 2.43, + "learning_rate": 5.682686748706705e-05, + "loss": 4.0818, + "step": 1295500 + }, + { + "epoch": 2.43, + "learning_rate": 5.673301448339552e-05, + "loss": 4.085, + "step": 1296000 + }, + { + "epoch": 2.43, + "learning_rate": 5.6639161479724e-05, + "loss": 4.0981, + "step": 1296500 + }, + { + "epoch": 2.43, + "learning_rate": 5.654530847605246e-05, + "loss": 4.0758, + "step": 1297000 + }, + { + "epoch": 2.44, + "learning_rate": 5.6451455472380934e-05, + "loss": 4.078, + "step": 1297500 + }, + { + "epoch": 2.44, + "learning_rate": 5.63576024687094e-05, + "loss": 4.0894, + "step": 1298000 + }, + { + "epoch": 2.44, + "learning_rate": 5.6263749465037875e-05, + "loss": 4.088, + "step": 1298500 + }, + { + "epoch": 2.44, + "learning_rate": 5.6169896461366346e-05, + "loss": 4.0978, + "step": 1299000 + }, + { + "epoch": 2.44, + "learning_rate": 5.6076043457694816e-05, + "loss": 4.0809, + "step": 1299500 + }, + { + "epoch": 2.44, + "learning_rate": 5.598219045402328e-05, + "loss": 4.0888, + "step": 1300000 + }, + { + "epoch": 2.44, + "learning_rate": 5.588833745035176e-05, + "loss": 4.1212, + "step": 1300500 + }, + { + "epoch": 2.44, + "learning_rate": 5.579448444668023e-05, + "loss": 4.0825, + "step": 1301000 + }, + { + "epoch": 2.44, + "learning_rate": 5.570063144300869e-05, + "loss": 4.1032, + "step": 1301500 + }, + { + "epoch": 2.44, + "learning_rate": 5.560677843933717e-05, + "loss": 4.0911, + "step": 1302000 + }, + { + "epoch": 2.44, + "learning_rate": 5.551292543566564e-05, + "loss": 4.0875, + "step": 1302500 + }, + { + "epoch": 2.45, + "learning_rate": 5.541907243199411e-05, + "loss": 4.0608, + "step": 1303000 + }, + { + "epoch": 2.45, + "learning_rate": 5.5325219428322575e-05, + "loss": 4.0953, + "step": 1303500 + }, + { + "epoch": 2.45, + "learning_rate": 5.523136642465105e-05, + "loss": 4.0945, + "step": 1304000 + }, + { + "epoch": 2.45, + "learning_rate": 5.513751342097952e-05, + "loss": 4.1076, + "step": 1304500 + }, + { + "epoch": 2.45, + "learning_rate": 5.5043660417307987e-05, + "loss": 4.094, + "step": 1305000 + }, + { + "epoch": 2.45, + "learning_rate": 5.4949807413636464e-05, + "loss": 4.0832, + "step": 1305500 + }, + { + "epoch": 2.45, + "learning_rate": 5.4855954409964935e-05, + "loss": 4.1009, + "step": 1306000 + }, + { + "epoch": 2.45, + "learning_rate": 5.47621014062934e-05, + "loss": 4.0724, + "step": 1306500 + }, + { + "epoch": 2.45, + "learning_rate": 5.466824840262187e-05, + "loss": 4.1011, + "step": 1307000 + }, + { + "epoch": 2.45, + "learning_rate": 5.4574395398950346e-05, + "loss": 4.101, + "step": 1307500 + }, + { + "epoch": 2.46, + "learning_rate": 5.448054239527882e-05, + "loss": 4.0842, + "step": 1308000 + }, + { + "epoch": 2.46, + "learning_rate": 5.438668939160728e-05, + "loss": 4.0852, + "step": 1308500 + }, + { + "epoch": 2.46, + "learning_rate": 5.429283638793575e-05, + "loss": 4.0981, + "step": 1309000 + }, + { + "epoch": 2.46, + "learning_rate": 5.419898338426423e-05, + "loss": 4.0998, + "step": 1309500 + }, + { + "epoch": 2.46, + "learning_rate": 5.410513038059269e-05, + "loss": 4.0701, + "step": 1310000 + }, + { + "epoch": 2.46, + "learning_rate": 5.4011277376921164e-05, + "loss": 4.0971, + "step": 1310500 + }, + { + "epoch": 2.46, + "learning_rate": 5.391742437324964e-05, + "loss": 4.0919, + "step": 1311000 + }, + { + "epoch": 2.46, + "learning_rate": 5.382357136957811e-05, + "loss": 4.1148, + "step": 1311500 + }, + { + "epoch": 2.46, + "learning_rate": 5.3729718365906575e-05, + "loss": 4.0948, + "step": 1312000 + }, + { + "epoch": 2.46, + "learning_rate": 5.3635865362235046e-05, + "loss": 4.0855, + "step": 1312500 + }, + { + "epoch": 2.46, + "learning_rate": 5.3542012358563524e-05, + "loss": 4.0918, + "step": 1313000 + }, + { + "epoch": 2.47, + "learning_rate": 5.344815935489199e-05, + "loss": 4.0908, + "step": 1313500 + }, + { + "epoch": 2.47, + "learning_rate": 5.335430635122046e-05, + "loss": 4.1036, + "step": 1314000 + }, + { + "epoch": 2.47, + "learning_rate": 5.3260453347548935e-05, + "loss": 4.0688, + "step": 1314500 + }, + { + "epoch": 2.47, + "learning_rate": 5.31666003438774e-05, + "loss": 4.0748, + "step": 1315000 + }, + { + "epoch": 2.47, + "learning_rate": 5.307274734020587e-05, + "loss": 4.0831, + "step": 1315500 + }, + { + "epoch": 2.47, + "learning_rate": 5.297889433653434e-05, + "loss": 4.0808, + "step": 1316000 + }, + { + "epoch": 2.47, + "learning_rate": 5.288504133286282e-05, + "loss": 4.0749, + "step": 1316500 + }, + { + "epoch": 2.47, + "learning_rate": 5.279118832919128e-05, + "loss": 4.0916, + "step": 1317000 + }, + { + "epoch": 2.47, + "learning_rate": 5.269733532551975e-05, + "loss": 4.0932, + "step": 1317500 + }, + { + "epoch": 2.47, + "learning_rate": 5.260348232184822e-05, + "loss": 4.089, + "step": 1318000 + }, + { + "epoch": 2.47, + "learning_rate": 5.2509629318176694e-05, + "loss": 4.0886, + "step": 1318500 + }, + { + "epoch": 2.48, + "learning_rate": 5.2415776314505164e-05, + "loss": 4.085, + "step": 1319000 + }, + { + "epoch": 2.48, + "learning_rate": 5.2321923310833635e-05, + "loss": 4.0798, + "step": 1319500 + }, + { + "epoch": 2.48, + "learning_rate": 5.222807030716211e-05, + "loss": 4.0888, + "step": 1320000 + }, + { + "epoch": 2.48, + "learning_rate": 5.2134217303490576e-05, + "loss": 4.0912, + "step": 1320500 + }, + { + "epoch": 2.48, + "learning_rate": 5.204036429981905e-05, + "loss": 4.1052, + "step": 1321000 + }, + { + "epoch": 2.48, + "learning_rate": 5.194651129614751e-05, + "loss": 4.1082, + "step": 1321500 + }, + { + "epoch": 2.48, + "learning_rate": 5.185265829247599e-05, + "loss": 4.0893, + "step": 1322000 + }, + { + "epoch": 2.48, + "learning_rate": 5.175880528880446e-05, + "loss": 4.1092, + "step": 1322500 + }, + { + "epoch": 2.48, + "learning_rate": 5.166495228513293e-05, + "loss": 4.0737, + "step": 1323000 + }, + { + "epoch": 2.48, + "learning_rate": 5.15710992814614e-05, + "loss": 4.0798, + "step": 1323500 + }, + { + "epoch": 2.49, + "learning_rate": 5.147724627778987e-05, + "loss": 4.109, + "step": 1324000 + }, + { + "epoch": 2.49, + "learning_rate": 5.138339327411834e-05, + "loss": 4.0857, + "step": 1324500 + }, + { + "epoch": 2.49, + "learning_rate": 5.1289540270446805e-05, + "loss": 4.1154, + "step": 1325000 + }, + { + "epoch": 2.49, + "learning_rate": 5.119568726677528e-05, + "loss": 4.0871, + "step": 1325500 + }, + { + "epoch": 2.49, + "learning_rate": 5.1101834263103753e-05, + "loss": 4.0941, + "step": 1326000 + }, + { + "epoch": 2.49, + "learning_rate": 5.1007981259432224e-05, + "loss": 4.0885, + "step": 1326500 + }, + { + "epoch": 2.49, + "learning_rate": 5.091412825576069e-05, + "loss": 4.0857, + "step": 1327000 + }, + { + "epoch": 2.49, + "learning_rate": 5.0820275252089165e-05, + "loss": 4.0746, + "step": 1327500 + }, + { + "epoch": 2.49, + "learning_rate": 5.0726422248417636e-05, + "loss": 4.064, + "step": 1328000 + }, + { + "epoch": 2.49, + "learning_rate": 5.06325692447461e-05, + "loss": 4.0834, + "step": 1328500 + }, + { + "epoch": 2.49, + "learning_rate": 5.053871624107458e-05, + "loss": 4.0884, + "step": 1329000 + }, + { + "epoch": 2.5, + "learning_rate": 5.044486323740305e-05, + "loss": 4.0873, + "step": 1329500 + }, + { + "epoch": 2.5, + "learning_rate": 5.035101023373151e-05, + "loss": 4.0933, + "step": 1330000 + }, + { + "epoch": 2.5, + "learning_rate": 5.025715723005998e-05, + "loss": 4.0932, + "step": 1330500 + }, + { + "epoch": 2.5, + "learning_rate": 5.016330422638846e-05, + "loss": 4.0799, + "step": 1331000 + }, + { + "epoch": 2.5, + "learning_rate": 5.006945122271693e-05, + "loss": 4.0742, + "step": 1331500 + }, + { + "epoch": 2.5, + "learning_rate": 4.9975598219045394e-05, + "loss": 4.0959, + "step": 1332000 + }, + { + "epoch": 2.5, + "learning_rate": 4.988174521537387e-05, + "loss": 4.087, + "step": 1332500 + }, + { + "epoch": 2.5, + "learning_rate": 4.978789221170234e-05, + "loss": 4.0873, + "step": 1333000 + }, + { + "epoch": 2.5, + "learning_rate": 4.9694039208030806e-05, + "loss": 4.0715, + "step": 1333500 + }, + { + "epoch": 2.5, + "learning_rate": 4.960018620435928e-05, + "loss": 4.0868, + "step": 1334000 + }, + { + "epoch": 2.5, + "learning_rate": 4.9506333200687754e-05, + "loss": 4.0726, + "step": 1334500 + }, + { + "epoch": 2.51, + "learning_rate": 4.941248019701622e-05, + "loss": 4.0785, + "step": 1335000 + }, + { + "epoch": 2.51, + "learning_rate": 4.931862719334469e-05, + "loss": 4.0701, + "step": 1335500 + }, + { + "epoch": 2.51, + "learning_rate": 4.922477418967316e-05, + "loss": 4.0798, + "step": 1336000 + }, + { + "epoch": 2.51, + "learning_rate": 4.913092118600164e-05, + "loss": 4.0938, + "step": 1336500 + }, + { + "epoch": 2.51, + "learning_rate": 4.90370681823301e-05, + "loss": 4.0849, + "step": 1337000 + }, + { + "epoch": 2.51, + "learning_rate": 4.894321517865857e-05, + "loss": 4.0584, + "step": 1337500 + }, + { + "epoch": 2.51, + "learning_rate": 4.884936217498705e-05, + "loss": 4.0809, + "step": 1338000 + }, + { + "epoch": 2.51, + "learning_rate": 4.875550917131551e-05, + "loss": 4.0834, + "step": 1338500 + }, + { + "epoch": 2.51, + "learning_rate": 4.866165616764398e-05, + "loss": 4.1006, + "step": 1339000 + }, + { + "epoch": 2.51, + "learning_rate": 4.8567803163972454e-05, + "loss": 4.0869, + "step": 1339500 + }, + { + "epoch": 2.52, + "learning_rate": 4.847395016030093e-05, + "loss": 4.0998, + "step": 1340000 + }, + { + "epoch": 2.52, + "learning_rate": 4.8380097156629395e-05, + "loss": 4.0782, + "step": 1340500 + }, + { + "epoch": 2.52, + "learning_rate": 4.8286244152957866e-05, + "loss": 4.0942, + "step": 1341000 + }, + { + "epoch": 2.52, + "learning_rate": 4.819239114928634e-05, + "loss": 4.1044, + "step": 1341500 + }, + { + "epoch": 2.52, + "learning_rate": 4.809853814561481e-05, + "loss": 4.0865, + "step": 1342000 + }, + { + "epoch": 2.52, + "learning_rate": 4.800468514194328e-05, + "loss": 4.0917, + "step": 1342500 + }, + { + "epoch": 2.52, + "learning_rate": 4.791083213827175e-05, + "loss": 4.0874, + "step": 1343000 + }, + { + "epoch": 2.52, + "learning_rate": 4.781697913460022e-05, + "loss": 4.0948, + "step": 1343500 + }, + { + "epoch": 2.52, + "learning_rate": 4.772312613092869e-05, + "loss": 4.082, + "step": 1344000 + }, + { + "epoch": 2.52, + "learning_rate": 4.762927312725716e-05, + "loss": 4.0872, + "step": 1344500 + }, + { + "epoch": 2.52, + "learning_rate": 4.753542012358564e-05, + "loss": 4.0923, + "step": 1345000 + }, + { + "epoch": 2.53, + "learning_rate": 4.74415671199141e-05, + "loss": 4.094, + "step": 1345500 + }, + { + "epoch": 2.53, + "learning_rate": 4.734771411624257e-05, + "loss": 4.1156, + "step": 1346000 + }, + { + "epoch": 2.53, + "learning_rate": 4.725386111257104e-05, + "loss": 4.0886, + "step": 1346500 + }, + { + "epoch": 2.53, + "learning_rate": 4.7160008108899514e-05, + "loss": 4.1015, + "step": 1347000 + }, + { + "epoch": 2.53, + "learning_rate": 4.7066155105227984e-05, + "loss": 4.0873, + "step": 1347500 + }, + { + "epoch": 2.53, + "learning_rate": 4.6972302101556455e-05, + "loss": 4.0725, + "step": 1348000 + }, + { + "epoch": 2.53, + "learning_rate": 4.687844909788492e-05, + "loss": 4.077, + "step": 1348500 + }, + { + "epoch": 2.53, + "learning_rate": 4.6784596094213396e-05, + "loss": 4.0915, + "step": 1349000 + }, + { + "epoch": 2.53, + "learning_rate": 4.669074309054187e-05, + "loss": 4.0733, + "step": 1349500 + }, + { + "epoch": 2.53, + "learning_rate": 4.659689008687033e-05, + "loss": 4.0878, + "step": 1350000 + }, + { + "epoch": 2.53, + "learning_rate": 4.650303708319881e-05, + "loss": 4.0922, + "step": 1350500 + }, + { + "epoch": 2.54, + "learning_rate": 4.640918407952728e-05, + "loss": 4.0829, + "step": 1351000 + }, + { + "epoch": 2.54, + "learning_rate": 4.631533107585575e-05, + "loss": 4.0832, + "step": 1351500 + }, + { + "epoch": 2.54, + "learning_rate": 4.622147807218421e-05, + "loss": 4.0796, + "step": 1352000 + }, + { + "epoch": 2.54, + "learning_rate": 4.612762506851269e-05, + "loss": 4.0716, + "step": 1352500 + }, + { + "epoch": 2.54, + "learning_rate": 4.603377206484116e-05, + "loss": 4.0947, + "step": 1353000 + }, + { + "epoch": 2.54, + "learning_rate": 4.5939919061169625e-05, + "loss": 4.0737, + "step": 1353500 + }, + { + "epoch": 2.54, + "learning_rate": 4.58460660574981e-05, + "loss": 4.0652, + "step": 1354000 + }, + { + "epoch": 2.54, + "learning_rate": 4.575221305382657e-05, + "loss": 4.0775, + "step": 1354500 + }, + { + "epoch": 2.54, + "learning_rate": 4.5658360050155044e-05, + "loss": 4.0734, + "step": 1355000 + }, + { + "epoch": 2.54, + "learning_rate": 4.556450704648351e-05, + "loss": 4.0891, + "step": 1355500 + }, + { + "epoch": 2.55, + "learning_rate": 4.5470654042811985e-05, + "loss": 4.0902, + "step": 1356000 + }, + { + "epoch": 2.55, + "learning_rate": 4.5376801039140456e-05, + "loss": 4.0873, + "step": 1356500 + }, + { + "epoch": 2.55, + "learning_rate": 4.528294803546892e-05, + "loss": 4.1043, + "step": 1357000 + }, + { + "epoch": 2.55, + "learning_rate": 4.518909503179739e-05, + "loss": 4.0743, + "step": 1357500 + }, + { + "epoch": 2.55, + "learning_rate": 4.509524202812587e-05, + "loss": 4.0598, + "step": 1358000 + }, + { + "epoch": 2.55, + "learning_rate": 4.500138902445433e-05, + "loss": 4.0914, + "step": 1358500 + }, + { + "epoch": 2.55, + "learning_rate": 4.49075360207828e-05, + "loss": 4.0748, + "step": 1359000 + }, + { + "epoch": 2.55, + "learning_rate": 4.481368301711128e-05, + "loss": 4.091, + "step": 1359500 + }, + { + "epoch": 2.55, + "learning_rate": 4.471983001343975e-05, + "loss": 4.0587, + "step": 1360000 + }, + { + "epoch": 2.55, + "learning_rate": 4.4625977009768214e-05, + "loss": 4.0886, + "step": 1360500 + }, + { + "epoch": 2.55, + "learning_rate": 4.4532124006096685e-05, + "loss": 4.0904, + "step": 1361000 + }, + { + "epoch": 2.56, + "learning_rate": 4.443827100242516e-05, + "loss": 4.0911, + "step": 1361500 + }, + { + "epoch": 2.56, + "learning_rate": 4.4344417998753626e-05, + "loss": 4.0815, + "step": 1362000 + }, + { + "epoch": 2.56, + "learning_rate": 4.42505649950821e-05, + "loss": 4.0724, + "step": 1362500 + }, + { + "epoch": 2.56, + "learning_rate": 4.4156711991410574e-05, + "loss": 4.0758, + "step": 1363000 + }, + { + "epoch": 2.56, + "learning_rate": 4.4062858987739045e-05, + "loss": 4.0822, + "step": 1363500 + }, + { + "epoch": 2.56, + "learning_rate": 4.396900598406751e-05, + "loss": 4.0769, + "step": 1364000 + }, + { + "epoch": 2.56, + "learning_rate": 4.387515298039598e-05, + "loss": 4.067, + "step": 1364500 + }, + { + "epoch": 2.56, + "learning_rate": 4.3781299976724457e-05, + "loss": 4.0907, + "step": 1365000 + }, + { + "epoch": 2.56, + "learning_rate": 4.368744697305292e-05, + "loss": 4.0809, + "step": 1365500 + }, + { + "epoch": 2.56, + "learning_rate": 4.359359396938139e-05, + "loss": 4.0687, + "step": 1366000 + }, + { + "epoch": 2.57, + "learning_rate": 4.349974096570986e-05, + "loss": 4.078, + "step": 1366500 + }, + { + "epoch": 2.57, + "learning_rate": 4.340588796203833e-05, + "loss": 4.0769, + "step": 1367000 + }, + { + "epoch": 2.57, + "learning_rate": 4.33120349583668e-05, + "loss": 4.0747, + "step": 1367500 + }, + { + "epoch": 2.57, + "learning_rate": 4.3218181954695274e-05, + "loss": 4.0935, + "step": 1368000 + }, + { + "epoch": 2.57, + "learning_rate": 4.312432895102375e-05, + "loss": 4.0849, + "step": 1368500 + }, + { + "epoch": 2.57, + "learning_rate": 4.3030475947352215e-05, + "loss": 4.0822, + "step": 1369000 + }, + { + "epoch": 2.57, + "learning_rate": 4.2936622943680686e-05, + "loss": 4.0804, + "step": 1369500 + }, + { + "epoch": 2.57, + "learning_rate": 4.2842769940009156e-05, + "loss": 4.0616, + "step": 1370000 + }, + { + "epoch": 2.57, + "learning_rate": 4.274891693633763e-05, + "loss": 4.0836, + "step": 1370500 + }, + { + "epoch": 2.57, + "learning_rate": 4.26550639326661e-05, + "loss": 4.0786, + "step": 1371000 + }, + { + "epoch": 2.57, + "learning_rate": 4.256121092899457e-05, + "loss": 4.0962, + "step": 1371500 + }, + { + "epoch": 2.58, + "learning_rate": 4.2467357925323046e-05, + "loss": 4.0542, + "step": 1372000 + }, + { + "epoch": 2.58, + "learning_rate": 4.237350492165151e-05, + "loss": 4.066, + "step": 1372500 + }, + { + "epoch": 2.58, + "learning_rate": 4.227965191797998e-05, + "loss": 4.0942, + "step": 1373000 + }, + { + "epoch": 2.58, + "learning_rate": 4.2185798914308444e-05, + "loss": 4.1004, + "step": 1373500 + }, + { + "epoch": 2.58, + "learning_rate": 4.209194591063692e-05, + "loss": 4.0927, + "step": 1374000 + }, + { + "epoch": 2.58, + "learning_rate": 4.199809290696539e-05, + "loss": 4.0715, + "step": 1374500 + }, + { + "epoch": 2.58, + "learning_rate": 4.190423990329386e-05, + "loss": 4.0849, + "step": 1375000 + }, + { + "epoch": 2.58, + "learning_rate": 4.1810386899622327e-05, + "loss": 4.0863, + "step": 1375500 + }, + { + "epoch": 2.58, + "learning_rate": 4.1716533895950804e-05, + "loss": 4.0774, + "step": 1376000 + }, + { + "epoch": 2.58, + "learning_rate": 4.1622680892279275e-05, + "loss": 4.0682, + "step": 1376500 + }, + { + "epoch": 2.58, + "learning_rate": 4.152882788860774e-05, + "loss": 4.0664, + "step": 1377000 + }, + { + "epoch": 2.59, + "learning_rate": 4.1434974884936216e-05, + "loss": 4.0759, + "step": 1377500 + }, + { + "epoch": 2.59, + "learning_rate": 4.1341121881264686e-05, + "loss": 4.0804, + "step": 1378000 + }, + { + "epoch": 2.59, + "learning_rate": 4.124726887759315e-05, + "loss": 4.0909, + "step": 1378500 + }, + { + "epoch": 2.59, + "learning_rate": 4.115341587392162e-05, + "loss": 4.0613, + "step": 1379000 + }, + { + "epoch": 2.59, + "learning_rate": 4.10595628702501e-05, + "loss": 4.0658, + "step": 1379500 + }, + { + "epoch": 2.59, + "learning_rate": 4.096570986657857e-05, + "loss": 4.0913, + "step": 1380000 + }, + { + "epoch": 2.59, + "learning_rate": 4.087185686290703e-05, + "loss": 4.0913, + "step": 1380500 + }, + { + "epoch": 2.59, + "learning_rate": 4.077800385923551e-05, + "loss": 4.0813, + "step": 1381000 + }, + { + "epoch": 2.59, + "learning_rate": 4.068415085556398e-05, + "loss": 4.0881, + "step": 1381500 + }, + { + "epoch": 2.59, + "learning_rate": 4.0590297851892445e-05, + "loss": 4.0741, + "step": 1382000 + }, + { + "epoch": 2.6, + "learning_rate": 4.0496444848220915e-05, + "loss": 4.0748, + "step": 1382500 + }, + { + "epoch": 2.6, + "learning_rate": 4.040259184454939e-05, + "loss": 4.0922, + "step": 1383000 + }, + { + "epoch": 2.6, + "learning_rate": 4.0308738840877864e-05, + "loss": 4.068, + "step": 1383500 + }, + { + "epoch": 2.6, + "learning_rate": 4.021488583720633e-05, + "loss": 4.1101, + "step": 1384000 + }, + { + "epoch": 2.6, + "learning_rate": 4.01210328335348e-05, + "loss": 4.0935, + "step": 1384500 + }, + { + "epoch": 2.6, + "learning_rate": 4.0027179829863275e-05, + "loss": 4.0758, + "step": 1385000 + }, + { + "epoch": 2.6, + "learning_rate": 3.993332682619174e-05, + "loss": 4.0839, + "step": 1385500 + }, + { + "epoch": 2.6, + "learning_rate": 3.983947382252021e-05, + "loss": 4.1031, + "step": 1386000 + }, + { + "epoch": 2.6, + "learning_rate": 3.974562081884869e-05, + "loss": 4.0771, + "step": 1386500 + }, + { + "epoch": 2.6, + "learning_rate": 3.965176781517715e-05, + "loss": 4.0786, + "step": 1387000 + }, + { + "epoch": 2.6, + "learning_rate": 3.955791481150562e-05, + "loss": 4.0738, + "step": 1387500 + }, + { + "epoch": 2.61, + "learning_rate": 3.946406180783409e-05, + "loss": 4.072, + "step": 1388000 + }, + { + "epoch": 2.61, + "learning_rate": 3.937020880416257e-05, + "loss": 4.091, + "step": 1388500 + }, + { + "epoch": 2.61, + "learning_rate": 3.9276355800491034e-05, + "loss": 4.088, + "step": 1389000 + }, + { + "epoch": 2.61, + "learning_rate": 3.9182502796819504e-05, + "loss": 4.0842, + "step": 1389500 + }, + { + "epoch": 2.61, + "learning_rate": 3.908864979314798e-05, + "loss": 4.0651, + "step": 1390000 + }, + { + "epoch": 2.61, + "learning_rate": 3.8994796789476446e-05, + "loss": 4.0968, + "step": 1390500 + }, + { + "epoch": 2.61, + "learning_rate": 3.8900943785804916e-05, + "loss": 4.06, + "step": 1391000 + }, + { + "epoch": 2.61, + "learning_rate": 3.880709078213339e-05, + "loss": 4.0594, + "step": 1391500 + }, + { + "epoch": 2.61, + "learning_rate": 3.8713237778461864e-05, + "loss": 4.0864, + "step": 1392000 + }, + { + "epoch": 2.61, + "learning_rate": 3.861938477479033e-05, + "loss": 4.083, + "step": 1392500 + }, + { + "epoch": 2.61, + "learning_rate": 3.85255317711188e-05, + "loss": 4.0956, + "step": 1393000 + }, + { + "epoch": 2.62, + "learning_rate": 3.843167876744726e-05, + "loss": 4.0824, + "step": 1393500 + }, + { + "epoch": 2.62, + "learning_rate": 3.833782576377574e-05, + "loss": 4.0737, + "step": 1394000 + }, + { + "epoch": 2.62, + "learning_rate": 3.824397276010421e-05, + "loss": 4.0801, + "step": 1394500 + }, + { + "epoch": 2.62, + "learning_rate": 3.815011975643268e-05, + "loss": 4.0845, + "step": 1395000 + }, + { + "epoch": 2.62, + "learning_rate": 3.805626675276115e-05, + "loss": 4.0736, + "step": 1395500 + }, + { + "epoch": 2.62, + "learning_rate": 3.796241374908962e-05, + "loss": 4.0924, + "step": 1396000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7868560745418093e-05, + "loss": 4.0812, + "step": 1396500 + }, + { + "epoch": 2.62, + "learning_rate": 3.777470774174656e-05, + "loss": 4.0882, + "step": 1397000 + }, + { + "epoch": 2.62, + "learning_rate": 3.7680854738075035e-05, + "loss": 4.0813, + "step": 1397500 + }, + { + "epoch": 2.62, + "learning_rate": 3.7587001734403505e-05, + "loss": 4.0796, + "step": 1398000 + }, + { + "epoch": 2.63, + "learning_rate": 3.7493148730731976e-05, + "loss": 4.0755, + "step": 1398500 + }, + { + "epoch": 2.63, + "learning_rate": 3.739929572706045e-05, + "loss": 4.0857, + "step": 1399000 + }, + { + "epoch": 2.63, + "learning_rate": 3.730544272338892e-05, + "loss": 4.0851, + "step": 1399500 + }, + { + "epoch": 2.63, + "learning_rate": 3.721158971971739e-05, + "loss": 4.0766, + "step": 1400000 + }, + { + "epoch": 2.63, + "learning_rate": 3.711773671604586e-05, + "loss": 4.0766, + "step": 1400500 + }, + { + "epoch": 2.63, + "learning_rate": 3.702388371237433e-05, + "loss": 4.0816, + "step": 1401000 + }, + { + "epoch": 2.63, + "learning_rate": 3.69300307087028e-05, + "loss": 4.0744, + "step": 1401500 + }, + { + "epoch": 2.63, + "learning_rate": 3.6836177705031264e-05, + "loss": 4.0843, + "step": 1402000 + }, + { + "epoch": 2.63, + "learning_rate": 3.674232470135974e-05, + "loss": 4.0984, + "step": 1402500 + }, + { + "epoch": 2.63, + "learning_rate": 3.664847169768821e-05, + "loss": 4.0624, + "step": 1403000 + }, + { + "epoch": 2.63, + "learning_rate": 3.655461869401668e-05, + "loss": 4.0806, + "step": 1403500 + }, + { + "epoch": 2.64, + "learning_rate": 3.646076569034515e-05, + "loss": 4.0838, + "step": 1404000 + }, + { + "epoch": 2.64, + "learning_rate": 3.636691268667362e-05, + "loss": 4.0784, + "step": 1404500 + }, + { + "epoch": 2.64, + "learning_rate": 3.6273059683002094e-05, + "loss": 4.0847, + "step": 1405000 + }, + { + "epoch": 2.64, + "learning_rate": 3.617920667933056e-05, + "loss": 4.0783, + "step": 1405500 + }, + { + "epoch": 2.64, + "learning_rate": 3.6085353675659036e-05, + "loss": 4.0807, + "step": 1406000 + }, + { + "epoch": 2.64, + "learning_rate": 3.59915006719875e-05, + "loss": 4.0524, + "step": 1406500 + }, + { + "epoch": 2.64, + "learning_rate": 3.589764766831598e-05, + "loss": 4.0808, + "step": 1407000 + }, + { + "epoch": 2.64, + "learning_rate": 3.580379466464445e-05, + "loss": 4.0714, + "step": 1407500 + }, + { + "epoch": 2.64, + "learning_rate": 3.570994166097291e-05, + "loss": 4.0916, + "step": 1408000 + }, + { + "epoch": 2.64, + "learning_rate": 3.561608865730139e-05, + "loss": 4.0711, + "step": 1408500 + }, + { + "epoch": 2.64, + "learning_rate": 3.552223565362985e-05, + "loss": 4.0768, + "step": 1409000 + }, + { + "epoch": 2.65, + "learning_rate": 3.542838264995833e-05, + "loss": 4.0762, + "step": 1409500 + }, + { + "epoch": 2.65, + "learning_rate": 3.5334529646286794e-05, + "loss": 4.1018, + "step": 1410000 + }, + { + "epoch": 2.65, + "learning_rate": 3.5240676642615265e-05, + "loss": 4.0837, + "step": 1410500 + }, + { + "epoch": 2.65, + "learning_rate": 3.5146823638943735e-05, + "loss": 4.0754, + "step": 1411000 + }, + { + "epoch": 2.65, + "learning_rate": 3.5052970635272206e-05, + "loss": 4.0883, + "step": 1411500 + }, + { + "epoch": 2.65, + "learning_rate": 3.495911763160068e-05, + "loss": 4.073, + "step": 1412000 + }, + { + "epoch": 2.65, + "learning_rate": 3.486526462792915e-05, + "loss": 4.0878, + "step": 1412500 + }, + { + "epoch": 2.65, + "learning_rate": 3.477141162425762e-05, + "loss": 4.0713, + "step": 1413000 + }, + { + "epoch": 2.65, + "learning_rate": 3.467755862058609e-05, + "loss": 4.0709, + "step": 1413500 + }, + { + "epoch": 2.65, + "learning_rate": 3.458370561691456e-05, + "loss": 4.083, + "step": 1414000 + }, + { + "epoch": 2.66, + "learning_rate": 3.448985261324303e-05, + "loss": 4.0697, + "step": 1414500 + }, + { + "epoch": 2.66, + "learning_rate": 3.43959996095715e-05, + "loss": 4.0878, + "step": 1415000 + }, + { + "epoch": 2.66, + "learning_rate": 3.430214660589997e-05, + "loss": 4.075, + "step": 1415500 + }, + { + "epoch": 2.66, + "learning_rate": 3.420829360222844e-05, + "loss": 4.085, + "step": 1416000 + }, + { + "epoch": 2.66, + "learning_rate": 3.411444059855691e-05, + "loss": 4.0705, + "step": 1416500 + }, + { + "epoch": 2.66, + "learning_rate": 3.402058759488538e-05, + "loss": 4.0619, + "step": 1417000 + }, + { + "epoch": 2.66, + "learning_rate": 3.3926734591213854e-05, + "loss": 4.0675, + "step": 1417500 + }, + { + "epoch": 2.66, + "learning_rate": 3.3832881587542324e-05, + "loss": 4.0697, + "step": 1418000 + }, + { + "epoch": 2.66, + "learning_rate": 3.3739028583870795e-05, + "loss": 4.0825, + "step": 1418500 + }, + { + "epoch": 2.66, + "learning_rate": 3.3645175580199265e-05, + "loss": 4.0742, + "step": 1419000 + }, + { + "epoch": 2.66, + "learning_rate": 3.3551322576527736e-05, + "loss": 4.082, + "step": 1419500 + }, + { + "epoch": 2.67, + "learning_rate": 3.345746957285621e-05, + "loss": 4.0752, + "step": 1420000 + }, + { + "epoch": 2.67, + "learning_rate": 3.336361656918468e-05, + "loss": 4.0694, + "step": 1420500 + }, + { + "epoch": 2.67, + "learning_rate": 3.326976356551315e-05, + "loss": 4.0844, + "step": 1421000 + }, + { + "epoch": 2.67, + "learning_rate": 3.317591056184162e-05, + "loss": 4.0892, + "step": 1421500 + }, + { + "epoch": 2.67, + "learning_rate": 3.308205755817009e-05, + "loss": 4.1071, + "step": 1422000 + }, + { + "epoch": 2.67, + "learning_rate": 3.298820455449856e-05, + "loss": 4.0752, + "step": 1422500 + }, + { + "epoch": 2.67, + "learning_rate": 3.289435155082703e-05, + "loss": 4.0692, + "step": 1423000 + }, + { + "epoch": 2.67, + "learning_rate": 3.28004985471555e-05, + "loss": 4.0879, + "step": 1423500 + }, + { + "epoch": 2.67, + "learning_rate": 3.270664554348397e-05, + "loss": 4.0688, + "step": 1424000 + }, + { + "epoch": 2.67, + "learning_rate": 3.261279253981244e-05, + "loss": 4.0851, + "step": 1424500 + }, + { + "epoch": 2.67, + "learning_rate": 3.251893953614091e-05, + "loss": 4.0762, + "step": 1425000 + }, + { + "epoch": 2.68, + "learning_rate": 3.2425086532469384e-05, + "loss": 4.0752, + "step": 1425500 + }, + { + "epoch": 2.68, + "learning_rate": 3.2331233528797854e-05, + "loss": 4.0928, + "step": 1426000 + }, + { + "epoch": 2.68, + "learning_rate": 3.2237380525126325e-05, + "loss": 4.0672, + "step": 1426500 + }, + { + "epoch": 2.68, + "learning_rate": 3.2143527521454796e-05, + "loss": 4.0802, + "step": 1427000 + }, + { + "epoch": 2.68, + "learning_rate": 3.2049674517783266e-05, + "loss": 4.0856, + "step": 1427500 + }, + { + "epoch": 2.68, + "learning_rate": 3.195582151411173e-05, + "loss": 4.0751, + "step": 1428000 + }, + { + "epoch": 2.68, + "learning_rate": 3.186196851044021e-05, + "loss": 4.0848, + "step": 1428500 + }, + { + "epoch": 2.68, + "learning_rate": 3.176811550676867e-05, + "loss": 4.0826, + "step": 1429000 + }, + { + "epoch": 2.68, + "learning_rate": 3.167426250309715e-05, + "loss": 4.0575, + "step": 1429500 + }, + { + "epoch": 2.68, + "learning_rate": 3.158040949942562e-05, + "loss": 4.0846, + "step": 1430000 + }, + { + "epoch": 2.69, + "learning_rate": 3.1486556495754083e-05, + "loss": 4.0905, + "step": 1430500 + }, + { + "epoch": 2.69, + "learning_rate": 3.139270349208256e-05, + "loss": 4.0635, + "step": 1431000 + }, + { + "epoch": 2.69, + "learning_rate": 3.1298850488411025e-05, + "loss": 4.0681, + "step": 1431500 + }, + { + "epoch": 2.69, + "learning_rate": 3.12049974847395e-05, + "loss": 4.0833, + "step": 1432000 + }, + { + "epoch": 2.69, + "learning_rate": 3.1111144481067966e-05, + "loss": 4.0834, + "step": 1432500 + }, + { + "epoch": 2.69, + "learning_rate": 3.1017291477396443e-05, + "loss": 4.0623, + "step": 1433000 + }, + { + "epoch": 2.69, + "learning_rate": 3.0923438473724914e-05, + "loss": 4.0842, + "step": 1433500 + }, + { + "epoch": 2.69, + "learning_rate": 3.082958547005338e-05, + "loss": 4.0805, + "step": 1434000 + }, + { + "epoch": 2.69, + "learning_rate": 3.0735732466381855e-05, + "loss": 4.0886, + "step": 1434500 + }, + { + "epoch": 2.69, + "learning_rate": 3.064187946271032e-05, + "loss": 4.0627, + "step": 1435000 + }, + { + "epoch": 2.69, + "learning_rate": 3.0548026459038797e-05, + "loss": 4.0817, + "step": 1435500 + }, + { + "epoch": 2.7, + "learning_rate": 3.045417345536726e-05, + "loss": 4.1082, + "step": 1436000 + }, + { + "epoch": 2.7, + "learning_rate": 3.0360320451695734e-05, + "loss": 4.0759, + "step": 1436500 + }, + { + "epoch": 2.7, + "learning_rate": 3.0266467448024202e-05, + "loss": 4.0907, + "step": 1437000 + }, + { + "epoch": 2.7, + "learning_rate": 3.0172614444352676e-05, + "loss": 4.0807, + "step": 1437500 + }, + { + "epoch": 2.7, + "learning_rate": 3.0078761440681146e-05, + "loss": 4.0782, + "step": 1438000 + }, + { + "epoch": 2.7, + "learning_rate": 2.9984908437009614e-05, + "loss": 4.0759, + "step": 1438500 + }, + { + "epoch": 2.7, + "learning_rate": 2.9891055433338088e-05, + "loss": 4.0529, + "step": 1439000 + }, + { + "epoch": 2.7, + "learning_rate": 2.9797202429666555e-05, + "loss": 4.0761, + "step": 1439500 + }, + { + "epoch": 2.7, + "learning_rate": 2.970334942599503e-05, + "loss": 4.0891, + "step": 1440000 + }, + { + "epoch": 2.7, + "learning_rate": 2.9609496422323496e-05, + "loss": 4.0623, + "step": 1440500 + }, + { + "epoch": 2.7, + "learning_rate": 2.951564341865197e-05, + "loss": 4.0779, + "step": 1441000 + }, + { + "epoch": 2.71, + "learning_rate": 2.9421790414980438e-05, + "loss": 4.0642, + "step": 1441500 + }, + { + "epoch": 2.71, + "learning_rate": 2.9327937411308908e-05, + "loss": 4.0584, + "step": 1442000 + }, + { + "epoch": 2.71, + "learning_rate": 2.9234084407637382e-05, + "loss": 4.0769, + "step": 1442500 + }, + { + "epoch": 2.71, + "learning_rate": 2.914023140396585e-05, + "loss": 4.0739, + "step": 1443000 + }, + { + "epoch": 2.71, + "learning_rate": 2.9046378400294323e-05, + "loss": 4.0837, + "step": 1443500 + }, + { + "epoch": 2.71, + "learning_rate": 2.895252539662279e-05, + "loss": 4.0882, + "step": 1444000 + }, + { + "epoch": 2.71, + "learning_rate": 2.885867239295126e-05, + "loss": 4.0639, + "step": 1444500 + }, + { + "epoch": 2.71, + "learning_rate": 2.8764819389279732e-05, + "loss": 4.0715, + "step": 1445000 + }, + { + "epoch": 2.71, + "learning_rate": 2.8670966385608203e-05, + "loss": 4.071, + "step": 1445500 + }, + { + "epoch": 2.71, + "learning_rate": 2.857711338193667e-05, + "loss": 4.0727, + "step": 1446000 + }, + { + "epoch": 2.72, + "learning_rate": 2.8483260378265144e-05, + "loss": 4.0621, + "step": 1446500 + }, + { + "epoch": 2.72, + "learning_rate": 2.8389407374593615e-05, + "loss": 4.0671, + "step": 1447000 + }, + { + "epoch": 2.72, + "learning_rate": 2.8295554370922085e-05, + "loss": 4.0786, + "step": 1447500 + }, + { + "epoch": 2.72, + "learning_rate": 2.8201701367250556e-05, + "loss": 4.0629, + "step": 1448000 + }, + { + "epoch": 2.72, + "learning_rate": 2.8107848363579023e-05, + "loss": 4.0769, + "step": 1448500 + }, + { + "epoch": 2.72, + "learning_rate": 2.8013995359907497e-05, + "loss": 4.084, + "step": 1449000 + }, + { + "epoch": 2.72, + "learning_rate": 2.7920142356235964e-05, + "loss": 4.0597, + "step": 1449500 + }, + { + "epoch": 2.72, + "learning_rate": 2.782628935256444e-05, + "loss": 4.0746, + "step": 1450000 + }, + { + "epoch": 2.72, + "learning_rate": 2.7732436348892906e-05, + "loss": 4.0876, + "step": 1450500 + }, + { + "epoch": 2.72, + "learning_rate": 2.763858334522138e-05, + "loss": 4.0802, + "step": 1451000 + }, + { + "epoch": 2.72, + "learning_rate": 2.754473034154985e-05, + "loss": 4.0643, + "step": 1451500 + }, + { + "epoch": 2.73, + "learning_rate": 2.7450877337878318e-05, + "loss": 4.0735, + "step": 1452000 + }, + { + "epoch": 2.73, + "learning_rate": 2.735702433420679e-05, + "loss": 4.0577, + "step": 1452500 + }, + { + "epoch": 2.73, + "learning_rate": 2.726317133053526e-05, + "loss": 4.0716, + "step": 1453000 + }, + { + "epoch": 2.73, + "learning_rate": 2.7169318326863733e-05, + "loss": 4.0616, + "step": 1453500 + }, + { + "epoch": 2.73, + "learning_rate": 2.70754653231922e-05, + "loss": 4.0756, + "step": 1454000 + }, + { + "epoch": 2.73, + "learning_rate": 2.698161231952067e-05, + "loss": 4.0606, + "step": 1454500 + }, + { + "epoch": 2.73, + "learning_rate": 2.688775931584914e-05, + "loss": 4.0849, + "step": 1455000 + }, + { + "epoch": 2.73, + "learning_rate": 2.6793906312177612e-05, + "loss": 4.0531, + "step": 1455500 + }, + { + "epoch": 2.73, + "learning_rate": 2.6700053308506086e-05, + "loss": 4.0721, + "step": 1456000 + }, + { + "epoch": 2.73, + "learning_rate": 2.6606200304834553e-05, + "loss": 4.0736, + "step": 1456500 + }, + { + "epoch": 2.73, + "learning_rate": 2.6512347301163024e-05, + "loss": 4.074, + "step": 1457000 + }, + { + "epoch": 2.74, + "learning_rate": 2.6418494297491495e-05, + "loss": 4.1067, + "step": 1457500 + }, + { + "epoch": 2.74, + "learning_rate": 2.6324641293819965e-05, + "loss": 4.0933, + "step": 1458000 + }, + { + "epoch": 2.74, + "learning_rate": 2.6230788290148436e-05, + "loss": 4.0737, + "step": 1458500 + }, + { + "epoch": 2.74, + "learning_rate": 2.6136935286476907e-05, + "loss": 4.0638, + "step": 1459000 + }, + { + "epoch": 2.74, + "learning_rate": 2.6043082282805374e-05, + "loss": 4.0896, + "step": 1459500 + }, + { + "epoch": 2.74, + "learning_rate": 2.5949229279133848e-05, + "loss": 4.0674, + "step": 1460000 + }, + { + "epoch": 2.74, + "learning_rate": 2.585537627546232e-05, + "loss": 4.0543, + "step": 1460500 + }, + { + "epoch": 2.74, + "learning_rate": 2.576152327179079e-05, + "loss": 4.0558, + "step": 1461000 + }, + { + "epoch": 2.74, + "learning_rate": 2.566767026811926e-05, + "loss": 4.0691, + "step": 1461500 + }, + { + "epoch": 2.74, + "learning_rate": 2.5573817264447727e-05, + "loss": 4.0653, + "step": 1462000 + }, + { + "epoch": 2.75, + "learning_rate": 2.54799642607762e-05, + "loss": 4.0803, + "step": 1462500 + }, + { + "epoch": 2.75, + "learning_rate": 2.5386111257104668e-05, + "loss": 4.0641, + "step": 1463000 + }, + { + "epoch": 2.75, + "learning_rate": 2.5292258253433142e-05, + "loss": 4.0812, + "step": 1463500 + }, + { + "epoch": 2.75, + "learning_rate": 2.519840524976161e-05, + "loss": 4.0733, + "step": 1464000 + }, + { + "epoch": 2.75, + "learning_rate": 2.510455224609008e-05, + "loss": 4.0701, + "step": 1464500 + }, + { + "epoch": 2.75, + "learning_rate": 2.5010699242418554e-05, + "loss": 4.082, + "step": 1465000 + }, + { + "epoch": 2.75, + "learning_rate": 2.491684623874702e-05, + "loss": 4.0887, + "step": 1465500 + }, + { + "epoch": 2.75, + "learning_rate": 2.4822993235075496e-05, + "loss": 4.0729, + "step": 1466000 + }, + { + "epoch": 2.75, + "learning_rate": 2.4729140231403963e-05, + "loss": 4.0826, + "step": 1466500 + }, + { + "epoch": 2.75, + "learning_rate": 2.4635287227732433e-05, + "loss": 4.073, + "step": 1467000 + }, + { + "epoch": 2.75, + "learning_rate": 2.4541434224060904e-05, + "loss": 4.0581, + "step": 1467500 + }, + { + "epoch": 2.76, + "learning_rate": 2.4447581220389375e-05, + "loss": 4.0781, + "step": 1468000 + }, + { + "epoch": 2.76, + "learning_rate": 2.4353728216717845e-05, + "loss": 4.0805, + "step": 1468500 + }, + { + "epoch": 2.76, + "learning_rate": 2.4259875213046316e-05, + "loss": 4.057, + "step": 1469000 + }, + { + "epoch": 2.76, + "learning_rate": 2.416602220937479e-05, + "loss": 4.0634, + "step": 1469500 + }, + { + "epoch": 2.76, + "learning_rate": 2.4072169205703257e-05, + "loss": 4.0545, + "step": 1470000 + }, + { + "epoch": 2.76, + "learning_rate": 2.3978316202031728e-05, + "loss": 4.083, + "step": 1470500 + }, + { + "epoch": 2.76, + "learning_rate": 2.38844631983602e-05, + "loss": 4.0732, + "step": 1471000 + }, + { + "epoch": 2.76, + "learning_rate": 2.379061019468867e-05, + "loss": 4.0605, + "step": 1471500 + }, + { + "epoch": 2.76, + "learning_rate": 2.3696757191017136e-05, + "loss": 4.0614, + "step": 1472000 + }, + { + "epoch": 2.76, + "learning_rate": 2.360290418734561e-05, + "loss": 4.0822, + "step": 1472500 + }, + { + "epoch": 2.76, + "learning_rate": 2.3509051183674078e-05, + "loss": 4.0573, + "step": 1473000 + }, + { + "epoch": 2.77, + "learning_rate": 2.3415198180002552e-05, + "loss": 4.0701, + "step": 1473500 + }, + { + "epoch": 2.77, + "learning_rate": 2.3321345176331022e-05, + "loss": 4.0729, + "step": 1474000 + }, + { + "epoch": 2.77, + "learning_rate": 2.322749217265949e-05, + "loss": 4.06, + "step": 1474500 + }, + { + "epoch": 2.77, + "learning_rate": 2.3133639168987964e-05, + "loss": 4.0639, + "step": 1475000 + }, + { + "epoch": 2.77, + "learning_rate": 2.303978616531643e-05, + "loss": 4.0811, + "step": 1475500 + }, + { + "epoch": 2.77, + "learning_rate": 2.2945933161644905e-05, + "loss": 4.0953, + "step": 1476000 + }, + { + "epoch": 2.77, + "learning_rate": 2.2852080157973372e-05, + "loss": 4.0624, + "step": 1476500 + }, + { + "epoch": 2.77, + "learning_rate": 2.2758227154301846e-05, + "loss": 4.0748, + "step": 1477000 + }, + { + "epoch": 2.77, + "learning_rate": 2.2664374150630313e-05, + "loss": 4.0562, + "step": 1477500 + }, + { + "epoch": 2.77, + "learning_rate": 2.2570521146958784e-05, + "loss": 4.0767, + "step": 1478000 + }, + { + "epoch": 2.78, + "learning_rate": 2.2476668143287258e-05, + "loss": 4.0814, + "step": 1478500 + }, + { + "epoch": 2.78, + "learning_rate": 2.2382815139615725e-05, + "loss": 4.0775, + "step": 1479000 + }, + { + "epoch": 2.78, + "learning_rate": 2.22889621359442e-05, + "loss": 4.0572, + "step": 1479500 + }, + { + "epoch": 2.78, + "learning_rate": 2.2195109132272667e-05, + "loss": 4.0755, + "step": 1480000 + }, + { + "epoch": 2.78, + "learning_rate": 2.2101256128601137e-05, + "loss": 4.0798, + "step": 1480500 + }, + { + "epoch": 2.78, + "learning_rate": 2.2007403124929608e-05, + "loss": 4.0777, + "step": 1481000 + }, + { + "epoch": 2.78, + "learning_rate": 2.191355012125808e-05, + "loss": 4.0596, + "step": 1481500 + }, + { + "epoch": 2.78, + "learning_rate": 2.1819697117586546e-05, + "loss": 4.0721, + "step": 1482000 + }, + { + "epoch": 2.78, + "learning_rate": 2.172584411391502e-05, + "loss": 4.0976, + "step": 1482500 + }, + { + "epoch": 2.78, + "learning_rate": 2.163199111024349e-05, + "loss": 4.0837, + "step": 1483000 + }, + { + "epoch": 2.78, + "learning_rate": 2.153813810657196e-05, + "loss": 4.0534, + "step": 1483500 + }, + { + "epoch": 2.79, + "learning_rate": 2.1444285102900432e-05, + "loss": 4.0872, + "step": 1484000 + }, + { + "epoch": 2.79, + "learning_rate": 2.1350432099228902e-05, + "loss": 4.0767, + "step": 1484500 + }, + { + "epoch": 2.79, + "learning_rate": 2.1256579095557373e-05, + "loss": 4.0758, + "step": 1485000 + }, + { + "epoch": 2.79, + "learning_rate": 2.116272609188584e-05, + "loss": 4.0661, + "step": 1485500 + }, + { + "epoch": 2.79, + "learning_rate": 2.1068873088214314e-05, + "loss": 4.0621, + "step": 1486000 + }, + { + "epoch": 2.79, + "learning_rate": 2.097502008454278e-05, + "loss": 4.0947, + "step": 1486500 + }, + { + "epoch": 2.79, + "learning_rate": 2.0881167080871256e-05, + "loss": 4.0672, + "step": 1487000 + }, + { + "epoch": 2.79, + "learning_rate": 2.0787314077199726e-05, + "loss": 4.086, + "step": 1487500 + }, + { + "epoch": 2.79, + "learning_rate": 2.0693461073528194e-05, + "loss": 4.0562, + "step": 1488000 + }, + { + "epoch": 2.79, + "learning_rate": 2.0599608069856668e-05, + "loss": 4.0736, + "step": 1488500 + }, + { + "epoch": 2.79, + "learning_rate": 2.0505755066185135e-05, + "loss": 4.0753, + "step": 1489000 + }, + { + "epoch": 2.8, + "learning_rate": 2.041190206251361e-05, + "loss": 4.0578, + "step": 1489500 + }, + { + "epoch": 2.8, + "learning_rate": 2.0318049058842076e-05, + "loss": 4.0664, + "step": 1490000 + }, + { + "epoch": 2.8, + "learning_rate": 2.0224196055170547e-05, + "loss": 4.0841, + "step": 1490500 + }, + { + "epoch": 2.8, + "learning_rate": 2.013034305149902e-05, + "loss": 4.0693, + "step": 1491000 + }, + { + "epoch": 2.8, + "learning_rate": 2.0036490047827488e-05, + "loss": 4.0514, + "step": 1491500 + }, + { + "epoch": 2.8, + "learning_rate": 1.9942637044155962e-05, + "loss": 4.0778, + "step": 1492000 + }, + { + "epoch": 2.8, + "learning_rate": 1.984878404048443e-05, + "loss": 4.0732, + "step": 1492500 + }, + { + "epoch": 2.8, + "learning_rate": 1.97549310368129e-05, + "loss": 4.0693, + "step": 1493000 + }, + { + "epoch": 2.8, + "learning_rate": 1.966107803314137e-05, + "loss": 4.0502, + "step": 1493500 + }, + { + "epoch": 2.8, + "learning_rate": 1.956722502946984e-05, + "loss": 4.0793, + "step": 1494000 + }, + { + "epoch": 2.81, + "learning_rate": 1.9473372025798312e-05, + "loss": 4.0909, + "step": 1494500 + }, + { + "epoch": 2.81, + "learning_rate": 1.9379519022126783e-05, + "loss": 4.0733, + "step": 1495000 + }, + { + "epoch": 2.81, + "learning_rate": 1.9285666018455257e-05, + "loss": 4.0509, + "step": 1495500 + }, + { + "epoch": 2.81, + "learning_rate": 1.9191813014783724e-05, + "loss": 4.0561, + "step": 1496000 + }, + { + "epoch": 2.81, + "learning_rate": 1.9097960011112194e-05, + "loss": 4.0527, + "step": 1496500 + }, + { + "epoch": 2.81, + "learning_rate": 1.9004107007440665e-05, + "loss": 4.0767, + "step": 1497000 + }, + { + "epoch": 2.81, + "learning_rate": 1.8910254003769136e-05, + "loss": 4.0684, + "step": 1497500 + }, + { + "epoch": 2.81, + "learning_rate": 1.8816401000097603e-05, + "loss": 4.0715, + "step": 1498000 + }, + { + "epoch": 2.81, + "learning_rate": 1.8722547996426077e-05, + "loss": 4.0734, + "step": 1498500 + }, + { + "epoch": 2.81, + "learning_rate": 1.8628694992754548e-05, + "loss": 4.0651, + "step": 1499000 + }, + { + "epoch": 2.81, + "learning_rate": 1.8534841989083018e-05, + "loss": 4.0526, + "step": 1499500 + }, + { + "epoch": 2.82, + "learning_rate": 1.844098898541149e-05, + "loss": 4.0517, + "step": 1500000 + }, + { + "epoch": 2.82, + "learning_rate": 1.8347135981739956e-05, + "loss": 4.0661, + "step": 1500500 + }, + { + "epoch": 2.82, + "learning_rate": 1.8253282978068427e-05, + "loss": 4.0654, + "step": 1501000 + }, + { + "epoch": 2.82, + "learning_rate": 1.8159429974396897e-05, + "loss": 4.0687, + "step": 1501500 + }, + { + "epoch": 2.82, + "learning_rate": 1.8065576970725368e-05, + "loss": 4.1042, + "step": 1502000 + }, + { + "epoch": 2.82, + "learning_rate": 1.7971723967053842e-05, + "loss": 4.0794, + "step": 1502500 + }, + { + "epoch": 2.82, + "learning_rate": 1.7877870963382313e-05, + "loss": 4.0609, + "step": 1503000 + }, + { + "epoch": 2.82, + "learning_rate": 1.778401795971078e-05, + "loss": 4.06, + "step": 1503500 + }, + { + "epoch": 2.82, + "learning_rate": 1.769016495603925e-05, + "loss": 4.0815, + "step": 1504000 + }, + { + "epoch": 2.82, + "learning_rate": 1.759631195236772e-05, + "loss": 4.0839, + "step": 1504500 + }, + { + "epoch": 2.82, + "learning_rate": 1.7502458948696192e-05, + "loss": 4.0673, + "step": 1505000 + }, + { + "epoch": 2.83, + "learning_rate": 1.7408605945024663e-05, + "loss": 4.0789, + "step": 1505500 + }, + { + "epoch": 2.83, + "learning_rate": 1.7314752941353133e-05, + "loss": 4.0882, + "step": 1506000 + }, + { + "epoch": 2.83, + "learning_rate": 1.7220899937681604e-05, + "loss": 4.0565, + "step": 1506500 + }, + { + "epoch": 2.83, + "learning_rate": 1.7127046934010074e-05, + "loss": 4.071, + "step": 1507000 + }, + { + "epoch": 2.83, + "learning_rate": 1.7033193930338545e-05, + "loss": 4.0723, + "step": 1507500 + }, + { + "epoch": 2.83, + "learning_rate": 1.6939340926667016e-05, + "loss": 4.0561, + "step": 1508000 + }, + { + "epoch": 2.83, + "learning_rate": 1.6845487922995486e-05, + "loss": 4.0665, + "step": 1508500 + }, + { + "epoch": 2.83, + "learning_rate": 1.6751634919323957e-05, + "loss": 4.0753, + "step": 1509000 + }, + { + "epoch": 2.83, + "learning_rate": 1.6657781915652428e-05, + "loss": 4.0621, + "step": 1509500 + }, + { + "epoch": 2.83, + "learning_rate": 1.65639289119809e-05, + "loss": 4.0514, + "step": 1510000 + }, + { + "epoch": 2.84, + "learning_rate": 1.6470075908309366e-05, + "loss": 4.0546, + "step": 1510500 + }, + { + "epoch": 2.84, + "learning_rate": 1.637622290463784e-05, + "loss": 4.0733, + "step": 1511000 + }, + { + "epoch": 2.84, + "learning_rate": 1.628236990096631e-05, + "loss": 4.057, + "step": 1511500 + }, + { + "epoch": 2.84, + "learning_rate": 1.618851689729478e-05, + "loss": 4.0731, + "step": 1512000 + }, + { + "epoch": 2.84, + "learning_rate": 1.609466389362325e-05, + "loss": 4.0748, + "step": 1512500 + }, + { + "epoch": 2.84, + "learning_rate": 1.6000810889951722e-05, + "loss": 4.0901, + "step": 1513000 + }, + { + "epoch": 2.84, + "learning_rate": 1.590695788628019e-05, + "loss": 4.0753, + "step": 1513500 + }, + { + "epoch": 2.84, + "learning_rate": 1.581310488260866e-05, + "loss": 4.0635, + "step": 1514000 + }, + { + "epoch": 2.84, + "learning_rate": 1.571925187893713e-05, + "loss": 4.0726, + "step": 1514500 + }, + { + "epoch": 2.84, + "learning_rate": 1.56253988752656e-05, + "loss": 4.0904, + "step": 1515000 + }, + { + "epoch": 2.84, + "learning_rate": 1.5531545871594075e-05, + "loss": 4.0687, + "step": 1515500 + }, + { + "epoch": 2.85, + "learning_rate": 1.5437692867922546e-05, + "loss": 4.0737, + "step": 1516000 + }, + { + "epoch": 2.85, + "learning_rate": 1.5343839864251013e-05, + "loss": 4.0753, + "step": 1516500 + }, + { + "epoch": 2.85, + "learning_rate": 1.5249986860579486e-05, + "loss": 4.0761, + "step": 1517000 + }, + { + "epoch": 2.85, + "learning_rate": 1.5156133856907955e-05, + "loss": 4.0725, + "step": 1517500 + }, + { + "epoch": 2.85, + "learning_rate": 1.5062280853236425e-05, + "loss": 4.0736, + "step": 1518000 + }, + { + "epoch": 2.85, + "learning_rate": 1.4968427849564896e-05, + "loss": 4.0662, + "step": 1518500 + }, + { + "epoch": 2.85, + "learning_rate": 1.4874574845893366e-05, + "loss": 4.0811, + "step": 1519000 + }, + { + "epoch": 2.85, + "learning_rate": 1.4780721842221835e-05, + "loss": 4.0701, + "step": 1519500 + }, + { + "epoch": 2.85, + "learning_rate": 1.468686883855031e-05, + "loss": 4.0552, + "step": 1520000 + }, + { + "epoch": 2.85, + "learning_rate": 1.4593015834878778e-05, + "loss": 4.0639, + "step": 1520500 + }, + { + "epoch": 2.86, + "learning_rate": 1.4499162831207249e-05, + "loss": 4.0573, + "step": 1521000 + }, + { + "epoch": 2.86, + "learning_rate": 1.440530982753572e-05, + "loss": 4.0606, + "step": 1521500 + }, + { + "epoch": 2.86, + "learning_rate": 1.431145682386419e-05, + "loss": 4.0827, + "step": 1522000 + }, + { + "epoch": 2.86, + "learning_rate": 1.421760382019266e-05, + "loss": 4.0573, + "step": 1522500 + }, + { + "epoch": 2.86, + "learning_rate": 1.412375081652113e-05, + "loss": 4.0568, + "step": 1523000 + }, + { + "epoch": 2.86, + "learning_rate": 1.40298978128496e-05, + "loss": 4.0633, + "step": 1523500 + }, + { + "epoch": 2.86, + "learning_rate": 1.3936044809178071e-05, + "loss": 4.0646, + "step": 1524000 + }, + { + "epoch": 2.86, + "learning_rate": 1.3842191805506544e-05, + "loss": 4.0796, + "step": 1524500 + }, + { + "epoch": 2.86, + "learning_rate": 1.3748338801835014e-05, + "loss": 4.0707, + "step": 1525000 + }, + { + "epoch": 2.86, + "learning_rate": 1.3654485798163483e-05, + "loss": 4.0444, + "step": 1525500 + }, + { + "epoch": 2.86, + "learning_rate": 1.3560632794491954e-05, + "loss": 4.0624, + "step": 1526000 + }, + { + "epoch": 2.87, + "learning_rate": 1.3466779790820424e-05, + "loss": 4.065, + "step": 1526500 + }, + { + "epoch": 2.87, + "learning_rate": 1.3372926787148895e-05, + "loss": 4.0685, + "step": 1527000 + }, + { + "epoch": 2.87, + "learning_rate": 1.3279073783477364e-05, + "loss": 4.0811, + "step": 1527500 + }, + { + "epoch": 2.87, + "learning_rate": 1.3185220779805835e-05, + "loss": 4.0686, + "step": 1528000 + }, + { + "epoch": 2.87, + "learning_rate": 1.3091367776134305e-05, + "loss": 4.0632, + "step": 1528500 + }, + { + "epoch": 2.87, + "learning_rate": 1.2997514772462778e-05, + "loss": 4.0716, + "step": 1529000 + }, + { + "epoch": 2.87, + "learning_rate": 1.2903661768791248e-05, + "loss": 4.0852, + "step": 1529500 + }, + { + "epoch": 2.87, + "learning_rate": 1.2809808765119719e-05, + "loss": 4.0707, + "step": 1530000 + }, + { + "epoch": 2.87, + "learning_rate": 1.2715955761448188e-05, + "loss": 4.0465, + "step": 1530500 + }, + { + "epoch": 2.87, + "learning_rate": 1.2622102757776658e-05, + "loss": 4.0381, + "step": 1531000 + }, + { + "epoch": 2.87, + "learning_rate": 1.2528249754105129e-05, + "loss": 4.0863, + "step": 1531500 + }, + { + "epoch": 2.88, + "learning_rate": 1.24343967504336e-05, + "loss": 4.0686, + "step": 1532000 + }, + { + "epoch": 2.88, + "learning_rate": 1.2340543746762069e-05, + "loss": 4.0502, + "step": 1532500 + }, + { + "epoch": 2.88, + "learning_rate": 1.224669074309054e-05, + "loss": 4.044, + "step": 1533000 + }, + { + "epoch": 2.88, + "learning_rate": 1.2152837739419012e-05, + "loss": 4.0523, + "step": 1533500 + }, + { + "epoch": 2.88, + "learning_rate": 1.2058984735747482e-05, + "loss": 4.0548, + "step": 1534000 + }, + { + "epoch": 2.88, + "learning_rate": 1.1965131732075953e-05, + "loss": 4.063, + "step": 1534500 + }, + { + "epoch": 2.88, + "learning_rate": 1.1871278728404424e-05, + "loss": 4.0483, + "step": 1535000 + }, + { + "epoch": 2.88, + "learning_rate": 1.1777425724732893e-05, + "loss": 4.0641, + "step": 1535500 + }, + { + "epoch": 2.88, + "learning_rate": 1.1683572721061363e-05, + "loss": 4.067, + "step": 1536000 + }, + { + "epoch": 2.88, + "learning_rate": 1.1589719717389834e-05, + "loss": 4.0757, + "step": 1536500 + }, + { + "epoch": 2.89, + "learning_rate": 1.1495866713718304e-05, + "loss": 4.0793, + "step": 1537000 + }, + { + "epoch": 2.89, + "learning_rate": 1.1402013710046777e-05, + "loss": 4.0673, + "step": 1537500 + }, + { + "epoch": 2.89, + "learning_rate": 1.1308160706375247e-05, + "loss": 4.0763, + "step": 1538000 + }, + { + "epoch": 2.89, + "learning_rate": 1.1214307702703716e-05, + "loss": 4.0714, + "step": 1538500 + }, + { + "epoch": 2.89, + "learning_rate": 1.1120454699032187e-05, + "loss": 4.0724, + "step": 1539000 + }, + { + "epoch": 2.89, + "learning_rate": 1.1026601695360658e-05, + "loss": 4.0746, + "step": 1539500 + }, + { + "epoch": 2.89, + "learning_rate": 1.0932748691689128e-05, + "loss": 4.0671, + "step": 1540000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0838895688017597e-05, + "loss": 4.0652, + "step": 1540500 + }, + { + "epoch": 2.89, + "learning_rate": 1.0745042684346068e-05, + "loss": 4.0546, + "step": 1541000 + }, + { + "epoch": 2.89, + "learning_rate": 1.0651189680674539e-05, + "loss": 4.0641, + "step": 1541500 + }, + { + "epoch": 2.89, + "learning_rate": 1.0557336677003011e-05, + "loss": 4.0924, + "step": 1542000 + }, + { + "epoch": 2.9, + "learning_rate": 1.0463483673331481e-05, + "loss": 4.0365, + "step": 1542500 + }, + { + "epoch": 2.9, + "learning_rate": 1.0369630669659952e-05, + "loss": 4.0726, + "step": 1543000 + }, + { + "epoch": 2.9, + "learning_rate": 1.0275777665988421e-05, + "loss": 4.0716, + "step": 1543500 + }, + { + "epoch": 2.9, + "learning_rate": 1.0181924662316892e-05, + "loss": 4.0571, + "step": 1544000 + }, + { + "epoch": 2.9, + "learning_rate": 1.0088071658645362e-05, + "loss": 4.068, + "step": 1544500 + }, + { + "epoch": 2.9, + "learning_rate": 9.994218654973833e-06, + "loss": 4.0666, + "step": 1545000 + }, + { + "epoch": 2.9, + "learning_rate": 9.900365651302302e-06, + "loss": 4.0692, + "step": 1545500 + }, + { + "epoch": 2.9, + "learning_rate": 9.806512647630773e-06, + "loss": 4.0601, + "step": 1546000 + }, + { + "epoch": 2.9, + "learning_rate": 9.712659643959245e-06, + "loss": 4.0684, + "step": 1546500 + }, + { + "epoch": 2.9, + "learning_rate": 9.618806640287716e-06, + "loss": 4.0693, + "step": 1547000 + }, + { + "epoch": 2.9, + "learning_rate": 9.524953636616186e-06, + "loss": 4.0503, + "step": 1547500 + }, + { + "epoch": 2.91, + "learning_rate": 9.431100632944657e-06, + "loss": 4.0649, + "step": 1548000 + }, + { + "epoch": 2.91, + "learning_rate": 9.337247629273126e-06, + "loss": 4.0477, + "step": 1548500 + }, + { + "epoch": 2.91, + "learning_rate": 9.243394625601596e-06, + "loss": 4.0735, + "step": 1549000 + }, + { + "epoch": 2.91, + "learning_rate": 9.149541621930067e-06, + "loss": 4.0613, + "step": 1549500 + }, + { + "epoch": 2.91, + "learning_rate": 9.055688618258538e-06, + "loss": 4.0593, + "step": 1550000 + }, + { + "epoch": 2.91, + "learning_rate": 8.961835614587008e-06, + "loss": 4.0832, + "step": 1550500 + }, + { + "epoch": 2.91, + "learning_rate": 8.867982610915479e-06, + "loss": 4.0533, + "step": 1551000 + }, + { + "epoch": 2.91, + "learning_rate": 8.77412960724395e-06, + "loss": 4.0752, + "step": 1551500 + }, + { + "epoch": 2.91, + "learning_rate": 8.68027660357242e-06, + "loss": 4.0505, + "step": 1552000 + }, + { + "epoch": 2.91, + "learning_rate": 8.586423599900891e-06, + "loss": 4.0665, + "step": 1552500 + }, + { + "epoch": 2.92, + "learning_rate": 8.492570596229362e-06, + "loss": 4.0617, + "step": 1553000 + }, + { + "epoch": 2.92, + "learning_rate": 8.39871759255783e-06, + "loss": 4.0793, + "step": 1553500 + }, + { + "epoch": 2.92, + "learning_rate": 8.304864588886301e-06, + "loss": 4.0449, + "step": 1554000 + }, + { + "epoch": 2.92, + "learning_rate": 8.211011585214773e-06, + "loss": 4.0949, + "step": 1554500 + }, + { + "epoch": 2.92, + "learning_rate": 8.117158581543242e-06, + "loss": 4.0613, + "step": 1555000 + }, + { + "epoch": 2.92, + "learning_rate": 8.023305577871713e-06, + "loss": 4.0632, + "step": 1555500 + }, + { + "epoch": 2.92, + "learning_rate": 7.929452574200184e-06, + "loss": 4.0474, + "step": 1556000 + }, + { + "epoch": 2.92, + "learning_rate": 7.835599570528654e-06, + "loss": 4.0592, + "step": 1556500 + }, + { + "epoch": 2.92, + "learning_rate": 7.741746566857125e-06, + "loss": 4.0555, + "step": 1557000 + }, + { + "epoch": 2.92, + "learning_rate": 7.647893563185596e-06, + "loss": 4.0757, + "step": 1557500 + }, + { + "epoch": 2.92, + "learning_rate": 7.5540405595140654e-06, + "loss": 4.0636, + "step": 1558000 + }, + { + "epoch": 2.93, + "learning_rate": 7.460187555842537e-06, + "loss": 4.0772, + "step": 1558500 + }, + { + "epoch": 2.93, + "learning_rate": 7.3663345521710076e-06, + "loss": 4.0507, + "step": 1559000 + }, + { + "epoch": 2.93, + "learning_rate": 7.272481548499477e-06, + "loss": 4.062, + "step": 1559500 + }, + { + "epoch": 2.93, + "learning_rate": 7.178628544827948e-06, + "loss": 4.0709, + "step": 1560000 + }, + { + "epoch": 2.93, + "learning_rate": 7.084775541156418e-06, + "loss": 4.066, + "step": 1560500 + }, + { + "epoch": 2.93, + "learning_rate": 6.990922537484889e-06, + "loss": 4.0429, + "step": 1561000 + }, + { + "epoch": 2.93, + "learning_rate": 6.89706953381336e-06, + "loss": 4.0554, + "step": 1561500 + }, + { + "epoch": 2.93, + "learning_rate": 6.80321653014183e-06, + "loss": 4.0622, + "step": 1562000 + }, + { + "epoch": 2.93, + "learning_rate": 6.7093635264703e-06, + "loss": 4.0711, + "step": 1562500 + }, + { + "epoch": 2.93, + "learning_rate": 6.615510522798772e-06, + "loss": 4.0514, + "step": 1563000 + }, + { + "epoch": 2.93, + "learning_rate": 6.521657519127242e-06, + "loss": 4.0655, + "step": 1563500 + }, + { + "epoch": 2.94, + "learning_rate": 6.427804515455712e-06, + "loss": 4.0667, + "step": 1564000 + }, + { + "epoch": 2.94, + "learning_rate": 6.333951511784182e-06, + "loss": 4.0616, + "step": 1564500 + }, + { + "epoch": 2.94, + "learning_rate": 6.240098508112653e-06, + "loss": 4.069, + "step": 1565000 + }, + { + "epoch": 2.94, + "learning_rate": 6.146245504441124e-06, + "loss": 4.0408, + "step": 1565500 + }, + { + "epoch": 2.94, + "learning_rate": 6.052392500769594e-06, + "loss": 4.0799, + "step": 1566000 + }, + { + "epoch": 2.94, + "learning_rate": 5.958539497098065e-06, + "loss": 4.0542, + "step": 1566500 + }, + { + "epoch": 2.94, + "learning_rate": 5.864686493426534e-06, + "loss": 4.0523, + "step": 1567000 + }, + { + "epoch": 2.94, + "learning_rate": 5.770833489755006e-06, + "loss": 4.0604, + "step": 1567500 + }, + { + "epoch": 2.94, + "learning_rate": 5.6769804860834765e-06, + "loss": 4.0685, + "step": 1568000 + }, + { + "epoch": 2.94, + "learning_rate": 5.583127482411946e-06, + "loss": 4.0656, + "step": 1568500 + }, + { + "epoch": 2.95, + "learning_rate": 5.489274478740417e-06, + "loss": 4.0775, + "step": 1569000 + }, + { + "epoch": 2.95, + "learning_rate": 5.395421475068887e-06, + "loss": 4.0758, + "step": 1569500 + }, + { + "epoch": 2.95, + "learning_rate": 5.301568471397358e-06, + "loss": 4.0527, + "step": 1570000 + }, + { + "epoch": 2.95, + "learning_rate": 5.207715467725829e-06, + "loss": 4.0844, + "step": 1570500 + }, + { + "epoch": 2.95, + "learning_rate": 5.113862464054299e-06, + "loss": 4.0691, + "step": 1571000 + }, + { + "epoch": 2.95, + "learning_rate": 5.020009460382769e-06, + "loss": 4.0732, + "step": 1571500 + }, + { + "epoch": 2.95, + "learning_rate": 4.926156456711241e-06, + "loss": 4.0753, + "step": 1572000 + }, + { + "epoch": 2.95, + "learning_rate": 4.832303453039711e-06, + "loss": 4.07, + "step": 1572500 + }, + { + "epoch": 2.95, + "learning_rate": 4.738450449368181e-06, + "loss": 4.0587, + "step": 1573000 + }, + { + "epoch": 2.95, + "learning_rate": 4.644597445696652e-06, + "loss": 4.0684, + "step": 1573500 + }, + { + "epoch": 2.95, + "learning_rate": 4.5507444420251225e-06, + "loss": 4.0555, + "step": 1574000 + }, + { + "epoch": 2.96, + "learning_rate": 4.456891438353592e-06, + "loss": 4.046, + "step": 1574500 + }, + { + "epoch": 2.96, + "learning_rate": 4.363038434682063e-06, + "loss": 4.0417, + "step": 1575000 + }, + { + "epoch": 2.96, + "learning_rate": 4.269185431010534e-06, + "loss": 4.0441, + "step": 1575500 + }, + { + "epoch": 2.96, + "learning_rate": 4.175332427339004e-06, + "loss": 4.0691, + "step": 1576000 + }, + { + "epoch": 2.96, + "learning_rate": 4.081479423667475e-06, + "loss": 4.0547, + "step": 1576500 + }, + { + "epoch": 2.96, + "learning_rate": 3.9876264199959455e-06, + "loss": 4.08, + "step": 1577000 + }, + { + "epoch": 2.96, + "learning_rate": 3.893773416324415e-06, + "loss": 4.0607, + "step": 1577500 + }, + { + "epoch": 2.96, + "learning_rate": 3.7999204126528864e-06, + "loss": 4.063, + "step": 1578000 + }, + { + "epoch": 2.96, + "learning_rate": 3.7060674089813566e-06, + "loss": 4.068, + "step": 1578500 + }, + { + "epoch": 2.96, + "learning_rate": 3.6122144053098273e-06, + "loss": 4.0872, + "step": 1579000 + }, + { + "epoch": 2.96, + "learning_rate": 3.518361401638298e-06, + "loss": 4.0464, + "step": 1579500 + }, + { + "epoch": 2.97, + "learning_rate": 3.424508397966768e-06, + "loss": 4.068, + "step": 1580000 + }, + { + "epoch": 2.97, + "learning_rate": 3.3306553942952387e-06, + "loss": 4.0741, + "step": 1580500 + }, + { + "epoch": 2.97, + "learning_rate": 3.236802390623709e-06, + "loss": 4.0678, + "step": 1581000 + }, + { + "epoch": 2.97, + "learning_rate": 3.14294938695218e-06, + "loss": 4.0486, + "step": 1581500 + }, + { + "epoch": 2.97, + "learning_rate": 3.0490963832806502e-06, + "loss": 4.0615, + "step": 1582000 + }, + { + "epoch": 2.97, + "learning_rate": 2.955243379609121e-06, + "loss": 4.0524, + "step": 1582500 + }, + { + "epoch": 2.97, + "learning_rate": 2.861390375937591e-06, + "loss": 4.0626, + "step": 1583000 + }, + { + "epoch": 2.97, + "learning_rate": 2.7675373722660617e-06, + "loss": 4.0577, + "step": 1583500 + }, + { + "epoch": 2.97, + "learning_rate": 2.6736843685945324e-06, + "loss": 4.0545, + "step": 1584000 + }, + { + "epoch": 2.97, + "learning_rate": 2.579831364923003e-06, + "loss": 4.0658, + "step": 1584500 + }, + { + "epoch": 2.98, + "learning_rate": 2.4859783612514732e-06, + "loss": 4.0748, + "step": 1585000 + }, + { + "epoch": 2.98, + "learning_rate": 2.3921253575799435e-06, + "loss": 4.0625, + "step": 1585500 + }, + { + "epoch": 2.98, + "learning_rate": 2.2982723539084145e-06, + "loss": 4.0686, + "step": 1586000 + }, + { + "epoch": 2.98, + "learning_rate": 2.2044193502368847e-06, + "loss": 4.0765, + "step": 1586500 + }, + { + "epoch": 2.98, + "learning_rate": 2.1105663465653554e-06, + "loss": 4.0554, + "step": 1587000 + }, + { + "epoch": 2.98, + "learning_rate": 2.0167133428938256e-06, + "loss": 4.0595, + "step": 1587500 + }, + { + "epoch": 2.98, + "learning_rate": 1.9228603392222962e-06, + "loss": 4.0785, + "step": 1588000 + }, + { + "epoch": 2.98, + "learning_rate": 1.8290073355507667e-06, + "loss": 4.0654, + "step": 1588500 + }, + { + "epoch": 2.98, + "learning_rate": 1.7351543318792373e-06, + "loss": 4.0748, + "step": 1589000 + }, + { + "epoch": 2.98, + "learning_rate": 1.6413013282077077e-06, + "loss": 4.0668, + "step": 1589500 + }, + { + "epoch": 2.98, + "learning_rate": 1.5474483245361784e-06, + "loss": 4.0526, + "step": 1590000 + }, + { + "epoch": 2.99, + "learning_rate": 1.4535953208646488e-06, + "loss": 4.0478, + "step": 1590500 + }, + { + "epoch": 2.99, + "learning_rate": 1.3597423171931194e-06, + "loss": 4.0519, + "step": 1591000 + }, + { + "epoch": 2.99, + "learning_rate": 1.2658893135215899e-06, + "loss": 4.0559, + "step": 1591500 + }, + { + "epoch": 2.99, + "learning_rate": 1.1720363098500605e-06, + "loss": 4.0642, + "step": 1592000 + }, + { + "epoch": 2.99, + "learning_rate": 1.0781833061785307e-06, + "loss": 4.0688, + "step": 1592500 + }, + { + "epoch": 2.99, + "learning_rate": 9.843303025070014e-07, + "loss": 4.0684, + "step": 1593000 + }, + { + "epoch": 2.99, + "learning_rate": 8.904772988354719e-07, + "loss": 4.045, + "step": 1593500 + }, + { + "epoch": 2.99, + "learning_rate": 7.966242951639423e-07, + "loss": 4.0677, + "step": 1594000 + }, + { + "epoch": 2.99, + "learning_rate": 7.027712914924129e-07, + "loss": 4.0525, + "step": 1594500 + }, + { + "epoch": 2.99, + "learning_rate": 6.089182878208833e-07, + "loss": 4.0839, + "step": 1595000 + }, + { + "epoch": 2.99, + "learning_rate": 5.150652841493538e-07, + "loss": 4.0531, + "step": 1595500 + }, + { + "epoch": 3.0, + "learning_rate": 4.2121228047782437e-07, + "loss": 4.0731, + "step": 1596000 + }, + { + "epoch": 3.0, + "learning_rate": 3.2735927680629485e-07, + "loss": 4.0568, + "step": 1596500 + }, + { + "epoch": 3.0, + "learning_rate": 2.3350627313476536e-07, + "loss": 4.0647, + "step": 1597000 + }, + { + "epoch": 3.0, + "learning_rate": 1.396532694632359e-07, + "loss": 4.0598, + "step": 1597500 + }, + { + "epoch": 3.0, + "learning_rate": 4.580026579170639e-08, + "loss": 4.0552, + "step": 1598000 + }, + { + "epoch": 3.0, + "step": 1598244, + "total_flos": 5.01129636544512e+18, + "train_loss": 0.7859680134460877, + "train_runtime": 210975.8843, + "train_samples_per_second": 45.453, + "train_steps_per_second": 7.575 + } + ], + "max_steps": 1598244, + "num_train_epochs": 3, + "total_flos": 5.01129636544512e+18, + "trial_name": null, + "trial_params": null +}