diff --git "a/last-checkpoint/trainer_state.json" "b/last-checkpoint/trainer_state.json" --- "a/last-checkpoint/trainer_state.json" +++ "b/last-checkpoint/trainer_state.json" @@ -1,17116 +1,316 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 2.4494321185009476, - "global_step": 570000, + "epoch": 0.042973785990545764, + "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, - "learning_rate": 0.0004000105276443632, - "loss": 10.1199, + "learning_rate": 0.00040004211081201384, + "loss": 8.3496, "step": 200 }, { "epoch": 0.0, - "learning_rate": 0.00040004211053127486, - "loss": 9.997, + "learning_rate": 0.000400168442509171, + "loss": 8.2272, "step": 400 }, { "epoch": 0.0, - "learning_rate": 0.000400094748522194, - "loss": 9.9386, + "learning_rate": 0.000400378992874836, + "loss": 7.6879, "step": 600 }, { "epoch": 0.0, - "learning_rate": 0.00040016844138622554, - "loss": 9.8988, + "learning_rate": 0.0004006737582146567, + "loss": 7.4747, "step": 800 }, { "epoch": 0.0, - "learning_rate": 0.0004002631888001141, - "loss": 9.8579, + "learning_rate": 0.0004010527333566261, + "loss": 7.2829, "step": 1000 }, { "epoch": 0.01, - "learning_rate": 0.0004003789903482477, - "loss": 9.8159, + "learning_rate": 0.0004015159116511832, + "loss": 7.1171, "step": 1200 }, { "epoch": 0.01, - "learning_rate": 0.0004005158455226594, - "loss": 9.7867, + "learning_rate": 0.00040206328497132196, + "loss": 6.9445, "step": 1400 }, { "epoch": 0.01, - "learning_rate": 0.0004006737537230326, - "loss": 9.7605, + "learning_rate": 0.0004026948437127389, + "loss": 6.8391, "step": 1600 }, { "epoch": 0.01, - "learning_rate": 0.0004008527142566991, - "loss": 9.7357, + "learning_rate": 0.0004034105767939909, + "loss": 6.7131, "step": 1800 }, { "epoch": 0.01, - "learning_rate": 0.0004010527263386479, - "loss": 9.7138, + "learning_rate": 0.00040421047165670534, + "loss": 6.6113, "step": 2000 }, { "epoch": 0.01, - "learning_rate": 0.00040127378909152016, - "loss": 9.6894, + "learning_rate": 0.0004050945142657896, + "loss": 6.4966, "step": 2200 }, { "epoch": 0.01, - "learning_rate": 0.000401515901545621, - "loss": 9.6634, + "learning_rate": 0.0004060626891096795, + "loss": 6.3979, "step": 2400 }, { "epoch": 0.01, - "learning_rate": 0.00040177906263891804, - "loss": 9.6451, + "learning_rate": 0.0004071149792006148, + "loss": 6.3116, "step": 2600 }, { "epoch": 0.01, - "learning_rate": 0.00040206327121705167, - "loss": 9.6279, + "learning_rate": 0.00040825136607492915, + "loss": 6.2301, "step": 2800 }, { "epoch": 0.01, - "learning_rate": 0.00040236852603333685, - "loss": 9.6038, + "learning_rate": 0.0004094718297933883, + "loss": 6.123, "step": 3000 }, { "epoch": 0.01, - "learning_rate": 0.0004026948257487631, - "loss": 9.5874, + "learning_rate": 0.0004107763489415231, + "loss": 6.0802, "step": 3200 }, { "epoch": 0.01, - "learning_rate": 0.00040304216893201697, - "loss": 9.5729, + "learning_rate": 0.00041216490063001633, + "loss": 6.0029, "step": 3400 }, { "epoch": 0.02, - "learning_rate": 0.0004034105540594666, - "loss": 9.547, + "learning_rate": 0.00041363746049510354, + "loss": 5.9471, "step": 3600 }, { "epoch": 0.02, - "learning_rate": 0.0004037999795151858, - "loss": 9.5348, + "learning_rate": 0.0004151940026989945, + "loss": 5.9132, "step": 3800 }, { "epoch": 0.02, - "learning_rate": 0.0004042104435909525, - "loss": 9.5207, + "learning_rate": 0.0004168344999303346, + "loss": 5.8561, "step": 4000 }, { "epoch": 0.02, - "learning_rate": 0.0004046419444862573, - "loss": 9.5061, + "learning_rate": 0.00041855892340467854, + "loss": 5.8044, "step": 4200 }, { "epoch": 0.02, - "learning_rate": 0.0004050944803083139, - "loss": 9.493, + "learning_rate": 0.0004203672428649916, + "loss": 5.734, "step": 4400 }, { "epoch": 0.02, - "learning_rate": 0.0004055680490720661, - "loss": 9.4782, + "learning_rate": 0.0004222594265821944, + "loss": 5.7245, "step": 4600 }, { "epoch": 0.02, - "learning_rate": 0.0004060626487001964, - "loss": 9.4636, + "learning_rate": 0.0004242354413557057, + "loss": 5.6867, "step": 4800 }, { "epoch": 0.02, - "learning_rate": 0.0004065782770231313, - "loss": 9.4546, + "learning_rate": 0.00042629525251402893, + "loss": 5.6387, "step": 5000 }, { "epoch": 0.02, - "learning_rate": 0.000407114931779062, - "loss": 9.4453, + "learning_rate": 0.0004284388239153662, + "loss": 5.6119, "step": 5200 }, { "epoch": 0.02, - "learning_rate": 0.00040767261061393917, - "loss": 9.4174, + "learning_rate": 0.0004306661179482429, + "loss": 5.5533, "step": 5400 }, { "epoch": 0.02, - "learning_rate": 0.00040825131108149573, - "loss": 9.4159, + "learning_rate": 0.0004329770955321787, + "loss": 5.517, "step": 5600 }, { "epoch": 0.02, - "learning_rate": 0.00040885103064325357, - "loss": 9.3993, + "learning_rate": 0.0004353717161183629, + "loss": 5.4864, "step": 5800 }, { "epoch": 0.03, - "learning_rate": 0.00040947176666852707, - "loss": 9.3953, + "learning_rate": 0.0004378499376903721, + "loss": 5.4671, "step": 6000 }, { "epoch": 0.03, - "learning_rate": 0.00041011351643444917, - "loss": 9.3854, + "learning_rate": 0.00044041171676490604, + "loss": 5.4412, "step": 6200 }, { "epoch": 0.03, - "learning_rate": 0.0004107762771259713, - "loss": 9.3679, + "learning_rate": 0.0004430570083925455, + "loss": 5.4108, "step": 6400 }, { "epoch": 0.03, - "learning_rate": 0.0004114600458358809, - "loss": 9.3595, + "learning_rate": 0.0004457857661585539, + "loss": 5.3807, "step": 6600 }, { "epoch": 0.03, - "learning_rate": 0.00041216481956481664, - "loss": 9.3504, + "learning_rate": 0.0004485979421836768, + "loss": 5.3353, "step": 6800 }, { "epoch": 0.03, - "learning_rate": 0.00041289059522127414, - "loss": 9.3417, + "learning_rate": 0.0004514934871249904, + "loss": 5.3277, "step": 7000 }, { "epoch": 0.03, - "learning_rate": 0.0004136373696216229, - "loss": 9.3275, + "learning_rate": 0.00045447235017676696, + "loss": 5.2979, "step": 7200 }, { "epoch": 0.03, - "learning_rate": 0.0004144051394901274, - "loss": 9.3201, + "learning_rate": 0.00045753447907136494, + "loss": 5.2791, "step": 7400 }, { "epoch": 0.03, - "learning_rate": 0.0004151939014589469, - "loss": 9.3123, + "learning_rate": 0.000460679820080143, + "loss": 5.2494, "step": 7600 }, { "epoch": 0.03, - "learning_rate": 0.0004160036520681667, - "loss": 9.3084, + "learning_rate": 0.00046390831801440893, + "loss": 5.2175, "step": 7800 }, { "epoch": 0.03, - "learning_rate": 0.0004168343877657965, - "loss": 9.2954, + "learning_rate": 0.0004672199162263843, + "loss": 5.2038, "step": 8000 }, { "epoch": 0.04, - "learning_rate": 0.00041768179413688954, - "loss": 9.2862, + "learning_rate": 0.0004706145566101966, + "loss": 5.1835, "step": 8200 }, { "epoch": 0.04, - "learning_rate": 0.00041855438410810103, - "loss": 9.283, + "learning_rate": 0.0004740921796029061, + "loss": 5.1691, "step": 8400 }, { "epoch": 0.04, - "learning_rate": 0.00041944794797888797, - "loss": 9.2711, + "learning_rate": 0.0004776527241855382, + "loss": 5.1582, "step": 8600 }, { "epoch": 0.04, - "learning_rate": 0.00042036248182962185, - "loss": 9.2726, + "learning_rate": 0.0004812961278841711, + "loss": 5.1504, "step": 8800 }, { "epoch": 0.04, - "learning_rate": 0.0004212979816486783, - "loss": 9.2621, + "learning_rate": 0.0004850223267710129, + "loss": 5.1162, "step": 9000 }, { "epoch": 0.04, - "learning_rate": 0.00042225444333247354, - "loss": 9.2527, + "learning_rate": 0.0004888312554655432, + "loss": 5.0957, "step": 9200 }, { "epoch": 0.04, - "learning_rate": 0.0004232318626854678, - "loss": 9.2453, + "learning_rate": 0.0004927228471356421, + "loss": 5.079, "step": 9400 }, { "epoch": 0.04, - "learning_rate": 0.0004242302354201949, - "loss": 9.2314, + "learning_rate": 0.0004966970334987757, + "loss": 5.0572, "step": 9600 }, { "epoch": 0.04, - "learning_rate": 0.000425249557157276, - "loss": 9.2337, + "learning_rate": 0.0005007537448231871, + "loss": 5.0342, "step": 9800 }, { "epoch": 0.04, - "learning_rate": 0.00042628982342543184, - "loss": 9.2276, + "learning_rate": 0.0005048929099291249, + "loss": 5.0106, "step": 10000 - }, - { - "epoch": 0.04, - "learning_rate": 0.0004273456715498305, - "loss": 9.2181, - "step": 10200 - }, - { - "epoch": 0.04, - "learning_rate": 0.00042842770843401837, - "loss": 9.2142, - "step": 10400 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004295306759082608, - "loss": 9.2052, - "step": 10600 - }, - { - "epoch": 0.05, - "learning_rate": 0.00043065456913437584, - "loss": 9.1994, - "step": 10800 - }, - { - "epoch": 0.05, - "learning_rate": 0.00043179938318238693, - "loss": 9.2017, - "step": 11000 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004329651130305402, - "loss": 9.1991, - "step": 11200 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004341517535653445, - "loss": 9.1921, - "step": 11400 - }, - { - "epoch": 0.05, - "learning_rate": 0.00043535929958157804, - "loss": 9.1786, - "step": 11600 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004365877457823183, - "loss": 9.1766, - "step": 11800 - }, - { - "epoch": 0.05, - "learning_rate": 0.00043783708677896244, - "loss": 9.1614, - "step": 12000 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004391073170912519, - "loss": 9.1717, - "step": 12200 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004403984311473017, - "loss": 9.1551, - "step": 12400 - }, - { - "epoch": 0.05, - "learning_rate": 0.0004417104232836127, - "loss": 9.1542, - "step": 12600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00044304328774510786, - "loss": 9.1525, - "step": 12800 - }, - { - "epoch": 0.06, - "learning_rate": 0.000444397018685155, - "loss": 9.1443, - "step": 13000 - }, - { - "epoch": 0.06, - "learning_rate": 0.00044577161016558405, - "loss": 9.1301, - "step": 13200 - }, - { - "epoch": 0.06, - "learning_rate": 0.0004471670561567286, - "loss": 9.1343, - "step": 13400 - }, - { - "epoch": 0.06, - "learning_rate": 0.00044858335053743655, - "loss": 9.1287, - "step": 13600 - }, - { - "epoch": 0.06, - "learning_rate": 0.0004500204870951062, - "loss": 9.1189, - "step": 13800 - }, - { - "epoch": 0.06, - "learning_rate": 0.00045147845952571257, - "loss": 9.1171, - "step": 14000 - }, - { - "epoch": 0.06, - "learning_rate": 0.0004529498156216581, - "loss": 9.1105, - "step": 14200 - }, - { - "epoch": 0.06, - "learning_rate": 0.0004544493364218305, - "loss": 9.0969, - "step": 14400 - }, - { - "epoch": 0.06, - "learning_rate": 0.00045596967366771067, - "loss": 9.1014, - "step": 14600 - }, - { - "epoch": 0.06, - "learning_rate": 0.00045751082069031036, - "loss": 9.0951, - "step": 14800 - }, - { - "epoch": 0.06, - "learning_rate": 0.00045907277072936015, - "loss": 9.0867, - "step": 15000 - }, - { - "epoch": 0.07, - "learning_rate": 0.00046065551693333547, - "loss": 9.0872, - "step": 15200 - }, - { - "epoch": 0.07, - "learning_rate": 0.00046225905235949306, - "loss": 9.0708, - "step": 15400 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004638833699738953, - "loss": 9.0716, - "step": 15600 - }, - { - "epoch": 0.07, - "learning_rate": 0.00046552846265143777, - "loss": 9.071, - "step": 15800 - }, - { - "epoch": 0.07, - "learning_rate": 0.00046719432317589814, - "loss": 9.0618, - "step": 16000 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004688724595049813, - "loss": 9.0518, - "step": 16200 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004705797299630679, - "loss": 9.0442, - "step": 16400 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004723077461105934, - "loss": 9.0477, - "step": 16600 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004740565003675777, - "loss": 9.0397, - "step": 16800 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004758259850630858, - "loss": 9.0355, - "step": 17000 - }, - { - "epoch": 0.07, - "learning_rate": 0.00047761619243523283, - "loss": 9.0248, - "step": 17200 - }, - { - "epoch": 0.07, - "learning_rate": 0.0004794271146312465, - "loss": 9.0137, - "step": 17400 - }, - { - "epoch": 0.08, - "learning_rate": 0.00048125874370748105, - "loss": 9.0205, - "step": 17600 - }, - { - "epoch": 0.08, - "learning_rate": 0.00048311107162946065, - "loss": 9.0008, - "step": 17800 - }, - { - "epoch": 0.08, - "learning_rate": 0.00048498409027191575, - "loss": 8.9975, - "step": 18000 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004868682714790542, - "loss": 8.9992, - "step": 18200 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004887825434734695, - "loss": 8.9777, - "step": 18400 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004907174813103439, - "loss": 8.9871, - "step": 18600 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004926730765020346, - "loss": 8.9765, - "step": 18800 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004946393879009196, - "loss": 8.9754, - "step": 19000 - }, - { - "epoch": 0.08, - "learning_rate": 0.0004966361687980866, - "loss": 8.9678, - "step": 19200 - }, - { - "epoch": 0.08, - "learning_rate": 0.000498653581087638, - "loss": 8.9677, - "step": 19400 - }, - { - "epoch": 0.08, - "learning_rate": 0.0005006916159201579, - "loss": 8.9644, - "step": 19600 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005027502643557748, - "loss": 8.9642, - "step": 19800 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005048295173641828, - "loss": 8.9569, - "step": 20000 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005069293658247036, - "loss": 8.9605, - "step": 20200 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005090498005263129, - "loss": 8.9431, - "step": 20400 - }, - { - "epoch": 0.09, - "learning_rate": 0.000511190812167682, - "loss": 8.9431, - "step": 20600 - }, - { - "epoch": 0.09, - "learning_rate": 0.000513352391357226, - "loss": 8.9342, - "step": 20800 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005155345286131357, - "loss": 8.9324, - "step": 21000 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005177372143634305, - "loss": 8.9382, - "step": 21200 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005199604389459836, - "loss": 8.9424, - "step": 21400 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005222041926085837, - "loss": 8.9157, - "step": 21600 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005244684655089597, - "loss": 8.9236, - "step": 21800 - }, - { - "epoch": 0.09, - "learning_rate": 0.0005267532477148378, - "loss": 8.9246, - "step": 22000 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005290585292039816, - "loss": 8.9268, - "step": 22200 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005313842998642265, - "loss": 8.9203, - "step": 22400 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005337305494935388, - "loss": 8.9095, - "step": 22600 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005360972678000522, - "loss": 8.9061, - "step": 22800 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005384724576463773, - "loss": 8.9117, - "step": 23000 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005408799798596632, - "loss": 8.9043, - "step": 23200 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005433079393890421, - "loss": 8.9012, - "step": 23400 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005457563255842242, - "loss": 8.8969, - "step": 23600 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005482251277053145, - "loss": 8.9013, - "step": 23800 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005507143349228714, - "loss": 8.8912, - "step": 24000 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005532239363179401, - "loss": 8.8896, - "step": 24200 - }, - { - "epoch": 0.1, - "learning_rate": 0.0005557539208821075, - "loss": 8.8838, - "step": 24400 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005583042775175479, - "loss": 8.889, - "step": 24600 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005608749950370764, - "loss": 8.888, - "step": 24800 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005634530562276738, - "loss": 8.8814, - "step": 25000 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005660643599338256, - "loss": 8.8755, - "step": 25200 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005686959904843206, - "loss": 8.8667, - "step": 25400 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005713479363354621, - "loss": 8.8748, - "step": 25600 - }, - { - "epoch": 0.11, - "learning_rate": 0.000574020185854441, - "loss": 8.862, - "step": 25800 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005767127273193853, - "loss": 8.8534, - "step": 26000 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005794255489194114, - "loss": 8.8655, - "step": 26200 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005821586387546804, - "loss": 8.8574, - "step": 26400 - }, - { - "epoch": 0.11, - "learning_rate": 0.0005849119848364386, - "loss": 8.8531, - "step": 26600 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005876855750870848, - "loss": 8.8479, - "step": 26800 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005904653779220791, - "loss": 8.8405, - "step": 27000 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005932793188544346, - "loss": 8.8435, - "step": 27200 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005961134672522114, - "loss": 8.8425, - "step": 27400 - }, - { - "epoch": 0.12, - "learning_rate": 0.0005989678106833648, - "loss": 8.8389, - "step": 27600 - }, - { - "epoch": 0.12, - "learning_rate": 0.0006018423366272695, - "loss": 8.8525, - "step": 27800 - }, - { - "epoch": 0.12, - "learning_rate": 0.0006047370324747583, - "loss": 8.8273, - "step": 28000 - }, - { - "epoch": 0.12, - "learning_rate": 0.0006076518855281984, - "loss": 8.8306, - "step": 28200 - }, - { - "epoch": 0.12, - "learning_rate": 0.000610586883001531, - "loss": 8.8437, - "step": 28400 - }, - { - "epoch": 0.12, - "learning_rate": 0.000613542012020336, - "loss": 8.8236, - "step": 28600 - }, - { - "epoch": 0.12, - "learning_rate": 0.0006165172596218869, - "loss": 8.8274, - "step": 28800 - }, - { - "epoch": 0.12, - "learning_rate": 0.0006194975859987236, - "loss": 8.8275, - "step": 29000 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006225129310954997, - "loss": 8.8211, - "step": 29200 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006255483554239195, - "loss": 8.8177, - "step": 29400 - }, - { - "epoch": 0.13, - "learning_rate": 0.000628603845669035, - "loss": 8.8223, - "step": 29600 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006316793884278832, - "loss": 8.8123, - "step": 29800 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006347749702095389, - "loss": 8.8107, - "step": 30000 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006378905774351747, - "loss": 8.8122, - "step": 30200 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006410261964381238, - "loss": 8.811, - "step": 30400 - }, - { - "epoch": 0.13, - "learning_rate": 0.000644181813463934, - "loss": 8.813, - "step": 30600 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006473574146704329, - "loss": 8.8057, - "step": 30800 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006505369586176524, - "loss": 8.8033, - "step": 31000 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006537523865622775, - "loss": 8.795, - "step": 31200 - }, - { - "epoch": 0.13, - "learning_rate": 0.0006569877567060931, - "loss": 8.7938, - "step": 31400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006602430548570907, - "loss": 8.7969, - "step": 31600 - }, - { - "epoch": 0.14, - "learning_rate": 0.000663518266735847, - "loss": 8.7966, - "step": 31800 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006668133779755819, - "loss": 8.7936, - "step": 32000 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006701283741222287, - "loss": 8.7888, - "step": 32200 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006734632406344993, - "loss": 8.7829, - "step": 32400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006768179628839337, - "loss": 8.7789, - "step": 32600 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006801925261549872, - "loss": 8.778, - "step": 32800 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006835698944044951, - "loss": 8.7897, - "step": 33000 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006869839962045932, - "loss": 8.779, - "step": 33200 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006904178944328165, - "loss": 8.7697, - "step": 33400 - }, - { - "epoch": 0.14, - "learning_rate": 0.0006938715740263026, - "loss": 8.7818, - "step": 33600 - }, - { - "epoch": 0.15, - "learning_rate": 0.0006973450198354252, - "loss": 8.7667, - "step": 33800 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007008382166238496, - "loss": 8.7759, - "step": 34000 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007043511490686036, - "loss": 8.7797, - "step": 34200 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007078838017601421, - "loss": 8.7644, - "step": 34400 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007114361592024231, - "loss": 8.7678, - "step": 34600 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007150082058129618, - "loss": 8.7672, - "step": 34800 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007185819184105553, - "loss": 8.7672, - "step": 35000 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007221750927446872, - "loss": 8.7573, - "step": 35200 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007258059161614535, - "loss": 8.7584, - "step": 35400 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007294563657132755, - "loss": 8.7442, - "step": 35600 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007331264253873856, - "loss": 8.7595, - "step": 35800 - }, - { - "epoch": 0.15, - "learning_rate": 0.0007368160790850002, - "loss": 8.7564, - "step": 36000 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007405253106213833, - "loss": 8.7517, - "step": 36200 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007442541037259286, - "loss": 8.7583, - "step": 36400 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007480024420422077, - "loss": 8.7426, - "step": 36600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007517703091280727, - "loss": 8.7519, - "step": 36800 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007555576884556992, - "loss": 8.7393, - "step": 37000 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007593645634116821, - "loss": 8.7262, - "step": 37200 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007631717371015569, - "loss": 8.7383, - "step": 37400 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007670174558631893, - "loss": 8.7365, - "step": 37600 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007708826199846926, - "loss": 8.7385, - "step": 37800 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007747672125114589, - "loss": 8.7385, - "step": 38000 - }, - { - "epoch": 0.16, - "learning_rate": 0.0007786712164036449, - "loss": 8.7164, - "step": 38200 - }, - { - "epoch": 0.17, - "learning_rate": 0.0007825946145362667, - "loss": 8.7262, - "step": 38400 - }, - { - "epoch": 0.17, - "learning_rate": 0.0007865373896992697, - "loss": 8.728, - "step": 38600 - }, - { - "epoch": 0.17, - "learning_rate": 0.0007904995245975929, - "loss": 8.7281, - "step": 38800 - }, - { - "epoch": 0.17, - "learning_rate": 0.0007944810018512619, - "loss": 8.7179, - "step": 39000 - }, - { - "epoch": 0.17, - "learning_rate": 0.000798481803995452, - "loss": 8.7264, - "step": 39200 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008025019134805696, - "loss": 8.7229, - "step": 39400 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008065210677225022, - "loss": 8.7246, - "step": 39600 - }, - { - "epoch": 0.17, - "learning_rate": 0.000810579642586285, - "loss": 8.7199, - "step": 39800 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008146574717236045, - "loss": 8.7209, - "step": 40000 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008187545372469861, - "loss": 8.7075, - "step": 40200 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008228708211845768, - "loss": 8.7101, - "step": 40400 - }, - { - "epoch": 0.17, - "learning_rate": 0.0008270063054802209, - "loss": 8.7144, - "step": 40600 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008311609719935404, - "loss": 8.7173, - "step": 40800 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008353348025000144, - "loss": 8.7183, - "step": 41000 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008395277786910574, - "loss": 8.7107, - "step": 41200 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008437398821741025, - "loss": 8.7113, - "step": 41400 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008479710944726774, - "loss": 8.7085, - "step": 41600 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008522000980575213, - "loss": 8.7115, - "step": 41800 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008564693769110079, - "loss": 8.7055, - "step": 42000 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008607577087418623, - "loss": 8.6935, - "step": 42200 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008650650747392373, - "loss": 8.7042, - "step": 42400 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008693914560087938, - "loss": 8.6849, - "step": 42600 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008737368335727785, - "loss": 8.6876, - "step": 42800 - }, - { - "epoch": 0.18, - "learning_rate": 0.0008781011883701138, - "loss": 8.6922, - "step": 43000 - }, - { - "epoch": 0.19, - "learning_rate": 0.0008824845012564749, - "loss": 8.6922, - "step": 43200 - }, - { - "epoch": 0.19, - "learning_rate": 0.000886886753004381, - "loss": 8.6853, - "step": 43400 - }, - { - "epoch": 0.19, - "learning_rate": 0.000891307924303272, - "loss": 8.6936, - "step": 43600 - }, - { - "epoch": 0.19, - "learning_rate": 0.0008957257484203587, - "loss": 8.6995, - "step": 43800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009001846062028449, - "loss": 8.6841, - "step": 44000 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009046623252050388, - "loss": 8.6735, - "step": 44200 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009091588857853411, - "loss": 8.6888, - "step": 44400 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009136742682195071, - "loss": 8.6788, - "step": 44600 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009182084527007278, - "loss": 8.6817, - "step": 44800 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009227614193397203, - "loss": 8.671, - "step": 45000 - }, - { - "epoch": 0.19, - "learning_rate": 0.0009273331481648092, - "loss": 8.6738, - "step": 45200 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009319236191220222, - "loss": 8.6657, - "step": 45400 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009365328120751783, - "loss": 8.6599, - "step": 45600 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009411375208451828, - "loss": 8.6747, - "step": 45800 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009457840036964961, - "loss": 8.6698, - "step": 46000 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009504491477449178, - "loss": 8.6765, - "step": 46200 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009551329325267026, - "loss": 8.6732, - "step": 46400 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009598353374963477, - "loss": 8.6654, - "step": 46600 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009645563420266623, - "loss": 8.6614, - "step": 46800 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009692959254088748, - "loss": 8.6672, - "step": 47000 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009740540668527146, - "loss": 8.6508, - "step": 47200 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009788307454865058, - "loss": 8.6533, - "step": 47400 - }, - { - "epoch": 0.2, - "learning_rate": 0.0009836259403572592, - "loss": 8.656, - "step": 47600 - }, - { - "epoch": 0.21, - "learning_rate": 0.0009884155160084767, - "loss": 8.65, - "step": 47800 - }, - { - "epoch": 0.21, - "learning_rate": 0.0009932475878516138, - "loss": 8.6559, - "step": 48000 - }, - { - "epoch": 0.21, - "learning_rate": 0.0009980981126919714, - "loss": 8.646, - "step": 48200 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010029670692526266, - "loss": 8.6504, - "step": 48400 - }, - { - "epoch": 0.21, - "learning_rate": 0.001007854436175815, - "loss": 8.6437, - "step": 48600 - }, - { - "epoch": 0.21, - "learning_rate": 0.00101276019202301, - "loss": 8.6483, - "step": 48800 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010176843152750244, - "loss": 8.6381, - "step": 49000 - }, - { - "epoch": 0.21, - "learning_rate": 0.001022602026387454, - "loss": 8.6419, - "step": 49200 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010275627280027944, - "loss": 8.6381, - "step": 49400 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010325417320913577, - "loss": 8.6274, - "step": 49600 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010375390168126473, - "loss": 8.6264, - "step": 49800 - }, - { - "epoch": 0.21, - "learning_rate": 0.0010425545602459826, - "loss": 8.6279, - "step": 50000 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010475883403905893, - "loss": 8.636, - "step": 50200 - }, - { - "epoch": 0.22, - "learning_rate": 0.001052640335165696, - "loss": 8.6305, - "step": 50400 - }, - { - "epoch": 0.22, - "learning_rate": 0.001057710522410639, - "loss": 8.6259, - "step": 50600 - }, - { - "epoch": 0.22, - "learning_rate": 0.001062798879884943, - "loss": 8.6288, - "step": 50800 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010679053852684361, - "loss": 8.6286, - "step": 51000 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010730300161613388, - "loss": 8.6203, - "step": 51200 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010781469914207427, - "loss": 8.618, - "step": 51400 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010833077154690767, - "loss": 8.6183, - "step": 51600 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010884864974642153, - "loss": 8.6244, - "step": 51800 - }, - { - "epoch": 0.22, - "learning_rate": 0.0010936833146893334, - "loss": 8.6129, - "step": 52000 - }, - { - "epoch": 0.22, - "learning_rate": 0.001098898144348496, - "loss": 8.622, - "step": 52200 - }, - { - "epoch": 0.23, - "learning_rate": 0.001104130963566756, - "loss": 8.6104, - "step": 52400 - }, - { - "epoch": 0.23, - "learning_rate": 0.001109381749390256, - "loss": 8.603, - "step": 52600 - }, - { - "epoch": 0.23, - "learning_rate": 0.00111465047878633, - "loss": 8.6093, - "step": 52800 - }, - { - "epoch": 0.23, - "learning_rate": 0.001119937128643592, - "loss": 8.5969, - "step": 53000 - }, - { - "epoch": 0.23, - "learning_rate": 0.0011252416757720606, - "loss": 8.5992, - "step": 53200 - }, - { - "epoch": 0.23, - "learning_rate": 0.0011305374403745901, - "loss": 8.6047, - "step": 53400 - }, - { - "epoch": 0.23, - "learning_rate": 0.001135877622966507, - "loss": 8.5958, - "step": 53600 - }, - { - "epoch": 0.23, - "learning_rate": 0.001141235632906355, - "loss": 8.5948, - "step": 53800 - }, - { - "epoch": 0.23, - "learning_rate": 0.0011466114466911256, - "loss": 8.5896, - "step": 54000 - }, - { - "epoch": 0.23, - "learning_rate": 0.001152005040739713, - "loss": 8.5887, - "step": 54200 - }, - { - "epoch": 0.23, - "learning_rate": 0.0011574163913930131, - "loss": 8.5862, - "step": 54400 - }, - { - "epoch": 0.23, - "learning_rate": 0.0011628454749140395, - "loss": 8.5949, - "step": 54600 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011682922674880192, - "loss": 8.588, - "step": 54800 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011737567452224911, - "loss": 8.5918, - "step": 55000 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011792388841474245, - "loss": 8.5904, - "step": 55200 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011847111175024606, - "loss": 8.5739, - "step": 55400 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011902284185834888, - "loss": 8.5756, - "step": 55600 - }, - { - "epoch": 0.24, - "learning_rate": 0.0011957633086016797, - "loss": 8.568, - "step": 55800 - }, - { - "epoch": 0.24, - "learning_rate": 0.0012013157632781366, - "loss": 8.5696, - "step": 56000 - }, - { - "epoch": 0.24, - "learning_rate": 0.00120688575825691, - "loss": 8.5768, - "step": 56200 - }, - { - "epoch": 0.24, - "learning_rate": 0.0012124732691051188, - "loss": 8.5696, - "step": 56400 - }, - { - "epoch": 0.24, - "learning_rate": 0.0012180782713130424, - "loss": 8.5687, - "step": 56600 - }, - { - "epoch": 0.24, - "learning_rate": 0.0012237007402942333, - "loss": 8.56, - "step": 56800 - }, - { - "epoch": 0.24, - "learning_rate": 0.0012293406513856284, - "loss": 8.56, - "step": 57000 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012349979798476525, - "loss": 8.5602, - "step": 57200 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012406442840364133, - "loss": 8.5551, - "step": 57400 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012463362859392122, - "loss": 8.5556, - "step": 57600 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012520456306609733, - "loss": 8.5508, - "step": 57800 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012577722931575563, - "loss": 8.549, - "step": 58000 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012634874855592566, - "loss": 8.5582, - "step": 58200 - }, - { - "epoch": 0.25, - "learning_rate": 0.001269248621894795, - "loss": 8.554, - "step": 58400 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012750270005439136, - "loss": 8.5453, - "step": 58600 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012808225961596451, - "loss": 8.5545, - "step": 58800 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012866353833195041, - "loss": 8.543, - "step": 59000 - }, - { - "epoch": 0.25, - "learning_rate": 0.0012924653365255934, - "loss": 8.5454, - "step": 59200 - }, - { - "epoch": 0.26, - "learning_rate": 0.001298312430204715, - "loss": 8.5496, - "step": 59400 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013041766387084808, - "loss": 8.5348, - "step": 59600 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013100579363134381, - "loss": 8.5311, - "step": 59800 - }, - { - "epoch": 0.26, - "learning_rate": 0.001315956297221161, - "loss": 8.5378, - "step": 60000 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013218716955583822, - "loss": 8.5304, - "step": 60200 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013278041053770978, - "loss": 8.5166, - "step": 60400 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013337237114701053, - "loss": 8.5324, - "step": 60600 - }, - { - "epoch": 0.26, - "learning_rate": 0.001339689981377656, - "loss": 8.5196, - "step": 60800 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013456731846064624, - "loss": 8.5191, - "step": 61000 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013516732949110932, - "loss": 8.5285, - "step": 61200 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013576902859719474, - "loss": 8.5143, - "step": 61400 - }, - { - "epoch": 0.26, - "learning_rate": 0.0013637241313953895, - "loss": 8.519, - "step": 61600 - }, - { - "epoch": 0.27, - "learning_rate": 0.0013697748047138431, - "loss": 8.4964, - "step": 61800 - }, - { - "epoch": 0.27, - "learning_rate": 0.0013758422793859176, - "loss": 8.5216, - "step": 62000 - }, - { - "epoch": 0.27, - "learning_rate": 0.001381926528796519, - "loss": 8.5098, - "step": 62200 - }, - { - "epoch": 0.27, - "learning_rate": 0.0013880275262569807, - "loss": 8.511, - "step": 62400 - }, - { - "epoch": 0.27, - "learning_rate": 0.0013941452450051631, - "loss": 8.5124, - "step": 62600 - }, - { - "epoch": 0.27, - "learning_rate": 0.001400248944656608, - "loss": 8.5054, - "step": 62800 - }, - { - "epoch": 0.27, - "learning_rate": 0.0014063999421298785, - "loss": 8.5016, - "step": 63000 - }, - { - "epoch": 0.27, - "learning_rate": 0.0014125675802999262, - "loss": 8.501, - "step": 63200 - }, - { - "epoch": 0.27, - "learning_rate": 0.001418751832112295, - "loss": 8.505, - "step": 63400 - }, - { - "epoch": 0.27, - "learning_rate": 0.0014249526704396467, - "loss": 8.4953, - "step": 63600 - }, - { - "epoch": 0.27, - "learning_rate": 0.0014311700680818915, - "loss": 8.5074, - "step": 63800 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014374039977662987, - "loss": 8.4991, - "step": 64000 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014436544321476206, - "loss": 8.5087, - "step": 64200 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014499213438082127, - "loss": 8.4871, - "step": 64400 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014562047052581514, - "loss": 8.4954, - "step": 64600 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014624729492123557, - "loss": 8.4791, - "step": 64800 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014687890455785963, - "loss": 8.4853, - "step": 65000 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014751215089706584, - "loss": 8.4855, - "step": 65200 - }, - { - "epoch": 0.28, - "learning_rate": 0.0014814703116110776, - "loss": 8.4707, - "step": 65400 - }, - { - "epoch": 0.28, - "learning_rate": 0.001487835425650709, - "loss": 8.4743, - "step": 65600 - }, - { - "epoch": 0.28, - "learning_rate": 0.001494216823168866, - "loss": 8.4717, - "step": 65800 - }, - { - "epoch": 0.28, - "learning_rate": 0.0015006144761734279, - "loss": 8.4823, - "step": 66000 - }, - { - "epoch": 0.28, - "learning_rate": 0.001507028356600975, - "loss": 8.4708, - "step": 66200 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015134584363168998, - "loss": 8.4649, - "step": 66400 - }, - { - "epoch": 0.29, - "learning_rate": 0.001519904687115537, - "loss": 8.4695, - "step": 66600 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015263347286438994, - "loss": 8.4759, - "step": 66800 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015328131562056986, - "loss": 8.4655, - "step": 67000 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015393076699503766, - "loss": 8.4752, - "step": 67200 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015458182413896245, - "loss": 8.4535, - "step": 67400 - }, - { - "epoch": 0.29, - "learning_rate": 0.001552344841964707, - "loss": 8.4535, - "step": 67600 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015588874430465648, - "loss": 8.4519, - "step": 67800 - }, - { - "epoch": 0.29, - "learning_rate": 0.001565446015935959, - "loss": 8.4568, - "step": 68000 - }, - { - "epoch": 0.29, - "learning_rate": 0.00157202053186359, - "loss": 8.4524, - "step": 68200 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015786109619902212, - "loss": 8.4589, - "step": 68400 - }, - { - "epoch": 0.29, - "learning_rate": 0.0015852172774068075, - "loss": 8.4559, - "step": 68600 - }, - { - "epoch": 0.3, - "learning_rate": 0.0015918062988814347, - "loss": 8.446, - "step": 68800 - }, - { - "epoch": 0.3, - "learning_rate": 0.0015984442188082624, - "loss": 8.439, - "step": 69000 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016050979370261006, - "loss": 8.4504, - "step": 69200 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016117674243482875, - "loss": 8.4487, - "step": 69400 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016184526515189961, - "loss": 8.4472, - "step": 69600 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016251535892133542, - "loss": 8.433, - "step": 69800 - }, - { - "epoch": 0.3, - "learning_rate": 0.001631870208037572, - "loss": 8.426, - "step": 70000 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016386024785290804, - "loss": 8.4311, - "step": 70200 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016453503711566474, - "loss": 8.432, - "step": 70400 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016520800001573153, - "loss": 8.4282, - "step": 70600 - }, - { - "epoch": 0.3, - "learning_rate": 0.0016588589704489114, - "loss": 8.4386, - "step": 70800 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016656534740210893, - "loss": 8.4158, - "step": 71000 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016724634810696363, - "loss": 8.4242, - "step": 71200 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016792889617223312, - "loss": 8.4279, - "step": 71400 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016861298860390735, - "loss": 8.4242, - "step": 71600 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016929862240120247, - "loss": 8.4271, - "step": 71800 - }, - { - "epoch": 0.31, - "learning_rate": 0.0016998579455657307, - "loss": 8.4265, - "step": 72000 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017067450205572581, - "loss": 8.421, - "step": 72200 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017136474187763266, - "loss": 8.4156, - "step": 72400 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017205304834985446, - "loss": 8.421, - "step": 72600 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017274633610356825, - "loss": 8.4256, - "step": 72800 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017344114709189774, - "loss": 8.4191, - "step": 73000 - }, - { - "epoch": 0.31, - "learning_rate": 0.0017413747826704132, - "loss": 8.4015, - "step": 73200 - }, - { - "epoch": 0.32, - "learning_rate": 0.001748318335641869, - "loss": 8.4129, - "step": 73400 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017553118838016506, - "loss": 8.4179, - "step": 73600 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017623205421495314, - "loss": 8.4142, - "step": 73800 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017693442799418986, - "loss": 8.4005, - "step": 74000 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017763830663689965, - "loss": 8.41, - "step": 74200 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017834368705550597, - "loss": 8.4162, - "step": 74400 - }, - { - "epoch": 0.32, - "learning_rate": 0.001790505661558443, - "loss": 8.4081, - "step": 74600 - }, - { - "epoch": 0.32, - "learning_rate": 0.0017975894083717692, - "loss": 8.4027, - "step": 74800 - }, - { - "epoch": 0.32, - "learning_rate": 0.0018046880799220469, - "loss": 8.4097, - "step": 75000 - }, - { - "epoch": 0.32, - "learning_rate": 0.0018118016450708232, - "loss": 8.4077, - "step": 75200 - }, - { - "epoch": 0.32, - "learning_rate": 0.0018189300726143137, - "loss": 8.4086, - "step": 75400 - }, - { - "epoch": 0.32, - "learning_rate": 0.0018260375781495742, - "loss": 8.4084, - "step": 75600 - }, - { - "epoch": 0.33, - "learning_rate": 0.001833195562689592, - "loss": 8.4166, - "step": 75800 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018403683157795104, - "loss": 8.4121, - "step": 76000 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018475558059559121, - "loss": 8.404, - "step": 76200 - }, - { - "epoch": 0.33, - "learning_rate": 0.001854758001690741, - "loss": 8.4035, - "step": 76400 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018619748713914318, - "loss": 8.4044, - "step": 76600 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018692063834010522, - "loss": 8.3948, - "step": 76800 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018764525059984417, - "loss": 8.3935, - "step": 77000 - }, - { - "epoch": 0.33, - "learning_rate": 0.001883713207398349, - "loss": 8.4028, - "step": 77200 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018909884557515733, - "loss": 8.401, - "step": 77400 - }, - { - "epoch": 0.33, - "learning_rate": 0.0018982417342748425, - "loss": 8.4085, - "step": 77600 - }, - { - "epoch": 0.33, - "learning_rate": 0.0019055459083963232, - "loss": 8.3979, - "step": 77800 - }, - { - "epoch": 0.34, - "learning_rate": 0.00191282790468081, - "loss": 8.4027, - "step": 78000 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019201608770511077, - "loss": 8.401, - "step": 78200 - }, - { - "epoch": 0.34, - "learning_rate": 0.001927508236496343, - "loss": 8.4054, - "step": 78400 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019348699507871943, - "loss": 8.408, - "step": 78600 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019422459876313608, - "loss": 8.4064, - "step": 78800 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019496363146737205, - "loss": 8.4066, - "step": 79000 - }, - { - "epoch": 0.34, - "learning_rate": 0.001957040899496469, - "loss": 8.4061, - "step": 79200 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019644597096192574, - "loss": 8.411, - "step": 79400 - }, - { - "epoch": 0.34, - "learning_rate": 0.00197189271249934, - "loss": 8.3999, - "step": 79600 - }, - { - "epoch": 0.34, - "learning_rate": 0.001979339875531708, - "loss": 8.4002, - "step": 79800 - }, - { - "epoch": 0.34, - "learning_rate": 0.001986801166049247, - "loss": 8.4058, - "step": 80000 - }, - { - "epoch": 0.34, - "learning_rate": 0.0019942391393900083, - "loss": 8.4172, - "step": 80200 - }, - { - "epoch": 0.35, - "learning_rate": 0.002001728516400637, - "loss": 8.4089, - "step": 80400 - }, - { - "epoch": 0.35, - "learning_rate": 0.002009231922688247, - "loss": 8.4087, - "step": 80600 - }, - { - "epoch": 0.35, - "learning_rate": 0.002016749325339009, - "loss": 8.4115, - "step": 80800 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020242806913776997, - "loss": 8.4317, - "step": 81000 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020318259877678373, - "loss": 8.4049, - "step": 81200 - }, - { - "epoch": 0.35, - "learning_rate": 0.002039385181411845, - "loss": 8.4129, - "step": 81400 - }, - { - "epoch": 0.35, - "learning_rate": 0.002046958239151178, - "loss": 8.424, - "step": 81600 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020545451277664776, - "loss": 8.4146, - "step": 81800 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020621458139777164, - "loss": 8.4233, - "step": 82000 - }, - { - "epoch": 0.35, - "learning_rate": 0.002069722158008656, - "loss": 8.4199, - "step": 82200 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020773502707586607, - "loss": 8.4134, - "step": 82400 - }, - { - "epoch": 0.35, - "learning_rate": 0.0020849920810694245, - "loss": 8.415, - "step": 82600 - }, - { - "epoch": 0.36, - "learning_rate": 0.0020926475554200047, - "loss": 8.4301, - "step": 82800 - }, - { - "epoch": 0.36, - "learning_rate": 0.0021003166602295217, - "loss": 8.4128, - "step": 83000 - }, - { - "epoch": 0.36, - "learning_rate": 0.002107999361857309, - "loss": 8.4284, - "step": 83200 - }, - { - "epoch": 0.36, - "learning_rate": 0.002115695626603048, - "loss": 8.422, - "step": 83400 - }, - { - "epoch": 0.36, - "learning_rate": 0.002123405420706933, - "loss": 8.4173, - "step": 83600 - }, - { - "epoch": 0.36, - "learning_rate": 0.002131128710349813, - "loss": 8.4245, - "step": 83800 - }, - { - "epoch": 0.36, - "learning_rate": 0.002138865461653332, - "loss": 8.427, - "step": 84000 - }, - { - "epoch": 0.36, - "learning_rate": 0.0021465768564397046, - "loss": 8.4287, - "step": 84200 - }, - { - "epoch": 0.36, - "learning_rate": 0.002154340362309423, - "loss": 8.4361, - "step": 84400 - }, - { - "epoch": 0.36, - "learning_rate": 0.002162078310516678, - "loss": 8.4299, - "step": 84600 - }, - { - "epoch": 0.36, - "learning_rate": 0.0021698684354139377, - "loss": 8.4447, - "step": 84800 - }, - { - "epoch": 0.37, - "learning_rate": 0.0021776718520393184, - "loss": 8.4399, - "step": 85000 - }, - { - "epoch": 0.37, - "learning_rate": 0.0021854885261629875, - "loss": 8.4469, - "step": 85200 - }, - { - "epoch": 0.37, - "learning_rate": 0.0021933184234969594, - "loss": 8.4328, - "step": 85400 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022011615096952444, - "loss": 8.4504, - "step": 85600 - }, - { - "epoch": 0.37, - "learning_rate": 0.002209017750354, - "loss": 8.4383, - "step": 85800 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022168871110116815, - "loss": 8.4472, - "step": 86000 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022247695571491945, - "loss": 8.448, - "step": 86200 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022326650541900405, - "loss": 8.4451, - "step": 86400 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022405339926133165, - "loss": 8.4415, - "step": 86600 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022484554226809986, - "loss": 8.4439, - "step": 86800 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022563897997535266, - "loss": 8.4497, - "step": 87000 - }, - { - "epoch": 0.37, - "learning_rate": 0.0022643370890266133, - "loss": 8.452, - "step": 87200 - }, - { - "epoch": 0.38, - "learning_rate": 0.0022722972556393217, - "loss": 8.4677, - "step": 87400 - }, - { - "epoch": 0.38, - "learning_rate": 0.0022802702646742383, - "loss": 8.4595, - "step": 87600 - }, - { - "epoch": 0.38, - "learning_rate": 0.002288256081157608, - "loss": 8.4595, - "step": 87800 - }, - { - "epoch": 0.38, - "learning_rate": 0.002296254670059502, - "loss": 8.4642, - "step": 88000 - }, - { - "epoch": 0.38, - "learning_rate": 0.0023042659962939603, - "loss": 8.4709, - "step": 88200 - }, - { - "epoch": 0.38, - "learning_rate": 0.0023122900247191545, - "loss": 8.4679, - "step": 88400 - }, - { - "epoch": 0.38, - "learning_rate": 0.002320286505209589, - "loss": 8.4603, - "step": 88600 - }, - { - "epoch": 0.38, - "learning_rate": 0.0023283357692971242, - "loss": 8.4662, - "step": 88800 - }, - { - "epoch": 0.38, - "learning_rate": 0.002336397629992889, - "loss": 8.4653, - "step": 89000 - }, - { - "epoch": 0.38, - "learning_rate": 0.002344472051933384, - "loss": 8.4833, - "step": 89200 - }, - { - "epoch": 0.38, - "learning_rate": 0.002352558999700007, - "loss": 8.4974, - "step": 89400 - }, - { - "epoch": 0.39, - "learning_rate": 0.002360658437819213, - "loss": 8.4881, - "step": 89600 - }, - { - "epoch": 0.39, - "learning_rate": 0.0023687703307626647, - "loss": 8.4878, - "step": 89800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0023768946429473976, - "loss": 8.4846, - "step": 90000 - }, - { - "epoch": 0.39, - "learning_rate": 0.002385031338735963, - "loss": 8.4866, - "step": 90200 - }, - { - "epoch": 0.39, - "learning_rate": 0.0023931803824365962, - "loss": 8.4847, - "step": 90400 - }, - { - "epoch": 0.39, - "learning_rate": 0.002401300900956714, - "loss": 8.4934, - "step": 90600 - }, - { - "epoch": 0.39, - "learning_rate": 0.002409474471896992, - "loss": 8.4872, - "step": 90800 - }, - { - "epoch": 0.39, - "learning_rate": 0.0024176602835290807, - "loss": 8.4977, - "step": 91000 - }, - { - "epoch": 0.39, - "learning_rate": 0.0024258582999457665, - "loss": 8.4967, - "step": 91200 - }, - { - "epoch": 0.39, - "learning_rate": 0.0024340684851863, - "loss": 8.505, - "step": 91400 - }, - { - "epoch": 0.39, - "learning_rate": 0.002442290803236551, - "loss": 8.5126, - "step": 91600 - }, - { - "epoch": 0.39, - "learning_rate": 0.0024505252180291688, - "loss": 8.5033, - "step": 91800 - }, - { - "epoch": 0.4, - "learning_rate": 0.0024587304311256865, - "loss": 8.513, - "step": 92000 - }, - { - "epoch": 0.4, - "learning_rate": 0.0024669888709567232, - "loss": 8.5082, - "step": 92200 - }, - { - "epoch": 0.4, - "learning_rate": 0.0024752592991915973, - "loss": 8.517, - "step": 92400 - }, - { - "epoch": 0.4, - "learning_rate": 0.0024835416795519205, - "loss": 8.5293, - "step": 92600 - }, - { - "epoch": 0.4, - "learning_rate": 0.002491835975706881, - "loss": 8.5094, - "step": 92800 - }, - { - "epoch": 0.4, - "learning_rate": 0.0025001421512733943, - "loss": 8.5139, - "step": 93000 - }, - { - "epoch": 0.4, - "learning_rate": 0.0025084601698162666, - "loss": 8.5099, - "step": 93200 - }, - { - "epoch": 0.4, - "learning_rate": 0.0025167899948483575, - "loss": 8.5185, - "step": 93400 - }, - { - "epoch": 0.4, - "learning_rate": 0.0025251315898307336, - "loss": 8.5143, - "step": 93600 - }, - { - "epoch": 0.4, - "learning_rate": 0.002533484918172837, - "loss": 8.5277, - "step": 93800 - }, - { - "epoch": 0.4, - "learning_rate": 0.0025418499432326358, - "loss": 8.5231, - "step": 94000 - }, - { - "epoch": 0.4, - "learning_rate": 0.002550184715947826, - "loss": 8.5436, - "step": 94200 - }, - { - "epoch": 0.41, - "learning_rate": 0.0025585729662869474, - "loss": 8.5373, - "step": 94400 - }, - { - "epoch": 0.41, - "learning_rate": 0.002566972803294579, - "loss": 8.5347, - "step": 94600 - }, - { - "epoch": 0.41, - "learning_rate": 0.00257538419012468, - "loss": 8.5544, - "step": 94800 - }, - { - "epoch": 0.41, - "learning_rate": 0.0025838070898805453, - "loss": 8.5339, - "step": 95000 - }, - { - "epoch": 0.41, - "learning_rate": 0.002592241465614974, - "loss": 8.5405, - "step": 95200 - }, - { - "epoch": 0.41, - "learning_rate": 0.002600687280330416, - "loss": 8.5501, - "step": 95400 - }, - { - "epoch": 0.41, - "learning_rate": 0.0026091444969791513, - "loss": 8.5344, - "step": 95600 - }, - { - "epoch": 0.41, - "learning_rate": 0.002617613078463441, - "loss": 8.5477, - "step": 95800 - }, - { - "epoch": 0.41, - "learning_rate": 0.002626092987635699, - "loss": 8.5443, - "step": 96000 - }, - { - "epoch": 0.41, - "learning_rate": 0.002634541703276827, - "loss": 8.5398, - "step": 96200 - }, - { - "epoch": 0.41, - "learning_rate": 0.002643044100010169, - "loss": 8.5523, - "step": 96400 - }, - { - "epoch": 0.42, - "learning_rate": 0.002651557712877833, - "loss": 8.5562, - "step": 96600 - }, - { - "epoch": 0.42, - "learning_rate": 0.0026600825045346955, - "loss": 8.5525, - "step": 96800 - }, - { - "epoch": 0.42, - "learning_rate": 0.0026686184375866043, - "loss": 8.5728, - "step": 97000 - }, - { - "epoch": 0.42, - "learning_rate": 0.002677165474590528, - "loss": 8.5631, - "step": 97200 - }, - { - "epoch": 0.42, - "learning_rate": 0.002685723578054729, - "loss": 8.5658, - "step": 97400 - }, - { - "epoch": 0.42, - "learning_rate": 0.0026942927104389334, - "loss": 8.566, - "step": 97600 - }, - { - "epoch": 0.42, - "learning_rate": 0.002702872834154482, - "loss": 8.5716, - "step": 97800 - }, - { - "epoch": 0.42, - "learning_rate": 0.0027114639115645017, - "loss": 8.5697, - "step": 98000 - }, - { - "epoch": 0.42, - "learning_rate": 0.002720022867925799, - "loss": 8.5726, - "step": 98200 - }, - { - "epoch": 0.42, - "learning_rate": 0.0027286356853246747, - "loss": 8.5718, - "step": 98400 - }, - { - "epoch": 0.42, - "learning_rate": 0.0027372593434088002, - "loss": 8.5716, - "step": 98600 - }, - { - "epoch": 0.42, - "learning_rate": 0.002745893804350339, - "loss": 8.5767, - "step": 98800 - }, - { - "epoch": 0.43, - "learning_rate": 0.00275453903027407, - "loss": 8.5957, - "step": 99000 - }, - { - "epoch": 0.43, - "learning_rate": 0.0027631949832575475, - "loss": 8.5881, - "step": 99200 - }, - { - "epoch": 0.43, - "learning_rate": 0.002771861625331276, - "loss": 8.5835, - "step": 99400 - }, - { - "epoch": 0.43, - "learning_rate": 0.002780495505581529, - "loss": 8.5905, - "step": 99600 - }, - { - "epoch": 0.43, - "learning_rate": 0.002789183358769584, - "loss": 8.5938, - "step": 99800 - }, - { - "epoch": 0.43, - "learning_rate": 0.0027978817870494, - "loss": 8.5906, - "step": 100000 - }, - { - "epoch": 0.43, - "learning_rate": 0.0028065907522651585, - "loss": 8.5938, - "step": 100200 - }, - { - "epoch": 0.43, - "learning_rate": 0.002815310216214826, - "loss": 8.5887, - "step": 100400 - }, - { - "epoch": 0.43, - "learning_rate": 0.00282404014065031, - "loss": 8.5922, - "step": 100600 - }, - { - "epoch": 0.43, - "learning_rate": 0.0028327804872776367, - "loss": 8.5926, - "step": 100800 - }, - { - "epoch": 0.43, - "learning_rate": 0.002841531217757113, - "loss": 8.5978, - "step": 101000 - }, - { - "epoch": 0.43, - "learning_rate": 0.0028502922937035, - "loss": 8.5984, - "step": 101200 - }, - { - "epoch": 0.44, - "learning_rate": 0.0028590636766861726, - "loss": 8.6046, - "step": 101400 - }, - { - "epoch": 0.44, - "learning_rate": 0.0028678453282293013, - "loss": 8.6093, - "step": 101600 - }, - { - "epoch": 0.44, - "learning_rate": 0.0028766372098120076, - "loss": 8.6083, - "step": 101800 - }, - { - "epoch": 0.44, - "learning_rate": 0.0028854392828685377, - "loss": 8.6057, - "step": 102000 - }, - { - "epoch": 0.44, - "learning_rate": 0.0028942515087884407, - "loss": 8.6146, - "step": 102200 - }, - { - "epoch": 0.44, - "learning_rate": 0.00290307384891672, - "loss": 8.608, - "step": 102400 - }, - { - "epoch": 0.44, - "learning_rate": 0.00291190626455402, - "loss": 8.6081, - "step": 102600 - }, - { - "epoch": 0.44, - "learning_rate": 0.0029207044797924615, - "loss": 8.6164, - "step": 102800 - }, - { - "epoch": 0.44, - "learning_rate": 0.0029295568802797795, - "loss": 8.6008, - "step": 103000 - }, - { - "epoch": 0.44, - "learning_rate": 0.0029384192401078115, - "loss": 8.6166, - "step": 103200 - }, - { - "epoch": 0.44, - "learning_rate": 0.00294729152040165, - "loss": 8.5962, - "step": 103400 - }, - { - "epoch": 0.45, - "learning_rate": 0.002956173682242877, - "loss": 8.6129, - "step": 103600 - }, - { - "epoch": 0.45, - "learning_rate": 0.002965065686669722, - "loss": 8.6092, - "step": 103800 - }, - { - "epoch": 0.45, - "learning_rate": 0.0029739674946772463, - "loss": 8.6189, - "step": 104000 - }, - { - "epoch": 0.45, - "learning_rate": 0.002982879067217503, - "loss": 8.612, - "step": 104200 - }, - { - "epoch": 0.45, - "learning_rate": 0.0029918003651997144, - "loss": 8.6135, - "step": 104400 - }, - { - "epoch": 0.45, - "learning_rate": 0.003000731349490442, - "loss": 8.6182, - "step": 104600 - }, - { - "epoch": 0.45, - "learning_rate": 0.0030096719809137584, - "loss": 8.6423, - "step": 104800 - }, - { - "epoch": 0.45, - "learning_rate": 0.003018622220251419, - "loss": 8.6145, - "step": 105000 - }, - { - "epoch": 0.45, - "learning_rate": 0.0030275372054660438, - "loss": 8.6249, - "step": 105200 - }, - { - "epoch": 0.45, - "learning_rate": 0.0030365064952603237, - "loss": 8.6265, - "step": 105400 - }, - { - "epoch": 0.45, - "learning_rate": 0.0030454852752588536, - "loss": 8.6304, - "step": 105600 - }, - { - "epoch": 0.45, - "learning_rate": 0.0030544735060760494, - "loss": 8.6309, - "step": 105800 - }, - { - "epoch": 0.46, - "learning_rate": 0.0030634711482848704, - "loss": 8.6258, - "step": 106000 - }, - { - "epoch": 0.46, - "learning_rate": 0.003072478162416994, - "loss": 8.6328, - "step": 106200 - }, - { - "epoch": 0.46, - "learning_rate": 0.003081494508962985, - "loss": 8.6298, - "step": 106400 - }, - { - "epoch": 0.46, - "learning_rate": 0.0030905201483724717, - "loss": 8.639, - "step": 106600 - }, - { - "epoch": 0.46, - "learning_rate": 0.0030995550410543226, - "loss": 8.6212, - "step": 106800 - }, - { - "epoch": 0.46, - "learning_rate": 0.0031085991473768114, - "loss": 8.6374, - "step": 107000 - }, - { - "epoch": 0.46, - "learning_rate": 0.003117652427667799, - "loss": 8.6326, - "step": 107200 - }, - { - "epoch": 0.46, - "learning_rate": 0.0031267148422149046, - "loss": 8.6291, - "step": 107400 - }, - { - "epoch": 0.46, - "learning_rate": 0.003135740971163656, - "loss": 8.6375, - "step": 107600 - }, - { - "epoch": 0.46, - "learning_rate": 0.0031448214897512507, - "loss": 8.6226, - "step": 107800 - }, - { - "epoch": 0.46, - "learning_rate": 0.003153911023417371, - "loss": 8.6359, - "step": 108000 - }, - { - "epoch": 0.46, - "learning_rate": 0.003163009532290608, - "loss": 8.6491, - "step": 108200 - }, - { - "epoch": 0.47, - "learning_rate": 0.0031721169764601844, - "loss": 8.6405, - "step": 108400 - }, - { - "epoch": 0.47, - "learning_rate": 0.0031812333159761293, - "loss": 8.632, - "step": 108600 - }, - { - "epoch": 0.47, - "learning_rate": 0.003190358510849451, - "loss": 8.6363, - "step": 108800 - }, - { - "epoch": 0.47, - "learning_rate": 0.0031994925210523124, - "loss": 8.6316, - "step": 109000 - }, - { - "epoch": 0.47, - "learning_rate": 0.0032086353065182106, - "loss": 8.6423, - "step": 109200 - }, - { - "epoch": 0.47, - "learning_rate": 0.003217786827142146, - "loss": 8.6274, - "step": 109400 - }, - { - "epoch": 0.47, - "learning_rate": 0.003226947042780804, - "loss": 8.6366, - "step": 109600 - }, - { - "epoch": 0.47, - "learning_rate": 0.003236070047437989, - "loss": 8.6388, - "step": 109800 - }, - { - "epoch": 0.47, - "learning_rate": 0.003245247489550804, - "loss": 8.6364, - "step": 110000 - }, - { - "epoch": 0.47, - "learning_rate": 0.0032544335062216403, - "loss": 8.6422, - "step": 110200 - }, - { - "epoch": 0.47, - "learning_rate": 0.0032636280571558636, - "loss": 8.618, - "step": 110400 - }, - { - "epoch": 0.48, - "learning_rate": 0.003272831102021408, - "loss": 8.6276, - "step": 110600 - }, - { - "epoch": 0.48, - "learning_rate": 0.003282042600448948, - "loss": 8.6454, - "step": 110800 - }, - { - "epoch": 0.48, - "learning_rate": 0.0032912625120320753, - "loss": 8.6388, - "step": 111000 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033004907963274733, - "loss": 8.6339, - "step": 111200 - }, - { - "epoch": 0.48, - "learning_rate": 0.003309727412855108, - "loss": 8.6243, - "step": 111400 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033189723210983865, - "loss": 8.6264, - "step": 111600 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033282254805043487, - "loss": 8.6401, - "step": 111800 - }, - { - "epoch": 0.48, - "learning_rate": 0.003337440523277331, - "loss": 8.6366, - "step": 112000 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033467100224565524, - "loss": 8.6338, - "step": 112200 - }, - { - "epoch": 0.48, - "learning_rate": 0.003355987651126521, - "loss": 8.6377, - "step": 112400 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033652733685907424, - "loss": 8.6414, - "step": 112600 - }, - { - "epoch": 0.48, - "learning_rate": 0.0033745671341172496, - "loss": 8.6264, - "step": 112800 - }, - { - "epoch": 0.49, - "learning_rate": 0.0033838689069387654, - "loss": 8.6289, - "step": 113000 - }, - { - "epoch": 0.49, - "learning_rate": 0.00339317864625289, - "loss": 8.6244, - "step": 113200 - }, - { - "epoch": 0.49, - "learning_rate": 0.003402496311222283, - "loss": 8.6287, - "step": 113400 - }, - { - "epoch": 0.49, - "learning_rate": 0.0034118218609748346, - "loss": 8.6251, - "step": 113600 - }, - { - "epoch": 0.49, - "learning_rate": 0.003421155254603846, - "loss": 8.6214, - "step": 113800 - }, - { - "epoch": 0.49, - "learning_rate": 0.0034304964511682147, - "loss": 8.6303, - "step": 114000 - }, - { - "epoch": 0.49, - "learning_rate": 0.0034398454096926092, - "loss": 8.6369, - "step": 114200 - }, - { - "epoch": 0.49, - "learning_rate": 0.003449202089167651, - "loss": 8.6236, - "step": 114400 - }, - { - "epoch": 0.49, - "learning_rate": 0.0034585196077173436, - "loss": 8.6251, - "step": 114600 - }, - { - "epoch": 0.49, - "learning_rate": 0.003467891567838331, - "loss": 8.6295, - "step": 114800 - }, - { - "epoch": 0.49, - "learning_rate": 0.003477271125884973, - "loss": 8.6219, - "step": 115000 - }, - { - "epoch": 0.5, - "learning_rate": 0.0034866582407136653, - "loss": 8.6271, - "step": 115200 - }, - { - "epoch": 0.5, - "learning_rate": 0.003496052871147656, - "loss": 8.6372, - "step": 115400 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035054549759772242, - "loss": 8.6238, - "step": 115600 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035148645139598637, - "loss": 8.6207, - "step": 115800 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035242814438204637, - "loss": 8.6099, - "step": 116000 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035337057242514833, - "loss": 8.6142, - "step": 116200 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035431373139131472, - "loss": 8.6033, - "step": 116400 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035525761714336104, - "loss": 8.6178, - "step": 116600 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035619750070819923, - "loss": 8.6138, - "step": 116800 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035714282402552104, - "loss": 8.6143, - "step": 117000 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035808886171885554, - "loss": 8.6034, - "step": 117200 - }, - { - "epoch": 0.5, - "learning_rate": 0.0035903560963839124, - "loss": 8.6156, - "step": 117400 - }, - { - "epoch": 0.51, - "learning_rate": 0.0035998306363120057, - "loss": 8.6148, - "step": 117600 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036093121954125906, - "loss": 8.6039, - "step": 117800 - }, - { - "epoch": 0.51, - "learning_rate": 0.003618800732094636, - "loss": 8.6107, - "step": 118000 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036282962047364973, - "loss": 8.6094, - "step": 118200 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036377985716861084, - "loss": 8.616, - "step": 118400 - }, - { - "epoch": 0.51, - "learning_rate": 0.003647307791261164, - "loss": 8.6135, - "step": 118600 - }, - { - "epoch": 0.51, - "learning_rate": 0.003656823821749292, - "loss": 8.6062, - "step": 118800 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036662989906407328, - "loss": 8.6029, - "step": 119000 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036758284841655496, - "loss": 8.6011, - "step": 119200 - }, - { - "epoch": 0.51, - "learning_rate": 0.0036853646634968946, - "loss": 8.5993, - "step": 119400 - }, - { - "epoch": 0.51, - "learning_rate": 0.003694907486804143, - "loss": 8.6029, - "step": 119600 - }, - { - "epoch": 0.51, - "learning_rate": 0.00370445691222752, - "loss": 8.6018, - "step": 119800 - }, - { - "epoch": 0.52, - "learning_rate": 0.003714012897878298, - "loss": 8.5978, - "step": 120000 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037235754018389664, - "loss": 8.5986, - "step": 120200 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037331443821634266, - "loss": 8.6062, - "step": 120400 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037427197968771695, - "loss": 8.5854, - "step": 120600 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037523016039774605, - "loss": 8.5959, - "step": 120800 - }, - { - "epoch": 0.52, - "learning_rate": 0.003761841804919297, - "loss": 8.6054, - "step": 121000 - }, - { - "epoch": 0.52, - "learning_rate": 0.00377143623923569, - "loss": 8.5871, - "step": 121200 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037810369399734253, - "loss": 8.5885, - "step": 121400 - }, - { - "epoch": 0.52, - "learning_rate": 0.0037906438650188523, - "loss": 8.5805, - "step": 121600 - }, - { - "epoch": 0.52, - "learning_rate": 0.0038002569722310163, - "loss": 8.5889, - "step": 121800 - }, - { - "epoch": 0.52, - "learning_rate": 0.0038098762194418477, - "loss": 8.5866, - "step": 122000 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038195015644563388, - "loss": 8.5782, - "step": 122200 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038291329650527338, - "loss": 8.579, - "step": 122400 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038387703789827194, - "loss": 8.5773, - "step": 122600 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038484137639716006, - "loss": 8.582, - "step": 122800 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038580148164719733, - "loss": 8.5778, - "step": 123000 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038676699873231536, - "loss": 8.5789, - "step": 123200 - }, - { - "epoch": 0.53, - "learning_rate": 0.0038773310024645593, - "loss": 8.578, - "step": 123400 - }, - { - "epoch": 0.53, - "learning_rate": 0.003886997819517974, - "loss": 8.5609, - "step": 123600 - }, - { - "epoch": 0.53, - "learning_rate": 0.003896670396079725, - "loss": 8.5707, - "step": 123800 - }, - { - "epoch": 0.53, - "learning_rate": 0.003906300284101649, - "loss": 8.5732, - "step": 124000 - }, - { - "epoch": 0.53, - "learning_rate": 0.003915984224100703, - "loss": 8.5731, - "step": 124200 - }, - { - "epoch": 0.53, - "learning_rate": 0.003925673796458692, - "loss": 8.5496, - "step": 124400 - }, - { - "epoch": 0.54, - "learning_rate": 0.0039353689586721285, - "loss": 8.5692, - "step": 124600 - }, - { - "epoch": 0.54, - "learning_rate": 0.0039450696682130065, - "loss": 8.5704, - "step": 124800 - }, - { - "epoch": 0.54, - "learning_rate": 0.003954775882528979, - "loss": 8.5663, - "step": 125000 - }, - { - "epoch": 0.54, - "learning_rate": 0.003964487559043562, - "loss": 8.5697, - "step": 125200 - }, - { - "epoch": 0.54, - "learning_rate": 0.003974204655156306, - "loss": 8.5784, - "step": 125400 - }, - { - "epoch": 0.54, - "learning_rate": 0.003983927128242989, - "loss": 8.566, - "step": 125600 - }, - { - "epoch": 0.54, - "learning_rate": 0.003993654935655802, - "loss": 8.5501, - "step": 125800 - }, - { - "epoch": 0.54, - "learning_rate": 0.004003388034723539, - "loss": 8.5701, - "step": 126000 - }, - { - "epoch": 0.54, - "learning_rate": 0.004013077678025505, - "loss": 8.563, - "step": 126200 - }, - { - "epoch": 0.54, - "learning_rate": 0.00402282120637189, - "loss": 8.5541, - "step": 126400 - }, - { - "epoch": 0.54, - "learning_rate": 0.004032569898434814, - "loss": 8.5581, - "step": 126600 - }, - { - "epoch": 0.54, - "learning_rate": 0.004042323711451458, - "loss": 8.5513, - "step": 126800 - }, - { - "epoch": 0.55, - "learning_rate": 0.004052082602636542, - "loss": 8.5474, - "step": 127000 - }, - { - "epoch": 0.55, - "learning_rate": 0.004061846529182508, - "loss": 8.5427, - "step": 127200 - }, - { - "epoch": 0.55, - "learning_rate": 0.004071615448259712, - "loss": 8.5414, - "step": 127400 - }, - { - "epoch": 0.55, - "learning_rate": 0.00408138931701661, - "loss": 8.5516, - "step": 127600 - }, - { - "epoch": 0.55, - "learning_rate": 0.004091168092579948, - "loss": 8.5422, - "step": 127800 - }, - { - "epoch": 0.55, - "learning_rate": 0.004100951732054943, - "loss": 8.5457, - "step": 128000 - }, - { - "epoch": 0.55, - "learning_rate": 0.004110740192525482, - "loss": 8.5488, - "step": 128200 - }, - { - "epoch": 0.55, - "learning_rate": 0.0041205334310543025, - "loss": 8.5424, - "step": 128400 - }, - { - "epoch": 0.55, - "learning_rate": 0.004130331404683179, - "loss": 8.5408, - "step": 128600 - }, - { - "epoch": 0.55, - "learning_rate": 0.004140134070433124, - "loss": 8.5473, - "step": 128800 - }, - { - "epoch": 0.55, - "learning_rate": 0.004149892337236666, - "loss": 8.5442, - "step": 129000 - }, - { - "epoch": 0.56, - "learning_rate": 0.004159704235286162, - "loss": 8.5338, - "step": 129200 - }, - { - "epoch": 0.56, - "learning_rate": 0.004169520696612262, - "loss": 8.5434, - "step": 129400 - }, - { - "epoch": 0.56, - "learning_rate": 0.004179341678154871, - "loss": 8.5343, - "step": 129600 - }, - { - "epoch": 0.56, - "learning_rate": 0.0041891671368340785, - "loss": 8.5373, - "step": 129800 - }, - { - "epoch": 0.56, - "learning_rate": 0.0041989970295503234, - "loss": 8.5275, - "step": 130000 - }, - { - "epoch": 0.56, - "learning_rate": 0.004208831313184605, - "loss": 8.541, - "step": 130200 - }, - { - "epoch": 0.56, - "learning_rate": 0.00421866994459865, - "loss": 8.5469, - "step": 130400 - }, - { - "epoch": 0.56, - "learning_rate": 0.004228512880635122, - "loss": 8.5235, - "step": 130600 - }, - { - "epoch": 0.56, - "learning_rate": 0.004238360078117803, - "loss": 8.5301, - "step": 130800 - }, - { - "epoch": 0.56, - "learning_rate": 0.004248211493851777, - "loss": 8.5392, - "step": 131000 - }, - { - "epoch": 0.56, - "learning_rate": 0.004258067084623627, - "loss": 8.5268, - "step": 131200 - }, - { - "epoch": 0.56, - "learning_rate": 0.00426792680720162, - "loss": 8.5277, - "step": 131400 - }, - { - "epoch": 0.57, - "learning_rate": 0.004277790618335904, - "loss": 8.5231, - "step": 131600 - }, - { - "epoch": 0.57, - "learning_rate": 0.004287658474758687, - "loss": 8.5297, - "step": 131800 - }, - { - "epoch": 0.57, - "learning_rate": 0.0042975303331844366, - "loss": 8.5147, - "step": 132000 - }, - { - "epoch": 0.57, - "learning_rate": 0.004307406150310058, - "loss": 8.5194, - "step": 132200 - }, - { - "epoch": 0.57, - "learning_rate": 0.0043172858828151054, - "loss": 8.5141, - "step": 132400 - }, - { - "epoch": 0.57, - "learning_rate": 0.004327169487361941, - "loss": 8.508, - "step": 132600 - }, - { - "epoch": 0.57, - "learning_rate": 0.004337007473977664, - "loss": 8.5237, - "step": 132800 - }, - { - "epoch": 0.57, - "learning_rate": 0.004346898673708784, - "loss": 8.5104, - "step": 133000 - }, - { - "epoch": 0.57, - "learning_rate": 0.00435679361558464, - "loss": 8.5094, - "step": 133200 - }, - { - "epoch": 0.57, - "learning_rate": 0.004366692256200889, - "loss": 8.5082, - "step": 133400 - }, - { - "epoch": 0.57, - "learning_rate": 0.004376594552136958, - "loss": 8.5097, - "step": 133600 - }, - { - "epoch": 0.57, - "learning_rate": 0.004386500459956245, - "loss": 8.4974, - "step": 133800 - }, - { - "epoch": 0.58, - "learning_rate": 0.004396409936206303, - "loss": 8.5038, - "step": 134000 - }, - { - "epoch": 0.58, - "learning_rate": 0.0044063229374190285, - "loss": 8.5006, - "step": 134200 - }, - { - "epoch": 0.58, - "learning_rate": 0.004416239420110862, - "loss": 8.4979, - "step": 134400 - }, - { - "epoch": 0.58, - "learning_rate": 0.004426159340782965, - "loss": 8.5004, - "step": 134600 - }, - { - "epoch": 0.58, - "learning_rate": 0.004436082655921426, - "loss": 8.5025, - "step": 134800 - }, - { - "epoch": 0.58, - "learning_rate": 0.0044460093219974356, - "loss": 8.4926, - "step": 135000 - }, - { - "epoch": 0.58, - "learning_rate": 0.004455939295467492, - "loss": 8.5013, - "step": 135200 - }, - { - "epoch": 0.58, - "learning_rate": 0.004465872532773585, - "loss": 8.4835, - "step": 135400 - }, - { - "epoch": 0.58, - "learning_rate": 0.004475808990343381, - "loss": 8.4755, - "step": 135600 - }, - { - "epoch": 0.58, - "learning_rate": 0.004485698918589326, - "loss": 8.4965, - "step": 135800 - }, - { - "epoch": 0.58, - "learning_rate": 0.0044956416703563285, - "loss": 8.4749, - "step": 136000 - }, - { - "epoch": 0.59, - "learning_rate": 0.004505587511804164, - "loss": 8.4748, - "step": 136200 - }, - { - "epoch": 0.59, - "learning_rate": 0.004515536399305215, - "loss": 8.4904, - "step": 136400 - }, - { - "epoch": 0.59, - "learning_rate": 0.004525488289218503, - "loss": 8.4706, - "step": 136600 - }, - { - "epoch": 0.59, - "learning_rate": 0.0045354431378898784, - "loss": 8.4638, - "step": 136800 - }, - { - "epoch": 0.59, - "learning_rate": 0.0045454009016522145, - "loss": 8.4697, - "step": 137000 - }, - { - "epoch": 0.59, - "learning_rate": 0.004555361536825594, - "loss": 8.4677, - "step": 137200 - }, - { - "epoch": 0.59, - "learning_rate": 0.00456532499971751, - "loss": 8.4774, - "step": 137400 - }, - { - "epoch": 0.59, - "learning_rate": 0.004575291246623046, - "loss": 8.4742, - "step": 137600 - }, - { - "epoch": 0.59, - "learning_rate": 0.0045852602338250755, - "loss": 8.4534, - "step": 137800 - }, - { - "epoch": 0.59, - "learning_rate": 0.004595231917594454, - "loss": 8.4585, - "step": 138000 - }, - { - "epoch": 0.59, - "learning_rate": 0.004605206254190205, - "loss": 8.4567, - "step": 138200 - }, - { - "epoch": 0.59, - "learning_rate": 0.004615183199859717, - "loss": 8.4623, - "step": 138400 - }, - { - "epoch": 0.6, - "learning_rate": 0.0046251627108389315, - "loss": 8.4622, - "step": 138600 - }, - { - "epoch": 0.6, - "learning_rate": 0.004635144743352541, - "loss": 8.462, - "step": 138800 - }, - { - "epoch": 0.6, - "learning_rate": 0.004645129253614176, - "loss": 8.4598, - "step": 139000 - }, - { - "epoch": 0.6, - "learning_rate": 0.004655116197826597, - "loss": 8.4574, - "step": 139200 - }, - { - "epoch": 0.6, - "learning_rate": 0.004665105532181887, - "loss": 8.4479, - "step": 139400 - }, - { - "epoch": 0.6, - "learning_rate": 0.004675097212861648, - "loss": 8.4439, - "step": 139600 - }, - { - "epoch": 0.6, - "learning_rate": 0.004685091196037188, - "loss": 8.4371, - "step": 139800 - }, - { - "epoch": 0.6, - "learning_rate": 0.004695037451114673, - "loss": 8.4446, - "step": 140000 - }, - { - "epoch": 0.6, - "learning_rate": 0.004705035896790534, - "loss": 8.443, - "step": 140200 - }, - { - "epoch": 0.6, - "learning_rate": 0.004715036513635584, - "loss": 8.4508, - "step": 140400 - }, - { - "epoch": 0.6, - "learning_rate": 0.004725039257781931, - "loss": 8.4526, - "step": 140600 - }, - { - "epoch": 0.61, - "learning_rate": 0.0047350440853523535, - "loss": 8.4303, - "step": 140800 - }, - { - "epoch": 0.61, - "learning_rate": 0.0047450509524604905, - "loss": 8.4566, - "step": 141000 - }, - { - "epoch": 0.61, - "learning_rate": 0.004755059815211036, - "loss": 8.4349, - "step": 141200 - }, - { - "epoch": 0.61, - "learning_rate": 0.004765070629699923, - "loss": 8.4244, - "step": 141400 - }, - { - "epoch": 0.61, - "learning_rate": 0.004775083352014535, - "loss": 8.4385, - "step": 141600 - }, - { - "epoch": 0.61, - "learning_rate": 0.004785097938233878, - "loss": 8.4384, - "step": 141800 - }, - { - "epoch": 0.61, - "learning_rate": 0.004795064257943296, - "loss": 8.4388, - "step": 142000 - }, - { - "epoch": 0.61, - "learning_rate": 0.00480508243140573, - "loss": 8.4269, - "step": 142200 - }, - { - "epoch": 0.61, - "learning_rate": 0.004815102337181376, - "loss": 8.4318, - "step": 142400 - }, - { - "epoch": 0.61, - "learning_rate": 0.0048251239313177345, - "loss": 8.4194, - "step": 142600 - }, - { - "epoch": 0.61, - "learning_rate": 0.0048351471698549016, - "loss": 8.4253, - "step": 142800 - }, - { - "epoch": 0.61, - "learning_rate": 0.0048451720088257474, - "loss": 8.4253, - "step": 143000 - }, - { - "epoch": 0.62, - "learning_rate": 0.0048551984042561405, - "loss": 8.4349, - "step": 143200 - }, - { - "epoch": 0.62, - "learning_rate": 0.004865226312165102, - "loss": 8.4241, - "step": 143400 - }, - { - "epoch": 0.62, - "learning_rate": 0.004875255688565038, - "loss": 8.4197, - "step": 143600 - }, - { - "epoch": 0.62, - "learning_rate": 0.004885286489461897, - "loss": 8.4201, - "step": 143800 - }, - { - "epoch": 0.62, - "learning_rate": 0.004895318670855387, - "loss": 8.409, - "step": 144000 - }, - { - "epoch": 0.62, - "learning_rate": 0.004905302017898024, - "loss": 8.4155, - "step": 144200 - }, - { - "epoch": 0.62, - "learning_rate": 0.004915336821906966, - "loss": 8.4233, - "step": 144400 - }, - { - "epoch": 0.62, - "learning_rate": 0.004925372874596202, - "loss": 8.4047, - "step": 144600 - }, - { - "epoch": 0.62, - "learning_rate": 0.004935410131942399, - "loss": 8.4082, - "step": 144800 - }, - { - "epoch": 0.62, - "learning_rate": 0.004945448549916944, - "loss": 8.4056, - "step": 145000 - }, - { - "epoch": 0.62, - "learning_rate": 0.00495548808448613, - "loss": 8.4157, - "step": 145200 - }, - { - "epoch": 0.62, - "learning_rate": 0.004965528691611352, - "loss": 8.4034, - "step": 145400 - }, - { - "epoch": 0.63, - "learning_rate": 0.0049755703272493035, - "loss": 8.4053, - "step": 145600 - }, - { - "epoch": 0.63, - "learning_rate": 0.004985612947352163, - "loss": 8.3967, - "step": 145800 - }, - { - "epoch": 0.63, - "learning_rate": 0.004995656507867792, - "loss": 8.3975, - "step": 146000 - }, - { - "epoch": 0.63, - "learning_rate": 0.005005700964739926, - "loss": 8.398, - "step": 146200 - }, - { - "epoch": 0.63, - "learning_rate": 0.005015746273908371, - "loss": 8.3969, - "step": 146400 - }, - { - "epoch": 0.63, - "learning_rate": 0.0050257421587846015, - "loss": 8.3947, - "step": 146600 - }, - { - "epoch": 0.63, - "learning_rate": 0.005035789036639117, - "loss": 8.3868, - "step": 146800 - }, - { - "epoch": 0.63, - "learning_rate": 0.0050458366348080565, - "loss": 8.3901, - "step": 147000 - }, - { - "epoch": 0.63, - "learning_rate": 0.005055884909217449, - "loss": 8.3809, - "step": 147200 - }, - { - "epoch": 0.63, - "learning_rate": 0.00506593381579035, - "loss": 8.3774, - "step": 147400 - }, - { - "epoch": 0.63, - "learning_rate": 0.005075983310447046, - "loss": 8.3868, - "step": 147600 - }, - { - "epoch": 0.64, - "learning_rate": 0.005086033349105245, - "loss": 8.3922, - "step": 147800 - }, - { - "epoch": 0.64, - "learning_rate": 0.005096083887680262, - "loss": 8.3754, - "step": 148000 - }, - { - "epoch": 0.64, - "learning_rate": 0.005106134882085229, - "loss": 8.3799, - "step": 148200 - }, - { - "epoch": 0.64, - "learning_rate": 0.005116186288231269, - "loss": 8.3862, - "step": 148400 - }, - { - "epoch": 0.64, - "learning_rate": 0.005126238062027705, - "loss": 8.3862, - "step": 148600 - }, - { - "epoch": 0.64, - "learning_rate": 0.005136290159382248, - "loss": 8.3797, - "step": 148800 - }, - { - "epoch": 0.64, - "learning_rate": 0.005146342536201182, - "loss": 8.361, - "step": 149000 - }, - { - "epoch": 0.64, - "learning_rate": 0.0051563951483895734, - "loss": 8.3778, - "step": 149200 - }, - { - "epoch": 0.64, - "learning_rate": 0.005166447951851452, - "loss": 8.366, - "step": 149400 - }, - { - "epoch": 0.64, - "learning_rate": 0.005176500902490011, - "loss": 8.3735, - "step": 149600 - }, - { - "epoch": 0.64, - "learning_rate": 0.005186553956207795, - "loss": 8.3626, - "step": 149800 - }, - { - "epoch": 0.64, - "learning_rate": 0.005196607068906898, - "loss": 8.3754, - "step": 150000 - }, - { - "epoch": 0.65, - "learning_rate": 0.0052066601964891575, - "loss": 8.3638, - "step": 150200 - }, - { - "epoch": 0.65, - "learning_rate": 0.005216713294856342, - "loss": 8.364, - "step": 150400 - }, - { - "epoch": 0.65, - "learning_rate": 0.00522671605504039, - "loss": 8.3722, - "step": 150600 - }, - { - "epoch": 0.65, - "learning_rate": 0.005236768963380187, - "loss": 8.3665, - "step": 150800 - }, - { - "epoch": 0.65, - "learning_rate": 0.005246821710432247, - "loss": 8.3738, - "step": 151000 - }, - { - "epoch": 0.65, - "learning_rate": 0.005256874252100009, - "loss": 8.3705, - "step": 151200 - }, - { - "epoch": 0.65, - "learning_rate": 0.005266926544287812, - "loss": 8.3604, - "step": 151400 - }, - { - "epoch": 0.65, - "learning_rate": 0.00527697854290109, - "loss": 8.3604, - "step": 151600 - }, - { - "epoch": 0.65, - "learning_rate": 0.00528703020384657, - "loss": 8.3627, - "step": 151800 - }, - { - "epoch": 0.65, - "learning_rate": 0.005297081483032449, - "loss": 8.3571, - "step": 152000 - }, - { - "epoch": 0.65, - "learning_rate": 0.005307132336368609, - "loss": 8.3635, - "step": 152200 - }, - { - "epoch": 0.65, - "learning_rate": 0.005317182719766797, - "loss": 8.355, - "step": 152400 - }, - { - "epoch": 0.66, - "learning_rate": 0.005327182341145505, - "loss": 8.3507, - "step": 152600 - }, - { - "epoch": 0.66, - "learning_rate": 0.005337231655311616, - "loss": 8.3495, - "step": 152800 - }, - { - "epoch": 0.66, - "learning_rate": 0.005347280367508536, - "loss": 8.355, - "step": 153000 - }, - { - "epoch": 0.66, - "learning_rate": 0.0053573284336574, - "loss": 8.3572, - "step": 153200 - }, - { - "epoch": 0.66, - "learning_rate": 0.0053673758096821796, - "loss": 8.3379, - "step": 153400 - }, - { - "epoch": 0.66, - "learning_rate": 0.00537742245150988, - "loss": 8.3463, - "step": 153600 - }, - { - "epoch": 0.66, - "learning_rate": 0.005387468315070715, - "loss": 8.3348, - "step": 153800 - }, - { - "epoch": 0.66, - "learning_rate": 0.005397513356298321, - "loss": 8.3424, - "step": 154000 - }, - { - "epoch": 0.66, - "learning_rate": 0.005407557531129937, - "loss": 8.328, - "step": 154200 - }, - { - "epoch": 0.66, - "learning_rate": 0.005417600795506608, - "loss": 8.3338, - "step": 154400 - }, - { - "epoch": 0.66, - "learning_rate": 0.0054275928962712475, - "loss": 8.3438, - "step": 154600 - }, - { - "epoch": 0.67, - "learning_rate": 0.005437634212679688, - "loss": 8.3409, - "step": 154800 - }, - { - "epoch": 0.67, - "learning_rate": 0.005447674486701264, - "loss": 8.3349, - "step": 155000 - }, - { - "epoch": 0.67, - "learning_rate": 0.005457713674294124, - "loss": 8.3268, - "step": 155200 - }, - { - "epoch": 0.67, - "learning_rate": 0.00546775173142119, - "loss": 8.3379, - "step": 155400 - }, - { - "epoch": 0.67, - "learning_rate": 0.005477788614050335, - "loss": 8.3365, - "step": 155600 - }, - { - "epoch": 0.67, - "learning_rate": 0.00548782427815459, - "loss": 8.3309, - "step": 155800 - }, - { - "epoch": 0.67, - "learning_rate": 0.005497858679712329, - "loss": 8.3113, - "step": 156000 - }, - { - "epoch": 0.67, - "learning_rate": 0.005507891774707462, - "loss": 8.3367, - "step": 156200 - }, - { - "epoch": 0.67, - "learning_rate": 0.005517923519129636, - "loss": 8.3225, - "step": 156400 - }, - { - "epoch": 0.67, - "learning_rate": 0.005527953868974416, - "loss": 8.3239, - "step": 156600 - }, - { - "epoch": 0.67, - "learning_rate": 0.005537982780243487, - "loss": 8.3119, - "step": 156800 - }, - { - "epoch": 0.67, - "learning_rate": 0.005547960075561988, - "loss": 8.3104, - "step": 157000 - }, - { - "epoch": 0.68, - "learning_rate": 0.005557985985452301, - "loss": 8.3146, - "step": 157200 - }, - { - "epoch": 0.68, - "learning_rate": 0.005568010325030472, - "loss": 8.3196, - "step": 157400 - }, - { - "epoch": 0.68, - "learning_rate": 0.005578033050324548, - "loss": 8.3283, - "step": 157600 - }, - { - "epoch": 0.68, - "learning_rate": 0.005588054117369661, - "loss": 8.3336, - "step": 157800 - }, - { - "epoch": 0.68, - "learning_rate": 0.005598073482208217, - "loss": 8.3147, - "step": 158000 - }, - { - "epoch": 0.68, - "learning_rate": 0.0056080911008900836, - "loss": 8.3081, - "step": 158200 - }, - { - "epoch": 0.68, - "learning_rate": 0.005618106929472792, - "loss": 8.3133, - "step": 158400 - }, - { - "epoch": 0.68, - "learning_rate": 0.005628120924021727, - "loss": 8.314, - "step": 158600 - }, - { - "epoch": 0.68, - "learning_rate": 0.005638133040610314, - "loss": 8.3186, - "step": 158800 - }, - { - "epoch": 0.68, - "learning_rate": 0.005648143235320219, - "loss": 8.2955, - "step": 159000 - }, - { - "epoch": 0.68, - "learning_rate": 0.005658101428059457, - "loss": 8.3044, - "step": 159200 - }, - { - "epoch": 0.68, - "learning_rate": 0.005668107657448549, - "loss": 8.302, - "step": 159400 - }, - { - "epoch": 0.69, - "learning_rate": 0.005678111833474749, - "loss": 8.3044, - "step": 159600 - }, - { - "epoch": 0.69, - "learning_rate": 0.005688113912254554, - "loss": 8.2975, - "step": 159800 - }, - { - "epoch": 0.69, - "learning_rate": 0.005698113849913661, - "loss": 8.3034, - "step": 160000 - }, - { - "epoch": 0.69, - "learning_rate": 0.0057081116025871565, - "loss": 8.3105, - "step": 160200 - }, - { - "epoch": 0.69, - "learning_rate": 0.005718107126419714, - "loss": 8.3043, - "step": 160400 - }, - { - "epoch": 0.69, - "learning_rate": 0.005728100377565787, - "loss": 8.3041, - "step": 160600 - }, - { - "epoch": 0.69, - "learning_rate": 0.00573809131218979, - "loss": 8.3044, - "step": 160800 - }, - { - "epoch": 0.69, - "learning_rate": 0.005748079886466307, - "loss": 8.309, - "step": 161000 - }, - { - "epoch": 0.69, - "learning_rate": 0.005758016131782519, - "loss": 8.3032, - "step": 161200 - }, - { - "epoch": 0.69, - "learning_rate": 0.005767999866278191, - "loss": 8.2924, - "step": 161400 - }, - { - "epoch": 0.69, - "learning_rate": 0.00577798110923195, - "loss": 8.3013, - "step": 161600 - }, - { - "epoch": 0.7, - "learning_rate": 0.005787959816860888, - "loss": 8.2932, - "step": 161800 - }, - { - "epoch": 0.7, - "learning_rate": 0.005797935945393221, - "loss": 8.3004, - "step": 162000 - }, - { - "epoch": 0.7, - "learning_rate": 0.005807909451068479, - "loss": 8.3021, - "step": 162200 - }, - { - "epoch": 0.7, - "learning_rate": 0.005817880290137692, - "loss": 8.2853, - "step": 162400 - }, - { - "epoch": 0.7, - "learning_rate": 0.005827848418863589, - "loss": 8.2939, - "step": 162600 - }, - { - "epoch": 0.7, - "learning_rate": 0.005837813793520793, - "loss": 8.2853, - "step": 162800 - }, - { - "epoch": 0.7, - "learning_rate": 0.005847776370396001, - "loss": 8.2891, - "step": 163000 - }, - { - "epoch": 0.7, - "learning_rate": 0.005857686314251688, - "loss": 8.289, - "step": 163200 - }, - { - "epoch": 0.7, - "learning_rate": 0.0058676431790067995, - "loss": 8.2947, - "step": 163400 - }, - { - "epoch": 0.7, - "learning_rate": 0.005877597115132765, - "loss": 8.2841, - "step": 163600 - }, - { - "epoch": 0.7, - "learning_rate": 0.005887548078966459, - "loss": 8.2965, - "step": 163800 - }, - { - "epoch": 0.7, - "learning_rate": 0.005897496026857796, - "loss": 8.2889, - "step": 164000 - }, - { - "epoch": 0.71, - "learning_rate": 0.005907440915169918, - "loss": 8.2831, - "step": 164200 - }, - { - "epoch": 0.71, - "learning_rate": 0.005917382700279389, - "loss": 8.2909, - "step": 164400 - }, - { - "epoch": 0.71, - "learning_rate": 0.005927321338576383, - "loss": 8.2886, - "step": 164600 - }, - { - "epoch": 0.71, - "learning_rate": 0.005937256786464883, - "loss": 8.2708, - "step": 164800 - }, - { - "epoch": 0.71, - "learning_rate": 0.00594718900036286, - "loss": 8.2819, - "step": 165000 - }, - { - "epoch": 0.71, - "learning_rate": 0.005957117936702476, - "loss": 8.2866, - "step": 165200 - }, - { - "epoch": 0.71, - "learning_rate": 0.005966993932187419, - "loss": 8.2745, - "step": 165400 - }, - { - "epoch": 0.71, - "learning_rate": 0.005976916199696024, - "loss": 8.275, - "step": 165600 - }, - { - "epoch": 0.71, - "learning_rate": 0.0059868350592473555, - "loss": 8.2783, - "step": 165800 - }, - { - "epoch": 0.71, - "learning_rate": 0.005996750467332155, - "loss": 8.2911, - "step": 166000 - }, - { - "epoch": 0.71, - "learning_rate": 0.006006662380456304, - "loss": 8.2759, - "step": 166200 - }, - { - "epoch": 0.72, - "learning_rate": 0.0060165707551410085, - "loss": 8.2685, - "step": 166400 - }, - { - "epoch": 0.72, - "learning_rate": 0.006026475547923, - "loss": 8.2824, - "step": 166600 - }, - { - "epoch": 0.72, - "learning_rate": 0.006036376715354726, - "loss": 8.2739, - "step": 166800 - }, - { - "epoch": 0.72, - "learning_rate": 0.006046274214004529, - "loss": 8.2747, - "step": 167000 - }, - { - "epoch": 0.72, - "learning_rate": 0.00605616800045685, - "loss": 8.2756, - "step": 167200 - }, - { - "epoch": 0.72, - "learning_rate": 0.0060660085905719485, - "loss": 8.2762, - "step": 167400 - }, - { - "epoch": 0.72, - "learning_rate": 0.006075894841550729, - "loss": 8.2703, - "step": 167600 - }, - { - "epoch": 0.72, - "learning_rate": 0.0060857772504005975, - "loss": 8.2609, - "step": 167800 - }, - { - "epoch": 0.72, - "learning_rate": 0.006095655773772182, - "loss": 8.2584, - "step": 168000 - }, - { - "epoch": 0.72, - "learning_rate": 0.0061055303683331605, - "loss": 8.2796, - "step": 168200 - }, - { - "epoch": 0.72, - "learning_rate": 0.006115400990768439, - "loss": 8.2669, - "step": 168400 - }, - { - "epoch": 0.72, - "learning_rate": 0.0061252675977803515, - "loss": 8.2772, - "step": 168600 - }, - { - "epoch": 0.73, - "learning_rate": 0.00613513014608884, - "loss": 8.2722, - "step": 168800 - }, - { - "epoch": 0.73, - "learning_rate": 0.006144988592431659, - "loss": 8.2616, - "step": 169000 - }, - { - "epoch": 0.73, - "learning_rate": 0.006154842893564549, - "loss": 8.2538, - "step": 169200 - }, - { - "epoch": 0.73, - "learning_rate": 0.006164643766188168, - "loss": 8.2573, - "step": 169400 - }, - { - "epoch": 0.73, - "learning_rate": 0.006174489668507017, - "loss": 8.2711, - "step": 169600 - }, - { - "epoch": 0.73, - "learning_rate": 0.006184331296208924, - "loss": 8.269, - "step": 169800 - }, - { - "epoch": 0.73, - "learning_rate": 0.006194168606123404, - "loss": 8.2657, - "step": 170000 - }, - { - "epoch": 0.73, - "learning_rate": 0.006204001555098915, - "loss": 8.2654, - "step": 170200 - }, - { - "epoch": 0.73, - "learning_rate": 0.0062138301000030476, - "loss": 8.2635, - "step": 170400 - }, - { - "epoch": 0.73, - "learning_rate": 0.006223654197722705, - "loss": 8.2583, - "step": 170600 - }, - { - "epoch": 0.73, - "learning_rate": 0.0062334738051643005, - "loss": 8.2626, - "step": 170800 - }, - { - "epoch": 0.73, - "learning_rate": 0.006243288879253949, - "loss": 8.2705, - "step": 171000 - }, - { - "epoch": 0.74, - "learning_rate": 0.0062530993769376415, - "loss": 8.2574, - "step": 171200 - }, - { - "epoch": 0.74, - "learning_rate": 0.00626285623735225, - "loss": 8.2608, - "step": 171400 - }, - { - "epoch": 0.74, - "learning_rate": 0.00627265747656174, - "loss": 8.2509, - "step": 171600 - }, - { - "epoch": 0.74, - "learning_rate": 0.006282454010539383, - "loss": 8.2597, - "step": 171800 - }, - { - "epoch": 0.74, - "learning_rate": 0.006292245796312498, - "loss": 8.2696, - "step": 172000 - }, - { - "epoch": 0.74, - "learning_rate": 0.006302032790929236, - "loss": 8.2617, - "step": 172200 - }, - { - "epoch": 0.74, - "learning_rate": 0.00631181495145877, - "loss": 8.264, - "step": 172400 - }, - { - "epoch": 0.74, - "learning_rate": 0.006321592234991465, - "loss": 8.2502, - "step": 172600 - }, - { - "epoch": 0.74, - "learning_rate": 0.006331364598639089, - "loss": 8.2464, - "step": 172800 - }, - { - "epoch": 0.74, - "learning_rate": 0.006341131999534987, - "loss": 8.2592, - "step": 173000 - }, - { - "epoch": 0.74, - "learning_rate": 0.006350894394834276, - "loss": 8.2656, - "step": 173200 - }, - { - "epoch": 0.75, - "learning_rate": 0.006360651741714029, - "loss": 8.2499, - "step": 173400 - }, - { - "epoch": 0.75, - "learning_rate": 0.006370355248830346, - "loss": 8.2524, - "step": 173600 - }, - { - "epoch": 0.75, - "learning_rate": 0.006380102396267379, - "loss": 8.2597, - "step": 173800 - }, - { - "epoch": 0.75, - "learning_rate": 0.006389844367163435, - "loss": 8.2635, - "step": 174000 - }, - { - "epoch": 0.75, - "learning_rate": 0.006399581118785181, - "loss": 8.252, - "step": 174200 - }, - { - "epoch": 0.75, - "learning_rate": 0.006409312608422174, - "loss": 8.2597, - "step": 174400 - }, - { - "epoch": 0.75, - "learning_rate": 0.006419038793387056, - "loss": 8.2518, - "step": 174600 - }, - { - "epoch": 0.75, - "learning_rate": 0.006428759631015739, - "loss": 8.2548, - "step": 174800 - }, - { - "epoch": 0.75, - "learning_rate": 0.006438475078667585, - "loss": 8.2569, - "step": 175000 - }, - { - "epoch": 0.75, - "learning_rate": 0.006448185093725606, - "loss": 8.2625, - "step": 175200 - }, - { - "epoch": 0.75, - "learning_rate": 0.006457889633596642, - "loss": 8.2421, - "step": 175400 - }, - { - "epoch": 0.75, - "learning_rate": 0.006467540174396778, - "loss": 8.2546, - "step": 175600 - }, - { - "epoch": 0.76, - "learning_rate": 0.00647723366411791, - "loss": 8.2497, - "step": 175800 - }, - { - "epoch": 0.76, - "learning_rate": 0.006486921551229965, - "loss": 8.2505, - "step": 176000 - }, - { - "epoch": 0.76, - "learning_rate": 0.006496603793236846, - "loss": 8.2406, - "step": 176200 - }, - { - "epoch": 0.76, - "learning_rate": 0.006506280347667219, - "loss": 8.2468, - "step": 176400 - }, - { - "epoch": 0.76, - "learning_rate": 0.006515951172074704, - "loss": 8.2552, - "step": 176600 - }, - { - "epoch": 0.76, - "learning_rate": 0.006525616224038046, - "loss": 8.2551, - "step": 176800 - }, - { - "epoch": 0.76, - "learning_rate": 0.006535275461161318, - "loss": 8.26, - "step": 177000 - }, - { - "epoch": 0.76, - "learning_rate": 0.006544928841074094, - "loss": 8.253, - "step": 177200 - }, - { - "epoch": 0.76, - "learning_rate": 0.006554576321431651, - "loss": 8.2469, - "step": 177400 - }, - { - "epoch": 0.76, - "learning_rate": 0.0065641696670731, - "loss": 8.2543, - "step": 177600 - }, - { - "epoch": 0.76, - "learning_rate": 0.006573805251415716, - "loss": 8.2443, - "step": 177800 - }, - { - "epoch": 0.76, - "learning_rate": 0.0065834348095362046, - "loss": 8.2469, - "step": 178000 - }, - { - "epoch": 0.77, - "learning_rate": 0.006593058299194326, - "loss": 8.2494, - "step": 178200 - }, - { - "epoch": 0.77, - "learning_rate": 0.00660267567817647, - "loss": 8.2512, - "step": 178400 - }, - { - "epoch": 0.77, - "learning_rate": 0.006612286904295821, - "loss": 8.2449, - "step": 178600 - }, - { - "epoch": 0.77, - "learning_rate": 0.0066218919353925616, - "loss": 8.2448, - "step": 178800 - }, - { - "epoch": 0.77, - "learning_rate": 0.006631490729334045, - "loss": 8.2604, - "step": 179000 - }, - { - "epoch": 0.77, - "learning_rate": 0.006641083244014981, - "loss": 8.2434, - "step": 179200 - }, - { - "epoch": 0.77, - "learning_rate": 0.006650669437357629, - "loss": 8.2451, - "step": 179400 - }, - { - "epoch": 0.77, - "learning_rate": 0.006660249267311978, - "loss": 8.239, - "step": 179600 - }, - { - "epoch": 0.77, - "learning_rate": 0.006669774840736135, - "loss": 8.2516, - "step": 179800 - }, - { - "epoch": 0.77, - "learning_rate": 0.006679341850217107, - "loss": 8.2426, - "step": 180000 - }, - { - "epoch": 0.77, - "learning_rate": 0.0066889023705377065, - "loss": 8.2418, - "step": 180200 - }, - { - "epoch": 0.78, - "learning_rate": 0.006698456359760537, - "loss": 8.2491, - "step": 180400 - }, - { - "epoch": 0.78, - "learning_rate": 0.006708003775976843, - "loss": 8.2488, - "step": 180600 - }, - { - "epoch": 0.78, - "learning_rate": 0.0067175445773067135, - "loss": 8.2334, - "step": 180800 - }, - { - "epoch": 0.78, - "learning_rate": 0.0067270787218992436, - "loss": 8.2533, - "step": 181000 - }, - { - "epoch": 0.78, - "learning_rate": 0.006736606167932732, - "loss": 8.2462, - "step": 181200 - }, - { - "epoch": 0.78, - "learning_rate": 0.006746126873614859, - "loss": 8.2453, - "step": 181400 - }, - { - "epoch": 0.78, - "learning_rate": 0.006755640797182877, - "loss": 8.2365, - "step": 181600 - }, - { - "epoch": 0.78, - "learning_rate": 0.006765100378448494, - "loss": 8.2326, - "step": 181800 - }, - { - "epoch": 0.78, - "learning_rate": 0.006774600647050655, - "loss": 8.2368, - "step": 182000 - }, - { - "epoch": 0.78, - "learning_rate": 0.0067840940086379715, - "loss": 8.2338, - "step": 182200 - }, - { - "epoch": 0.78, - "learning_rate": 0.006793580421567637, - "loss": 8.2408, - "step": 182400 - }, - { - "epoch": 0.78, - "learning_rate": 0.006803059844227327, - "loss": 8.2357, - "step": 182600 - }, - { - "epoch": 0.79, - "learning_rate": 0.006812532235035376, - "loss": 8.2351, - "step": 182800 - }, - { - "epoch": 0.79, - "learning_rate": 0.006821997552440971, - "loss": 8.2375, - "step": 183000 - }, - { - "epoch": 0.79, - "learning_rate": 0.006831455754924318, - "loss": 8.2335, - "step": 183200 - }, - { - "epoch": 0.79, - "learning_rate": 0.006840906800996838, - "loss": 8.2307, - "step": 183400 - }, - { - "epoch": 0.79, - "learning_rate": 0.006850350649201346, - "loss": 8.239, - "step": 183600 - }, - { - "epoch": 0.79, - "learning_rate": 0.006859740093143898, - "loss": 8.2355, - "step": 183800 - }, - { - "epoch": 0.79, - "learning_rate": 0.006869169457873638, - "loss": 8.2304, - "step": 184000 - }, - { - "epoch": 0.79, - "learning_rate": 0.006878591500760698, - "loss": 8.2408, - "step": 184200 - }, - { - "epoch": 0.79, - "learning_rate": 0.006888006180475111, - "loss": 8.2358, - "step": 184400 - }, - { - "epoch": 0.79, - "learning_rate": 0.006897413455719212, - "loss": 8.2346, - "step": 184600 - }, - { - "epoch": 0.79, - "learning_rate": 0.00690681328522781, - "loss": 8.2301, - "step": 184800 - }, - { - "epoch": 0.79, - "learning_rate": 0.006916205627768383, - "loss": 8.2342, - "step": 185000 - }, - { - "epoch": 0.8, - "learning_rate": 0.006925590442141248, - "loss": 8.234, - "step": 185200 - }, - { - "epoch": 0.8, - "learning_rate": 0.00693496768717974, - "loss": 8.2303, - "step": 185400 - }, - { - "epoch": 0.8, - "learning_rate": 0.006944337321750399, - "loss": 8.2387, - "step": 185600 - }, - { - "epoch": 0.8, - "learning_rate": 0.006953652513939363, - "loss": 8.2432, - "step": 185800 - }, - { - "epoch": 0.8, - "learning_rate": 0.00696300684287297, - "loss": 8.2354, - "step": 186000 - }, - { - "epoch": 0.8, - "learning_rate": 0.006972353438344472, - "loss": 8.2388, - "step": 186200 - }, - { - "epoch": 0.8, - "learning_rate": 0.006981692259354855, - "loss": 8.2356, - "step": 186400 - }, - { - "epoch": 0.8, - "learning_rate": 0.006991023264939209, - "loss": 8.2305, - "step": 186600 - }, - { - "epoch": 0.8, - "learning_rate": 0.007000346414166903, - "loss": 8.2279, - "step": 186800 - }, - { - "epoch": 0.8, - "learning_rate": 0.007009661666141772, - "loss": 8.2372, - "step": 187000 - }, - { - "epoch": 0.8, - "learning_rate": 0.007018968980002293, - "loss": 8.2265, - "step": 187200 - }, - { - "epoch": 0.81, - "learning_rate": 0.0070282683149217595, - "loss": 8.2277, - "step": 187400 - }, - { - "epoch": 0.81, - "learning_rate": 0.007037559630108464, - "loss": 8.226, - "step": 187600 - }, - { - "epoch": 0.81, - "learning_rate": 0.007046796488650243, - "loss": 8.227, - "step": 187800 - }, - { - "epoch": 0.81, - "learning_rate": 0.0070560716827445, - "loss": 8.2293, - "step": 188000 - }, - { - "epoch": 0.81, - "learning_rate": 0.007065338735146011, - "loss": 8.2353, - "step": 188200 - }, - { - "epoch": 0.81, - "learning_rate": 0.00707459760520468, - "loss": 8.2206, - "step": 188400 - }, - { - "epoch": 0.81, - "learning_rate": 0.007083848252306303, - "loss": 8.2354, - "step": 188600 - }, - { - "epoch": 0.81, - "learning_rate": 0.007093090635872743, - "loss": 8.2356, - "step": 188800 - }, - { - "epoch": 0.81, - "learning_rate": 0.007102324715362115, - "loss": 8.2221, - "step": 189000 - }, - { - "epoch": 0.81, - "learning_rate": 0.0071115504502689605, - "loss": 8.2237, - "step": 189200 - }, - { - "epoch": 0.81, - "learning_rate": 0.00712076780012442, - "loss": 8.2361, - "step": 189400 - }, - { - "epoch": 0.81, - "learning_rate": 0.00712997672449642, - "loss": 8.2405, - "step": 189600 - }, - { - "epoch": 0.82, - "learning_rate": 0.007139131201823022, - "loss": 8.2174, - "step": 189800 - }, - { - "epoch": 0.82, - "learning_rate": 0.007148323196711397, - "loss": 8.2172, - "step": 190000 - }, - { - "epoch": 0.82, - "learning_rate": 0.007157506645244055, - "loss": 8.2265, - "step": 190200 - }, - { - "epoch": 0.82, - "learning_rate": 0.007166681507137629, - "loss": 8.227, - "step": 190400 - }, - { - "epoch": 0.82, - "learning_rate": 0.007175847742146418, - "loss": 8.2182, - "step": 190600 - }, - { - "epoch": 0.82, - "learning_rate": 0.0071850053100625645, - "loss": 8.2301, - "step": 190800 - }, - { - "epoch": 0.82, - "learning_rate": 0.007194108448138671, - "loss": 8.2282, - "step": 191000 - }, - { - "epoch": 0.82, - "learning_rate": 0.007203248605234934, - "loss": 8.2262, - "step": 191200 - }, - { - "epoch": 0.82, - "learning_rate": 0.007212379975044163, - "loss": 8.2217, - "step": 191400 - }, - { - "epoch": 0.82, - "learning_rate": 0.007221502517511431, - "loss": 8.2126, - "step": 191600 - }, - { - "epoch": 0.82, - "learning_rate": 0.007230616192620535, - "loss": 8.2391, - "step": 191800 - }, - { - "epoch": 0.83, - "learning_rate": 0.007239720960394179, - "loss": 8.2305, - "step": 192000 - }, - { - "epoch": 0.83, - "learning_rate": 0.0072488167808941245, - "loss": 8.2162, - "step": 192200 - }, - { - "epoch": 0.83, - "learning_rate": 0.007257903614221387, - "loss": 8.2307, - "step": 192400 - }, - { - "epoch": 0.83, - "learning_rate": 0.007266981420516409, - "loss": 8.2219, - "step": 192600 - }, - { - "epoch": 0.83, - "learning_rate": 0.0072760501599592195, - "loss": 8.2438, - "step": 192800 - }, - { - "epoch": 0.83, - "learning_rate": 0.007285109792769629, - "loss": 8.2174, - "step": 193000 - }, - { - "epoch": 0.83, - "learning_rate": 0.007294160279207389, - "loss": 8.2187, - "step": 193200 - }, - { - "epoch": 0.83, - "learning_rate": 0.007303201579572373, - "loss": 8.2265, - "step": 193400 - }, - { - "epoch": 0.83, - "learning_rate": 0.0073122336542047525, - "loss": 8.2221, - "step": 193600 - }, - { - "epoch": 0.83, - "learning_rate": 0.0073212564634851625, - "loss": 8.2116, - "step": 193800 - }, - { - "epoch": 0.83, - "learning_rate": 0.007330269967834884, - "loss": 8.2177, - "step": 194000 - }, - { - "epoch": 0.83, - "learning_rate": 0.007339274127716014, - "loss": 8.2238, - "step": 194200 - }, - { - "epoch": 0.84, - "learning_rate": 0.007348268903631638, - "loss": 8.2249, - "step": 194400 - }, - { - "epoch": 0.84, - "learning_rate": 0.007357254256126006, - "loss": 8.2236, - "step": 194600 - }, - { - "epoch": 0.84, - "learning_rate": 0.007366230145784704, - "loss": 8.213, - "step": 194800 - }, - { - "epoch": 0.84, - "learning_rate": 0.007375151724999397, - "loss": 8.2272, - "step": 195000 - }, - { - "epoch": 0.84, - "learning_rate": 0.007384108618715181, - "loss": 8.2287, - "step": 195200 - }, - { - "epoch": 0.84, - "learning_rate": 0.007393055931798136, - "loss": 8.2199, - "step": 195400 - }, - { - "epoch": 0.84, - "learning_rate": 0.007401993625000712, - "loss": 8.2156, - "step": 195600 - }, - { - "epoch": 0.84, - "learning_rate": 0.007410921659117544, - "loss": 8.2257, - "step": 195800 - }, - { - "epoch": 0.84, - "learning_rate": 0.00741983999498565, - "loss": 8.2159, - "step": 196000 - }, - { - "epoch": 0.84, - "learning_rate": 0.007428748593484588, - "loss": 8.2148, - "step": 196200 - }, - { - "epoch": 0.84, - "learning_rate": 0.007437647415536619, - "loss": 8.2229, - "step": 196400 - }, - { - "epoch": 0.84, - "learning_rate": 0.0074465364221069, - "loss": 8.2228, - "step": 196600 - }, - { - "epoch": 0.85, - "learning_rate": 0.0074554155742036395, - "loss": 8.2162, - "step": 196800 - }, - { - "epoch": 0.85, - "learning_rate": 0.007464240511259159, - "loss": 8.2117, - "step": 197000 - }, - { - "epoch": 0.85, - "learning_rate": 0.0074730998873648565, - "loss": 8.2057, - "step": 197200 - }, - { - "epoch": 0.85, - "learning_rate": 0.00748194929247588, - "loss": 8.2118, - "step": 197400 - }, - { - "epoch": 0.85, - "learning_rate": 0.007490788687774152, - "loss": 8.2106, - "step": 197600 - }, - { - "epoch": 0.85, - "learning_rate": 0.007499618034485498, - "loss": 8.2192, - "step": 197800 - }, - { - "epoch": 0.85, - "learning_rate": 0.007508437293879828, - "loss": 8.2083, - "step": 198000 - }, - { - "epoch": 0.85, - "learning_rate": 0.007517246427271298, - "loss": 8.2165, - "step": 198200 - }, - { - "epoch": 0.85, - "learning_rate": 0.0075260453960184805, - "loss": 8.2069, - "step": 198400 - }, - { - "epoch": 0.85, - "learning_rate": 0.007534834161524539, - "loss": 8.2156, - "step": 198600 - }, - { - "epoch": 0.85, - "learning_rate": 0.007543612685237384, - "loss": 8.209, - "step": 198800 - }, - { - "epoch": 0.86, - "learning_rate": 0.007552337113068695, - "loss": 8.2189, - "step": 199000 - }, - { - "epoch": 0.86, - "learning_rate": 0.007561095089408162, - "loss": 8.2203, - "step": 199200 - }, - { - "epoch": 0.86, - "learning_rate": 0.0075698427087603735, - "loss": 8.2176, - "step": 199400 - }, - { - "epoch": 0.86, - "learning_rate": 0.007578579932753739, - "loss": 8.216, - "step": 199600 - }, - { - "epoch": 0.86, - "learning_rate": 0.007587306723062261, - "loss": 8.204, - "step": 199800 - }, - { - "epoch": 0.86, - "learning_rate": 0.0075960230414057126, - "loss": 8.2027, - "step": 200000 - }, - { - "epoch": 0.86, - "learning_rate": 0.007604728849549804, - "loss": 8.202, - "step": 200200 - }, - { - "epoch": 0.86, - "learning_rate": 0.007613424109306344, - "loss": 8.208, - "step": 200400 - }, - { - "epoch": 0.86, - "learning_rate": 0.007622108782533417, - "loss": 8.2064, - "step": 200600 - }, - { - "epoch": 0.86, - "learning_rate": 0.007630782831135545, - "loss": 8.2117, - "step": 200800 - }, - { - "epoch": 0.86, - "learning_rate": 0.00763940292672049, - "loss": 8.2037, - "step": 201000 - }, - { - "epoch": 0.86, - "learning_rate": 0.007648055665570705, - "loss": 8.2028, - "step": 201200 - }, - { - "epoch": 0.87, - "learning_rate": 0.007656697665979489, - "loss": 8.2039, - "step": 201400 - }, - { - "epoch": 0.87, - "learning_rate": 0.007665328890038548, - "loss": 8.2014, - "step": 201600 - }, - { - "epoch": 0.87, - "learning_rate": 0.007673949299886863, - "loss": 8.208, - "step": 201800 - }, - { - "epoch": 0.87, - "learning_rate": 0.007682558857710842, - "loss": 8.2054, - "step": 202000 - }, - { - "epoch": 0.87, - "learning_rate": 0.007691157525744499, - "loss": 8.2124, - "step": 202200 - }, - { - "epoch": 0.87, - "learning_rate": 0.007699745266269625, - "loss": 8.2065, - "step": 202400 - }, - { - "epoch": 0.87, - "learning_rate": 0.007708322041615931, - "loss": 8.2088, - "step": 202600 - }, - { - "epoch": 0.87, - "learning_rate": 0.007716887814161235, - "loss": 8.2067, - "step": 202800 - }, - { - "epoch": 0.87, - "learning_rate": 0.007725399800195792, - "loss": 8.1994, - "step": 203000 - }, - { - "epoch": 0.87, - "learning_rate": 0.0077339435099485305, - "loss": 8.1974, - "step": 203200 - }, - { - "epoch": 0.87, - "learning_rate": 0.007742476104511222, - "loss": 8.2037, - "step": 203400 - }, - { - "epoch": 0.87, - "learning_rate": 0.007750997546455483, - "loss": 8.2063, - "step": 203600 - }, - { - "epoch": 0.88, - "learning_rate": 0.007759507798401852, - "loss": 8.204, - "step": 203800 - }, - { - "epoch": 0.88, - "learning_rate": 0.007768006823019953, - "loss": 8.2052, - "step": 204000 - }, - { - "epoch": 0.88, - "learning_rate": 0.007776494583028655, - "loss": 8.2027, - "step": 204200 - }, - { - "epoch": 0.88, - "learning_rate": 0.007784971041196239, - "loss": 8.2206, - "step": 204400 - }, - { - "epoch": 0.88, - "learning_rate": 0.00779343616034057, - "loss": 8.2035, - "step": 204600 - }, - { - "epoch": 0.88, - "learning_rate": 0.007801889903329247, - "loss": 8.2226, - "step": 204800 - }, - { - "epoch": 0.88, - "learning_rate": 0.007810290049882719, - "loss": 8.2112, - "step": 205000 - }, - { - "epoch": 0.88, - "learning_rate": 0.007818720986706034, - "loss": 8.199, - "step": 205200 - }, - { - "epoch": 0.88, - "learning_rate": 0.007827140436461334, - "loss": 8.2052, - "step": 205400 - }, - { - "epoch": 0.88, - "learning_rate": 0.007835548362216546, - "loss": 8.1952, - "step": 205600 - }, - { - "epoch": 0.88, - "learning_rate": 0.007843944727090149, - "loss": 8.1924, - "step": 205800 - }, - { - "epoch": 0.89, - "learning_rate": 0.007852329494251334, - "loss": 8.1967, - "step": 206000 - }, - { - "epoch": 0.89, - "learning_rate": 0.007860702626920164, - "loss": 8.1961, - "step": 206200 - }, - { - "epoch": 0.89, - "learning_rate": 0.007869064088367739, - "loss": 8.202, - "step": 206400 - }, - { - "epoch": 0.89, - "learning_rate": 0.007877413841916352, - "loss": 8.2045, - "step": 206600 - }, - { - "epoch": 0.89, - "learning_rate": 0.007885751850939657, - "loss": 8.2146, - "step": 206800 - }, - { - "epoch": 0.89, - "learning_rate": 0.007894036477089129, - "loss": 8.1876, - "step": 207000 - }, - { - "epoch": 0.89, - "learning_rate": 0.007902350946567882, - "loss": 8.1981, - "step": 207200 - }, - { - "epoch": 0.89, - "learning_rate": 0.00791065356213426, - "loss": 8.1975, - "step": 207400 - }, - { - "epoch": 0.89, - "learning_rate": 0.007918944287368676, - "loss": 8.2017, - "step": 207600 - }, - { - "epoch": 0.89, - "learning_rate": 0.007927223085903717, - "loss": 8.1964, - "step": 207800 - }, - { - "epoch": 0.89, - "learning_rate": 0.007935489921424281, - "loss": 8.2027, - "step": 208000 - }, - { - "epoch": 0.89, - "learning_rate": 0.00794374475766774, - "loss": 8.1952, - "step": 208200 - }, - { - "epoch": 0.9, - "learning_rate": 0.0079519875584241, - "loss": 8.2016, - "step": 208400 - }, - { - "epoch": 0.9, - "learning_rate": 0.007960218287536163, - "loss": 8.1876, - "step": 208600 - }, - { - "epoch": 0.9, - "learning_rate": 0.007968436908899689, - "loss": 8.1998, - "step": 208800 - }, - { - "epoch": 0.9, - "learning_rate": 0.007976602384342989, - "loss": 8.1967, - "step": 209000 - }, - { - "epoch": 0.9, - "learning_rate": 0.007984796743097742, - "loss": 8.1886, - "step": 209200 - }, - { - "epoch": 0.9, - "learning_rate": 0.007992978886290105, - "loss": 8.1987, - "step": 209400 - }, - { - "epoch": 0.9, - "learning_rate": 0.008001148778028955, - "loss": 8.187, - "step": 209600 - }, - { - "epoch": 0.9, - "learning_rate": 0.008009306382476912, - "loss": 8.1869, - "step": 209800 - }, - { - "epoch": 0.9, - "learning_rate": 0.00801745166385049, - "loss": 8.1888, - "step": 210000 - }, - { - "epoch": 0.9, - "learning_rate": 0.008025584586420267, - "loss": 8.1991, - "step": 210200 - }, - { - "epoch": 0.9, - "learning_rate": 0.008033705114511026, - "loss": 8.1865, - "step": 210400 - }, - { - "epoch": 0.91, - "learning_rate": 0.008041813212501915, - "loss": 8.1892, - "step": 210600 - }, - { - "epoch": 0.91, - "learning_rate": 0.00804990884482662, - "loss": 8.189, - "step": 210800 - }, - { - "epoch": 0.91, - "learning_rate": 0.008057951591473118, - "loss": 8.2038, - "step": 211000 - }, - { - "epoch": 0.91, - "learning_rate": 0.008066022248756641, - "loss": 8.1832, - "step": 211200 - }, - { - "epoch": 0.91, - "learning_rate": 0.008074080334180595, - "loss": 8.1805, - "step": 211400 - }, - { - "epoch": 0.91, - "learning_rate": 0.008082125812398037, - "loss": 8.1949, - "step": 211600 - }, - { - "epoch": 0.91, - "learning_rate": 0.008090158648117324, - "loss": 8.1973, - "step": 211800 - }, - { - "epoch": 0.91, - "learning_rate": 0.008098178806102278, - "loss": 8.2107, - "step": 212000 - }, - { - "epoch": 0.91, - "learning_rate": 0.008106186251172326, - "loss": 8.1891, - "step": 212200 - }, - { - "epoch": 0.91, - "learning_rate": 0.008114180948202663, - "loss": 8.1986, - "step": 212400 - }, - { - "epoch": 0.91, - "learning_rate": 0.008122122984410717, - "loss": 8.1907, - "step": 212600 - }, - { - "epoch": 0.91, - "learning_rate": 0.008130092144388654, - "loss": 8.1888, - "step": 212800 - }, - { - "epoch": 0.92, - "learning_rate": 0.00813804845146324, - "loss": 8.1984, - "step": 213000 - }, - { - "epoch": 0.92, - "learning_rate": 0.008145991870733981, - "loss": 8.1925, - "step": 213200 - }, - { - "epoch": 0.92, - "learning_rate": 0.008153922367356924, - "loss": 8.1879, - "step": 213400 - }, - { - "epoch": 0.92, - "learning_rate": 0.008161839906544802, - "loss": 8.1888, - "step": 213600 - }, - { - "epoch": 0.92, - "learning_rate": 0.008169744453567182, - "loss": 8.2048, - "step": 213800 - }, - { - "epoch": 0.92, - "learning_rate": 0.008177635973750616, - "loss": 8.1844, - "step": 214000 - }, - { - "epoch": 0.92, - "learning_rate": 0.008185514432478813, - "loss": 8.1966, - "step": 214200 - }, - { - "epoch": 0.92, - "learning_rate": 0.008193379795192766, - "loss": 8.1862, - "step": 214400 - }, - { - "epoch": 0.92, - "learning_rate": 0.008201232027390915, - "loss": 8.1855, - "step": 214600 - }, - { - "epoch": 0.92, - "learning_rate": 0.008209071094629296, - "loss": 8.1756, - "step": 214800 - }, - { - "epoch": 0.92, - "learning_rate": 0.0082168969625217, - "loss": 8.1943, - "step": 215000 - }, - { - "epoch": 0.92, - "learning_rate": 0.00822470959673981, - "loss": 8.178, - "step": 215200 - }, - { - "epoch": 0.93, - "learning_rate": 0.008232508963013363, - "loss": 8.1926, - "step": 215400 - }, - { - "epoch": 0.93, - "learning_rate": 0.008240295027130294, - "loss": 8.192, - "step": 215600 - }, - { - "epoch": 0.93, - "learning_rate": 0.008248067754936889, - "loss": 8.1819, - "step": 215800 - }, - { - "epoch": 0.93, - "learning_rate": 0.008255827112337934, - "loss": 8.1849, - "step": 216000 - }, - { - "epoch": 0.93, - "learning_rate": 0.008263573065296865, - "loss": 8.178, - "step": 216200 - }, - { - "epoch": 0.93, - "learning_rate": 0.008271305579835913, - "loss": 8.1889, - "step": 216400 - }, - { - "epoch": 0.93, - "learning_rate": 0.00827898606039374, - "loss": 8.1884, - "step": 216600 - }, - { - "epoch": 0.93, - "learning_rate": 0.00828669166401079, - "loss": 8.1792, - "step": 216800 - }, - { - "epoch": 0.93, - "learning_rate": 0.0082943837277978, - "loss": 8.1716, - "step": 217000 - }, - { - "epoch": 0.93, - "learning_rate": 0.008302062218013388, - "loss": 8.1858, - "step": 217200 - }, - { - "epoch": 0.93, - "learning_rate": 0.008309727100975717, - "loss": 8.1798, - "step": 217400 - }, - { - "epoch": 0.94, - "learning_rate": 0.00831737834306263, - "loss": 8.1763, - "step": 217600 - }, - { - "epoch": 0.94, - "learning_rate": 0.008325015910711822, - "loss": 8.1809, - "step": 217800 - }, - { - "epoch": 0.94, - "learning_rate": 0.008332639770420956, - "loss": 8.1811, - "step": 218000 - }, - { - "epoch": 0.94, - "learning_rate": 0.008340249888747828, - "loss": 8.1877, - "step": 218200 - }, - { - "epoch": 0.94, - "learning_rate": 0.008347846232310514, - "loss": 8.1772, - "step": 218400 - }, - { - "epoch": 0.94, - "learning_rate": 0.008355390889512792, - "loss": 8.1974, - "step": 218600 - }, - { - "epoch": 0.94, - "learning_rate": 0.008362959652932546, - "loss": 8.1883, - "step": 218800 - }, - { - "epoch": 0.94, - "learning_rate": 0.008370514541971316, - "loss": 8.1876, - "step": 219000 - }, - { - "epoch": 0.94, - "learning_rate": 0.008378055523489442, - "loss": 8.1721, - "step": 219200 - }, - { - "epoch": 0.94, - "learning_rate": 0.008385582564408266, - "loss": 8.19, - "step": 219400 - }, - { - "epoch": 0.94, - "learning_rate": 0.00839309563171029, - "loss": 8.1648, - "step": 219600 - }, - { - "epoch": 0.94, - "learning_rate": 0.008400594692439296, - "loss": 8.1754, - "step": 219800 - }, - { - "epoch": 0.95, - "learning_rate": 0.008408079713700525, - "loss": 8.174, - "step": 220000 - }, - { - "epoch": 0.95, - "learning_rate": 0.008415550662660788, - "loss": 8.1785, - "step": 220200 - }, - { - "epoch": 0.95, - "learning_rate": 0.00842300750654863, - "loss": 8.1834, - "step": 220400 - }, - { - "epoch": 0.95, - "learning_rate": 0.008430413034345703, - "loss": 8.174, - "step": 220600 - }, - { - "epoch": 0.95, - "learning_rate": 0.008437804533224642, - "loss": 8.17, - "step": 220800 - }, - { - "epoch": 0.95, - "learning_rate": 0.00844521900807717, - "loss": 8.19, - "step": 221000 - }, - { - "epoch": 0.95, - "learning_rate": 0.00845261924771653, - "loss": 8.1853, - "step": 221200 - }, - { - "epoch": 0.95, - "learning_rate": 0.008460005219681436, - "loss": 8.1901, - "step": 221400 - }, - { - "epoch": 0.95, - "learning_rate": 0.008467376891573187, - "loss": 8.1842, - "step": 221600 - }, - { - "epoch": 0.95, - "learning_rate": 0.008474734231055802, - "loss": 8.1911, - "step": 221800 - }, - { - "epoch": 0.95, - "learning_rate": 0.008482077205856182, - "loss": 8.1823, - "step": 222000 - }, - { - "epoch": 0.95, - "learning_rate": 0.00848940578376423, - "loss": 8.1846, - "step": 222200 - }, - { - "epoch": 0.96, - "learning_rate": 0.008496719932633002, - "loss": 8.1803, - "step": 222400 - }, - { - "epoch": 0.96, - "learning_rate": 0.00850401962037885, - "loss": 8.176, - "step": 222600 - }, - { - "epoch": 0.96, - "learning_rate": 0.008511304814981559, - "loss": 8.1787, - "step": 222800 - }, - { - "epoch": 0.96, - "learning_rate": 0.008518575484484491, - "loss": 8.171, - "step": 223000 - }, - { - "epoch": 0.96, - "learning_rate": 0.008525831596994712, - "loss": 8.177, - "step": 223200 - }, - { - "epoch": 0.96, - "learning_rate": 0.008533073120683157, - "loss": 8.1742, - "step": 223400 - }, - { - "epoch": 0.96, - "learning_rate": 0.00854030002378475, - "loss": 8.1773, - "step": 223600 - }, - { - "epoch": 0.96, - "learning_rate": 0.008547512274598539, - "loss": 8.1775, - "step": 223800 - }, - { - "epoch": 0.96, - "learning_rate": 0.008554709841487857, - "loss": 8.1809, - "step": 224000 - }, - { - "epoch": 0.96, - "learning_rate": 0.008561892692880447, - "loss": 8.1698, - "step": 224200 - }, - { - "epoch": 0.96, - "learning_rate": 0.008569060797268595, - "loss": 8.1778, - "step": 224400 - }, - { - "epoch": 0.97, - "learning_rate": 0.008576214123209277, - "loss": 8.1784, - "step": 224600 - }, - { - "epoch": 0.97, - "learning_rate": 0.008583316983634992, - "loss": 8.1796, - "step": 224800 - }, - { - "epoch": 0.97, - "learning_rate": 0.008590440732894599, - "loss": 8.1762, - "step": 225000 - }, - { - "epoch": 0.97, - "learning_rate": 0.00859754960992326, - "loss": 8.1795, - "step": 225200 - }, - { - "epoch": 0.97, - "learning_rate": 0.008604643583537755, - "loss": 8.1776, - "step": 225400 - }, - { - "epoch": 0.97, - "learning_rate": 0.008611722622620235, - "loss": 8.1733, - "step": 225600 - }, - { - "epoch": 0.97, - "learning_rate": 0.008618786696118368, - "loss": 8.1745, - "step": 225800 - }, - { - "epoch": 0.97, - "learning_rate": 0.00862580056501597, - "loss": 8.1641, - "step": 226000 - }, - { - "epoch": 0.97, - "learning_rate": 0.008632834689665386, - "loss": 8.1675, - "step": 226200 - }, - { - "epoch": 0.97, - "learning_rate": 0.008639853756121979, - "loss": 8.1675, - "step": 226400 - }, - { - "epoch": 0.97, - "learning_rate": 0.008646857733596483, - "loss": 8.1739, - "step": 226600 - }, - { - "epoch": 0.97, - "learning_rate": 0.008653846591365828, - "loss": 8.1756, - "step": 226800 - }, - { - "epoch": 0.98, - "learning_rate": 0.008660820298773252, - "loss": 8.1621, - "step": 227000 - }, - { - "epoch": 0.98, - "learning_rate": 0.008667778825228465, - "loss": 8.1797, - "step": 227200 - }, - { - "epoch": 0.98, - "learning_rate": 0.00867472214020776, - "loss": 8.1612, - "step": 227400 - }, - { - "epoch": 0.98, - "learning_rate": 0.008681650213254156, - "loss": 8.1713, - "step": 227600 - }, - { - "epoch": 0.98, - "learning_rate": 0.008688563013977534, - "loss": 8.1703, - "step": 227800 - }, - { - "epoch": 0.98, - "learning_rate": 0.008695460512054765, - "loss": 8.1582, - "step": 228000 - }, - { - "epoch": 0.98, - "learning_rate": 0.008702342677229847, - "loss": 8.1577, - "step": 228200 - }, - { - "epoch": 0.98, - "learning_rate": 0.008709209479314035, - "loss": 8.1736, - "step": 228400 - }, - { - "epoch": 0.98, - "learning_rate": 0.008716060888185976, - "loss": 8.167, - "step": 228600 - }, - { - "epoch": 0.98, - "learning_rate": 0.008722896873791835, - "loss": 8.1628, - "step": 228800 - }, - { - "epoch": 0.98, - "learning_rate": 0.008729717406145438, - "loss": 8.1667, - "step": 229000 - }, - { - "epoch": 0.98, - "learning_rate": 0.008736522455328391, - "loss": 8.1752, - "step": 229200 - }, - { - "epoch": 0.99, - "learning_rate": 0.008743311991490223, - "loss": 8.166, - "step": 229400 - }, - { - "epoch": 0.99, - "learning_rate": 0.008750085984848506, - "loss": 8.1672, - "step": 229600 - }, - { - "epoch": 0.99, - "learning_rate": 0.008756844405688994, - "loss": 8.1631, - "step": 229800 - }, - { - "epoch": 0.99, - "learning_rate": 0.008763553549131702, - "loss": 8.1708, - "step": 230000 - }, - { - "epoch": 0.99, - "learning_rate": 0.00877028081429939, - "loss": 8.1609, - "step": 230200 - }, - { - "epoch": 0.99, - "learning_rate": 0.008776992418364292, - "loss": 8.1545, - "step": 230400 - }, - { - "epoch": 0.99, - "learning_rate": 0.00878368833188583, - "loss": 8.1718, - "step": 230600 - }, - { - "epoch": 0.99, - "learning_rate": 0.008790368525492254, - "loss": 8.1817, - "step": 230800 - }, - { - "epoch": 0.99, - "learning_rate": 0.008797032969880776, - "loss": 8.1614, - "step": 231000 - }, - { - "epoch": 0.99, - "learning_rate": 0.008803681635817684, - "loss": 8.1741, - "step": 231200 - }, - { - "epoch": 0.99, - "learning_rate": 0.008810314494138483, - "loss": 8.1812, - "step": 231400 - }, - { - "epoch": 1.0, - "learning_rate": 0.008816931515748016, - "loss": 8.17, - "step": 231600 - }, - { - "epoch": 1.0, - "learning_rate": 0.008823532671620598, - "loss": 8.1602, - "step": 231800 - }, - { - "epoch": 1.0, - "learning_rate": 0.008830085046080098, - "loss": 8.1677, - "step": 232000 - }, - { - "epoch": 1.0, - "learning_rate": 0.008836654463369856, - "loss": 8.162, - "step": 232200 - }, - { - "epoch": 1.0, - "learning_rate": 0.00884320792840759, - "loss": 8.1568, - "step": 232400 - }, - { - "epoch": 1.0, - "learning_rate": 0.008849745412446397, - "loss": 8.1637, - "step": 232600 - }, - { - "epoch": 1.0, - "learning_rate": 0.008856266886809492, - "loss": 8.1704, - "step": 232800 - }, - { - "epoch": 1.0, - "learning_rate": 0.0088627723228903, - "loss": 8.1791, - "step": 233000 - }, - { - "epoch": 1.0, - "learning_rate": 0.008869261692152606, - "loss": 8.1617, - "step": 233200 - }, - { - "epoch": 1.0, - "learning_rate": 0.008875734966130679, - "loss": 8.1595, - "step": 233400 - }, - { - "epoch": 1.0, - "learning_rate": 0.008882192116429372, - "loss": 8.1635, - "step": 233600 - }, - { - "epoch": 1.0, - "learning_rate": 0.008888633114724282, - "loss": 8.1632, - "step": 233800 - }, - { - "epoch": 1.01, - "learning_rate": 0.008895025848966703, - "loss": 8.1664, - "step": 234000 - }, - { - "epoch": 1.01, - "learning_rate": 0.008901434539676536, - "loss": 8.175, - "step": 234200 - }, - { - "epoch": 1.01, - "learning_rate": 0.008907826993975337, - "loss": 8.1642, - "step": 234400 - }, - { - "epoch": 1.01, - "learning_rate": 0.008914203183822493, - "loss": 8.1627, - "step": 234600 - }, - { - "epoch": 1.01, - "learning_rate": 0.008920563081248724, - "loss": 8.1653, - "step": 234800 - }, - { - "epoch": 1.01, - "learning_rate": 0.008926906658356226, - "loss": 8.153, - "step": 235000 - }, - { - "epoch": 1.01, - "learning_rate": 0.00893323388731878, - "loss": 8.1536, - "step": 235200 - }, - { - "epoch": 1.01, - "learning_rate": 0.008939544740381878, - "loss": 8.1535, - "step": 235400 - }, - { - "epoch": 1.01, - "learning_rate": 0.008945839189862848, - "loss": 8.165, - "step": 235600 - }, - { - "epoch": 1.01, - "learning_rate": 0.00895211720815097, - "loss": 8.1751, - "step": 235800 - }, - { - "epoch": 1.01, - "learning_rate": 0.008958347500896378, - "loss": 8.158, - "step": 236000 - }, - { - "epoch": 1.02, - "learning_rate": 0.008964592656754264, - "loss": 8.1545, - "step": 236200 - }, - { - "epoch": 1.02, - "learning_rate": 0.008970821299156872, - "loss": 8.1605, - "step": 236400 - }, - { - "epoch": 1.02, - "learning_rate": 0.00897703340078215, - "loss": 8.1718, - "step": 236600 - }, - { - "epoch": 1.02, - "learning_rate": 0.008983228934380597, - "loss": 8.1589, - "step": 236800 - }, - { - "epoch": 1.02, - "learning_rate": 0.00898940787277539, - "loss": 8.1577, - "step": 237000 - }, - { - "epoch": 1.02, - "learning_rate": 0.008995570188862503, - "loss": 8.16, - "step": 237200 - }, - { - "epoch": 1.02, - "learning_rate": 0.009001715855610822, - "loss": 8.1503, - "step": 237400 - }, - { - "epoch": 1.02, - "learning_rate": 0.009007844846062265, - "loss": 8.1598, - "step": 237600 - }, - { - "epoch": 1.02, - "learning_rate": 0.009013957133331904, - "loss": 8.1518, - "step": 237800 - }, - { - "epoch": 1.02, - "learning_rate": 0.009020022254481815, - "loss": 8.1645, - "step": 238000 - }, - { - "epoch": 1.02, - "learning_rate": 0.009026101138876296, - "loss": 8.1415, - "step": 238200 - }, - { - "epoch": 1.02, - "learning_rate": 0.009032163240007407, - "loss": 8.1512, - "step": 238400 - }, - { - "epoch": 1.03, - "learning_rate": 0.009038208531283627, - "loss": 8.1564, - "step": 238600 - }, - { - "epoch": 1.03, - "learning_rate": 0.009044236986187178, - "loss": 8.1526, - "step": 238800 - }, - { - "epoch": 1.03, - "learning_rate": 0.009050248578274129, - "loss": 8.1544, - "step": 239000 - }, - { - "epoch": 1.03, - "learning_rate": 0.009056243281174516, - "loss": 8.1517, - "step": 239200 - }, - { - "epoch": 1.03, - "learning_rate": 0.00906222106859247, - "loss": 8.159, - "step": 239400 - }, - { - "epoch": 1.03, - "learning_rate": 0.009068181914306316, - "loss": 8.148, - "step": 239600 - }, - { - "epoch": 1.03, - "learning_rate": 0.00907412579216869, - "loss": 8.1454, - "step": 239800 - }, - { - "epoch": 1.03, - "learning_rate": 0.009080023084002398, - "loss": 8.1585, - "step": 240000 - }, - { - "epoch": 1.03, - "learning_rate": 0.009085933033181748, - "loss": 8.1549, - "step": 240200 - }, - { - "epoch": 1.03, - "learning_rate": 0.009091825936644017, - "loss": 8.1585, - "step": 240400 - }, - { - "epoch": 1.03, - "learning_rate": 0.009097701768539874, - "loss": 8.1535, - "step": 240600 - }, - { - "epoch": 1.03, - "learning_rate": 0.009103560503094875, - "loss": 8.147, - "step": 240800 - }, - { - "epoch": 1.04, - "learning_rate": 0.009109402114609573, - "loss": 8.161, - "step": 241000 - }, - { - "epoch": 1.04, - "learning_rate": 0.00911522657745963, - "loss": 8.1608, - "step": 241200 - }, - { - "epoch": 1.04, - "learning_rate": 0.009121033866095932, - "loss": 8.1631, - "step": 241400 - }, - { - "epoch": 1.04, - "learning_rate": 0.0091268239550447, - "loss": 8.1582, - "step": 241600 - }, - { - "epoch": 1.04, - "learning_rate": 0.009132596818907605, - "loss": 8.1542, - "step": 241800 - }, - { - "epoch": 1.04, - "learning_rate": 0.009138323697246783, - "loss": 8.1478, - "step": 242000 - }, - { - "epoch": 1.04, - "learning_rate": 0.009144062121486258, - "loss": 8.1602, - "step": 242200 - }, - { - "epoch": 1.04, - "learning_rate": 0.00914978324502433, - "loss": 8.1589, - "step": 242400 - }, - { - "epoch": 1.04, - "learning_rate": 0.00915548704276519, - "loss": 8.1592, - "step": 242600 - }, - { - "epoch": 1.04, - "learning_rate": 0.009161173489689015, - "loss": 8.1485, - "step": 242800 - }, - { - "epoch": 1.04, - "learning_rate": 0.00916684256085211, - "loss": 8.1585, - "step": 243000 - }, - { - "epoch": 1.05, - "learning_rate": 0.009172494231386982, - "loss": 8.1396, - "step": 243200 - }, - { - "epoch": 1.05, - "learning_rate": 0.009178128476502479, - "loss": 8.1382, - "step": 243400 - }, - { - "epoch": 1.05, - "learning_rate": 0.009183745271483879, - "loss": 8.1554, - "step": 243600 - }, - { - "epoch": 1.05, - "learning_rate": 0.009189344591693006, - "loss": 8.1526, - "step": 243800 - }, - { - "epoch": 1.05, - "learning_rate": 0.009194898547034088, - "loss": 8.1536, - "step": 244000 - }, - { - "epoch": 1.05, - "learning_rate": 0.009200462931770738, - "loss": 8.1455, - "step": 244200 - }, - { - "epoch": 1.05, - "learning_rate": 0.009206009768402789, - "loss": 8.1525, - "step": 244400 - }, - { - "epoch": 1.05, - "learning_rate": 0.009211539032598942, - "loss": 8.156, - "step": 244600 - }, - { - "epoch": 1.05, - "learning_rate": 0.009217050700104973, - "loss": 8.1408, - "step": 244800 - }, - { - "epoch": 1.05, - "learning_rate": 0.009222544746743853, - "loss": 8.1475, - "step": 245000 - }, - { - "epoch": 1.05, - "learning_rate": 0.009228021148415849, - "loss": 8.1482, - "step": 245200 - }, - { - "epoch": 1.05, - "learning_rate": 0.009233479881098617, - "loss": 8.1449, - "step": 245400 - }, - { - "epoch": 1.06, - "learning_rate": 0.009238920920847328, - "loss": 8.1364, - "step": 245600 - }, - { - "epoch": 1.06, - "learning_rate": 0.009244344243794764, - "loss": 8.1518, - "step": 245800 - }, - { - "epoch": 1.06, - "learning_rate": 0.009249722842408603, - "loss": 8.1493, - "step": 246000 - }, - { - "epoch": 1.06, - "learning_rate": 0.009255110749343147, - "loss": 8.1652, - "step": 246200 - }, - { - "epoch": 1.06, - "learning_rate": 0.00926048086845943, - "loss": 8.1556, - "step": 246400 - }, - { - "epoch": 1.06, - "learning_rate": 0.009265833176201327, - "loss": 8.1591, - "step": 246600 - }, - { - "epoch": 1.06, - "learning_rate": 0.009271167649090839, - "loss": 8.1329, - "step": 246800 - }, - { - "epoch": 1.06, - "learning_rate": 0.009276484263728204, - "loss": 8.1457, - "step": 247000 - }, - { - "epoch": 1.06, - "learning_rate": 0.009281782996791993, - "loss": 8.1417, - "step": 247200 - }, - { - "epoch": 1.06, - "learning_rate": 0.009287063825039214, - "loss": 8.1471, - "step": 247400 - }, - { - "epoch": 1.06, - "learning_rate": 0.009292326725305418, - "loss": 8.1466, - "step": 247600 - }, - { - "epoch": 1.06, - "learning_rate": 0.009297571674504797, - "loss": 8.1479, - "step": 247800 - }, - { - "epoch": 1.07, - "learning_rate": 0.00930279864963028, - "loss": 8.1592, - "step": 248000 - }, - { - "epoch": 1.07, - "learning_rate": 0.009307981627668403, - "loss": 8.1425, - "step": 248200 - }, - { - "epoch": 1.07, - "learning_rate": 0.00931317267609634, - "loss": 8.1544, - "step": 248400 - }, - { - "epoch": 1.07, - "learning_rate": 0.009318345682016303, - "loss": 8.1482, - "step": 248600 - }, - { - "epoch": 1.07, - "learning_rate": 0.009323500622736803, - "loss": 8.1354, - "step": 248800 - }, - { - "epoch": 1.07, - "learning_rate": 0.009328637475645598, - "loss": 8.1473, - "step": 249000 - }, - { - "epoch": 1.07, - "learning_rate": 0.00933375621820979, - "loss": 8.1451, - "step": 249200 - }, - { - "epoch": 1.07, - "learning_rate": 0.009338856827975918, - "loss": 8.1473, - "step": 249400 - }, - { - "epoch": 1.07, - "learning_rate": 0.009343939282570065, - "loss": 8.1493, - "step": 249600 - }, - { - "epoch": 1.07, - "learning_rate": 0.009349003559697948, - "loss": 8.1561, - "step": 249800 - }, - { - "epoch": 1.07, - "learning_rate": 0.009354049637145022, - "loss": 8.1603, - "step": 250000 - }, - { - "epoch": 1.08, - "learning_rate": 0.009359052398861702, - "loss": 8.1359, - "step": 250200 - }, - { - "epoch": 1.08, - "learning_rate": 0.009364062101897034, - "loss": 8.1473, - "step": 250400 - }, - { - "epoch": 1.08, - "learning_rate": 0.009369053539196983, - "loss": 8.1438, - "step": 250600 - }, - { - "epoch": 1.08, - "learning_rate": 0.00937402668886652, - "loss": 8.1452, - "step": 250800 - }, - { - "epoch": 1.08, - "learning_rate": 0.009378981529090828, - "loss": 8.1485, - "step": 251000 - }, - { - "epoch": 1.08, - "learning_rate": 0.009383918038135413, - "loss": 8.159, - "step": 251200 - }, - { - "epoch": 1.08, - "learning_rate": 0.009388836194346183, - "loss": 8.1468, - "step": 251400 - }, - { - "epoch": 1.08, - "learning_rate": 0.009393735976149557, - "loss": 8.1418, - "step": 251600 - }, - { - "epoch": 1.08, - "learning_rate": 0.00939861736205255, - "loss": 8.1352, - "step": 251800 - }, - { - "epoch": 1.08, - "learning_rate": 0.009403480330642873, - "loss": 8.1534, - "step": 252000 - }, - { - "epoch": 1.08, - "learning_rate": 0.009408300683840609, - "loss": 8.1547, - "step": 252200 - }, - { - "epoch": 1.08, - "learning_rate": 0.009413126846244171, - "loss": 8.1511, - "step": 252400 - }, - { - "epoch": 1.09, - "learning_rate": 0.009417934527688942, - "loss": 8.1381, - "step": 252600 - }, - { - "epoch": 1.09, - "learning_rate": 0.009422723707085941, - "loss": 8.1443, - "step": 252800 - }, - { - "epoch": 1.09, - "learning_rate": 0.009427494363427342, - "loss": 8.1442, - "step": 253000 - }, - { - "epoch": 1.09, - "learning_rate": 0.009432246475786572, - "loss": 8.1472, - "step": 253200 - }, - { - "epoch": 1.09, - "learning_rate": 0.009436980023318404, - "loss": 8.1464, - "step": 253400 - }, - { - "epoch": 1.09, - "learning_rate": 0.009441694985259043, - "loss": 8.1456, - "step": 253600 - }, - { - "epoch": 1.09, - "learning_rate": 0.009446391340926223, - "loss": 8.1412, - "step": 253800 - }, - { - "epoch": 1.09, - "learning_rate": 0.00945106906971929, - "loss": 8.1361, - "step": 254000 - }, - { - "epoch": 1.09, - "learning_rate": 0.009455704902131537, - "loss": 8.1399, - "step": 254200 - }, - { - "epoch": 1.09, - "learning_rate": 0.009460345409091202, - "loss": 8.1381, - "step": 254400 - }, - { - "epoch": 1.09, - "learning_rate": 0.009464967227966976, - "loss": 8.1487, - "step": 254600 - }, - { - "epoch": 1.09, - "learning_rate": 0.00946957033848517, - "loss": 8.1515, - "step": 254800 - }, - { - "epoch": 1.1, - "learning_rate": 0.009474154720454151, - "loss": 8.1382, - "step": 255000 - }, - { - "epoch": 1.1, - "learning_rate": 0.009478720353764443, - "loss": 8.141, - "step": 255200 - }, - { - "epoch": 1.1, - "learning_rate": 0.009483267218388814, - "loss": 8.1325, - "step": 255400 - }, - { - "epoch": 1.1, - "learning_rate": 0.009487795294382354, - "loss": 8.1515, - "step": 255600 - }, - { - "epoch": 1.1, - "learning_rate": 0.009492282062363955, - "loss": 8.1507, - "step": 255800 - }, - { - "epoch": 1.1, - "learning_rate": 0.009496772595781305, - "loss": 8.1444, - "step": 256000 - }, - { - "epoch": 1.1, - "learning_rate": 0.009501244281326239, - "loss": 8.1402, - "step": 256200 - }, - { - "epoch": 1.1, - "learning_rate": 0.009505697099383627, - "loss": 8.1254, - "step": 256400 - }, - { - "epoch": 1.1, - "learning_rate": 0.009510131030421094, - "loss": 8.1313, - "step": 256600 - }, - { - "epoch": 1.1, - "learning_rate": 0.009514546054989127, - "loss": 8.1507, - "step": 256800 - }, - { - "epoch": 1.1, - "learning_rate": 0.009518942153721136, - "loss": 8.1466, - "step": 257000 - }, - { - "epoch": 1.11, - "learning_rate": 0.009523319307333552, - "loss": 8.1353, - "step": 257200 - }, - { - "epoch": 1.11, - "learning_rate": 0.009527677496625909, - "loss": 8.1343, - "step": 257400 - }, - { - "epoch": 1.11, - "learning_rate": 0.00953201670248093, - "loss": 8.1295, - "step": 257600 - }, - { - "epoch": 1.11, - "learning_rate": 0.009536336905864612, - "loss": 8.1375, - "step": 257800 - }, - { - "epoch": 1.11, - "learning_rate": 0.009540638087826297, - "loss": 8.1373, - "step": 258000 - }, - { - "epoch": 1.11, - "learning_rate": 0.009544920229498774, - "loss": 8.1396, - "step": 258200 - }, - { - "epoch": 1.11, - "learning_rate": 0.00954918331209835, - "loss": 8.1268, - "step": 258400 - }, - { - "epoch": 1.11, - "learning_rate": 0.009553427316924932, - "loss": 8.1281, - "step": 258600 - }, - { - "epoch": 1.11, - "learning_rate": 0.009557652225362116, - "loss": 8.1305, - "step": 258800 - }, - { - "epoch": 1.11, - "learning_rate": 0.009561858018877264, - "loss": 8.1377, - "step": 259000 - }, - { - "epoch": 1.11, - "learning_rate": 0.009566044679021583, - "loss": 8.1287, - "step": 259200 - }, - { - "epoch": 1.11, - "learning_rate": 0.009570212187430212, - "loss": 8.1329, - "step": 259400 - }, - { - "epoch": 1.12, - "learning_rate": 0.009574360525822296, - "loss": 8.1338, - "step": 259600 - }, - { - "epoch": 1.12, - "learning_rate": 0.009578469078010859, - "loss": 8.1408, - "step": 259800 - }, - { - "epoch": 1.12, - "learning_rate": 0.009582579117940285, - "loss": 8.132, - "step": 260000 - }, - { - "epoch": 1.12, - "learning_rate": 0.009586669933605397, - "loss": 8.1305, - "step": 260200 - }, - { - "epoch": 1.12, - "learning_rate": 0.009590741507061759, - "loss": 8.1208, - "step": 260400 - }, - { - "epoch": 1.12, - "learning_rate": 0.009594793820449338, - "loss": 8.1265, - "step": 260600 - }, - { - "epoch": 1.12, - "learning_rate": 0.009598826855992585, - "loss": 8.1427, - "step": 260800 - }, - { - "epoch": 1.12, - "learning_rate": 0.009602840596000518, - "loss": 8.1346, - "step": 261000 - }, - { - "epoch": 1.12, - "learning_rate": 0.00960683502286679, - "loss": 8.1269, - "step": 261200 - }, - { - "epoch": 1.12, - "learning_rate": 0.009610810119069776, - "loss": 8.1414, - "step": 261400 - }, - { - "epoch": 1.12, - "learning_rate": 0.009614765867172638, - "loss": 8.1255, - "step": 261600 - }, - { - "epoch": 1.13, - "learning_rate": 0.009618682616110326, - "loss": 8.1226, - "step": 261800 - }, - { - "epoch": 1.13, - "learning_rate": 0.009622580175756732, - "loss": 8.1375, - "step": 262000 - }, - { - "epoch": 1.13, - "learning_rate": 0.009626477969871108, - "loss": 8.1416, - "step": 262200 - }, - { - "epoch": 1.13, - "learning_rate": 0.009630356347158045, - "loss": 8.1342, - "step": 262400 - }, - { - "epoch": 1.13, - "learning_rate": 0.00963421529060497, - "loss": 8.1211, - "step": 262600 - }, - { - "epoch": 1.13, - "learning_rate": 0.009638054783284557, - "loss": 8.1331, - "step": 262800 - }, - { - "epoch": 1.13, - "learning_rate": 0.009641874808354801, - "loss": 8.127, - "step": 263000 - }, - { - "epoch": 1.13, - "learning_rate": 0.00964567534905909, - "loss": 8.1231, - "step": 263200 - }, - { - "epoch": 1.13, - "learning_rate": 0.009649456388726285, - "loss": 8.1197, - "step": 263400 - }, - { - "epoch": 1.13, - "learning_rate": 0.009653217910770779, - "loss": 8.137, - "step": 263600 - }, - { - "epoch": 1.13, - "learning_rate": 0.009656959898692592, - "loss": 8.1307, - "step": 263800 - }, - { - "epoch": 1.13, - "learning_rate": 0.009660682336077423, - "loss": 8.1272, - "step": 264000 - }, - { - "epoch": 1.14, - "learning_rate": 0.009664385206596732, - "loss": 8.1313, - "step": 264200 - }, - { - "epoch": 1.14, - "learning_rate": 0.009668068494007809, - "loss": 8.1261, - "step": 264400 - }, - { - "epoch": 1.14, - "learning_rate": 0.009671732182153845, - "loss": 8.1431, - "step": 264600 - }, - { - "epoch": 1.14, - "learning_rate": 0.009675376254964004, - "loss": 8.1366, - "step": 264800 - }, - { - "epoch": 1.14, - "learning_rate": 0.009679000696453492, - "loss": 8.1389, - "step": 265000 - }, - { - "epoch": 1.14, - "learning_rate": 0.009682605490723634, - "loss": 8.1276, - "step": 265200 - }, - { - "epoch": 1.14, - "learning_rate": 0.00968617274524358, - "loss": 8.1276, - "step": 265400 - }, - { - "epoch": 1.14, - "learning_rate": 0.009689738296156552, - "loss": 8.1342, - "step": 265600 - }, - { - "epoch": 1.14, - "learning_rate": 0.009693284152749497, - "loss": 8.1304, - "step": 265800 - }, - { - "epoch": 1.14, - "learning_rate": 0.009696810299468447, - "loss": 8.1159, - "step": 266000 - }, - { - "epoch": 1.14, - "learning_rate": 0.009700316720845896, - "loss": 8.1305, - "step": 266200 - }, - { - "epoch": 1.14, - "learning_rate": 0.009703803401500859, - "loss": 8.1158, - "step": 266400 - }, - { - "epoch": 1.15, - "learning_rate": 0.009707270326138951, - "loss": 8.1202, - "step": 266600 - }, - { - "epoch": 1.15, - "learning_rate": 0.009710717479552442, - "loss": 8.1324, - "step": 266800 - }, - { - "epoch": 1.15, - "learning_rate": 0.009714144846620327, - "loss": 8.1265, - "step": 267000 - }, - { - "epoch": 1.15, - "learning_rate": 0.0097175524123084, - "loss": 8.1109, - "step": 267200 - }, - { - "epoch": 1.15, - "learning_rate": 0.009720940161669309, - "loss": 8.1358, - "step": 267400 - }, - { - "epoch": 1.15, - "learning_rate": 0.00972430807984263, - "loss": 8.1444, - "step": 267600 - }, - { - "epoch": 1.15, - "learning_rate": 0.009727656152054929, - "loss": 8.1187, - "step": 267800 - }, - { - "epoch": 1.15, - "learning_rate": 0.009730984363619822, - "loss": 8.1431, - "step": 268000 - }, - { - "epoch": 1.15, - "learning_rate": 0.009734292699938049, - "loss": 8.137, - "step": 268200 - }, - { - "epoch": 1.15, - "learning_rate": 0.009737581146497533, - "loss": 8.1205, - "step": 268400 - }, - { - "epoch": 1.15, - "learning_rate": 0.00974084968887344, - "loss": 8.1233, - "step": 268600 - }, - { - "epoch": 1.16, - "learning_rate": 0.009744098312728251, - "loss": 8.1275, - "step": 268800 - }, - { - "epoch": 1.16, - "learning_rate": 0.009747327003811816, - "loss": 8.1314, - "step": 269000 - }, - { - "epoch": 1.16, - "learning_rate": 0.009750535747961421, - "loss": 8.1349, - "step": 269200 - }, - { - "epoch": 1.16, - "learning_rate": 0.009753708636862335, - "loss": 8.1311, - "step": 269400 - }, - { - "epoch": 1.16, - "learning_rate": 0.009756877544915568, - "loss": 8.1165, - "step": 269600 - }, - { - "epoch": 1.16, - "learning_rate": 0.009760026464141218, - "loss": 8.1199, - "step": 269800 - }, - { - "epoch": 1.16, - "learning_rate": 0.009763155380726491, - "loss": 8.1304, - "step": 270000 - }, - { - "epoch": 1.16, - "learning_rate": 0.009766264280946338, - "loss": 8.1227, - "step": 270200 - }, - { - "epoch": 1.16, - "learning_rate": 0.009769353151163509, - "loss": 8.1176, - "step": 270400 - }, - { - "epoch": 1.16, - "learning_rate": 0.009772421977828618, - "loss": 8.1195, - "step": 270600 - }, - { - "epoch": 1.16, - "learning_rate": 0.0097754707474802, - "loss": 8.1284, - "step": 270800 - }, - { - "epoch": 1.16, - "learning_rate": 0.009778499446744773, - "loss": 8.1142, - "step": 271000 - }, - { - "epoch": 1.17, - "learning_rate": 0.009781508062336889, - "loss": 8.1313, - "step": 271200 - }, - { - "epoch": 1.17, - "learning_rate": 0.009784481688478276, - "loss": 8.1223, - "step": 271400 - }, - { - "epoch": 1.17, - "learning_rate": 0.009787450197803948, - "loss": 8.1243, - "step": 271600 - }, - { - "epoch": 1.17, - "learning_rate": 0.00979039858419453, - "loss": 8.1228, - "step": 271800 - }, - { - "epoch": 1.17, - "learning_rate": 0.00979332683471687, - "loss": 8.1401, - "step": 272000 - }, - { - "epoch": 1.17, - "learning_rate": 0.009796234936526142, - "loss": 8.1253, - "step": 272200 - }, - { - "epoch": 1.17, - "learning_rate": 0.009799122876865903, - "loss": 8.1222, - "step": 272400 - }, - { - "epoch": 1.17, - "learning_rate": 0.009801990643068153, - "loss": 8.116, - "step": 272600 - }, - { - "epoch": 1.17, - "learning_rate": 0.009804838222553378, - "loss": 8.1256, - "step": 272800 - }, - { - "epoch": 1.17, - "learning_rate": 0.00980766560283062, - "loss": 8.1362, - "step": 273000 - }, - { - "epoch": 1.17, - "learning_rate": 0.009810472771497523, - "loss": 8.1128, - "step": 273200 - }, - { - "epoch": 1.17, - "learning_rate": 0.009813245831843946, - "loss": 8.1211, - "step": 273400 - }, - { - "epoch": 1.18, - "learning_rate": 0.0098160126416488, - "loss": 8.1193, - "step": 273600 - }, - { - "epoch": 1.18, - "learning_rate": 0.009818759203228876, - "loss": 8.1163, - "step": 273800 - }, - { - "epoch": 1.18, - "learning_rate": 0.009821485504536329, - "loss": 8.1311, - "step": 274000 - }, - { - "epoch": 1.18, - "learning_rate": 0.009824191533612189, - "loss": 8.1275, - "step": 274200 - }, - { - "epoch": 1.18, - "learning_rate": 0.009826877278586409, - "loss": 8.1236, - "step": 274400 - }, - { - "epoch": 1.18, - "learning_rate": 0.009829542727677918, - "loss": 8.1244, - "step": 274600 - }, - { - "epoch": 1.18, - "learning_rate": 0.009832187869194675, - "loss": 8.1203, - "step": 274800 - }, - { - "epoch": 1.18, - "learning_rate": 0.009834812691533721, - "loss": 8.1292, - "step": 275000 - }, - { - "epoch": 1.18, - "learning_rate": 0.009837417183181221, - "loss": 8.1279, - "step": 275200 - }, - { - "epoch": 1.18, - "learning_rate": 0.00983998846258467, - "loss": 8.1192, - "step": 275400 - }, - { - "epoch": 1.18, - "learning_rate": 0.009842552360459663, - "loss": 8.1283, - "step": 275600 - }, - { - "epoch": 1.19, - "learning_rate": 0.009845095893692913, - "loss": 8.123, - "step": 275800 - }, - { - "epoch": 1.19, - "learning_rate": 0.009847619051127158, - "loss": 8.1158, - "step": 276000 - }, - { - "epoch": 1.19, - "learning_rate": 0.009850109358572218, - "loss": 8.1215, - "step": 276200 - }, - { - "epoch": 1.19, - "learning_rate": 0.009852591833310647, - "loss": 8.125, - "step": 276400 - }, - { - "epoch": 1.19, - "learning_rate": 0.009855053899368997, - "loss": 8.1246, - "step": 276600 - }, - { - "epoch": 1.19, - "learning_rate": 0.009857495545947366, - "loss": 8.1158, - "step": 276800 - }, - { - "epoch": 1.19, - "learning_rate": 0.009859916762335432, - "loss": 8.1245, - "step": 277000 - }, - { - "epoch": 1.19, - "learning_rate": 0.00986231753791248, - "loss": 8.1072, - "step": 277200 - }, - { - "epoch": 1.19, - "learning_rate": 0.009864697862147468, - "loss": 8.1148, - "step": 277400 - }, - { - "epoch": 1.19, - "learning_rate": 0.009867057724599056, - "loss": 8.127, - "step": 277600 - }, - { - "epoch": 1.19, - "learning_rate": 0.009869397114915666, - "loss": 8.1155, - "step": 277800 - }, - { - "epoch": 1.19, - "learning_rate": 0.009871716022835517, - "loss": 8.1186, - "step": 278000 - }, - { - "epoch": 1.2, - "learning_rate": 0.009874014438186677, - "loss": 8.1117, - "step": 278200 - }, - { - "epoch": 1.2, - "learning_rate": 0.009876292350887108, - "loss": 8.1123, - "step": 278400 - }, - { - "epoch": 1.2, - "learning_rate": 0.009878549750944697, - "loss": 8.111, - "step": 278600 - }, - { - "epoch": 1.2, - "learning_rate": 0.009880786628457323, - "loss": 8.1242, - "step": 278800 - }, - { - "epoch": 1.2, - "learning_rate": 0.009883002973612881, - "loss": 8.1179, - "step": 279000 - }, - { - "epoch": 1.2, - "learning_rate": 0.00988519877668933, - "loss": 8.1149, - "step": 279200 - }, - { - "epoch": 1.2, - "learning_rate": 0.009887374028054738, - "loss": 8.1159, - "step": 279400 - }, - { - "epoch": 1.2, - "learning_rate": 0.009889517995878554, - "loss": 8.1177, - "step": 279600 - }, - { - "epoch": 1.2, - "learning_rate": 0.00989165221816362, - "loss": 8.1155, - "step": 279800 - }, - { - "epoch": 1.2, - "learning_rate": 0.009893765860429504, - "loss": 8.1133, - "step": 280000 - }, - { - "epoch": 1.2, - "learning_rate": 0.00989585891340468, - "loss": 8.1168, - "step": 280200 - }, - { - "epoch": 1.2, - "learning_rate": 0.009897931367907929, - "loss": 8.1115, - "step": 280400 - }, - { - "epoch": 1.21, - "learning_rate": 0.009899983214848392, - "loss": 8.1096, - "step": 280600 - }, - { - "epoch": 1.21, - "learning_rate": 0.009902014445225605, - "loss": 8.108, - "step": 280800 - }, - { - "epoch": 1.21, - "learning_rate": 0.009904025050129538, - "loss": 8.1242, - "step": 281000 - }, - { - "epoch": 1.21, - "learning_rate": 0.009906015020740634, - "loss": 8.1112, - "step": 281200 - }, - { - "epoch": 1.21, - "learning_rate": 0.009907984348329853, - "loss": 8.107, - "step": 281400 - }, - { - "epoch": 1.21, - "learning_rate": 0.0099099330242587, - "loss": 8.1117, - "step": 281600 - }, - { - "epoch": 1.21, - "learning_rate": 0.009911861039979274, - "loss": 8.1151, - "step": 281800 - }, - { - "epoch": 1.21, - "learning_rate": 0.009913768387034299, - "loss": 8.1082, - "step": 282000 - }, - { - "epoch": 1.21, - "learning_rate": 0.009915655057057158, - "loss": 8.1111, - "step": 282200 - }, - { - "epoch": 1.21, - "learning_rate": 0.009917521041771944, - "loss": 8.1164, - "step": 282400 - }, - { - "epoch": 1.21, - "learning_rate": 0.009919366332993475, - "loss": 8.1196, - "step": 282600 - }, - { - "epoch": 1.22, - "learning_rate": 0.009921190922627351, - "loss": 8.1087, - "step": 282800 - }, - { - "epoch": 1.22, - "learning_rate": 0.009922994802669973, - "loss": 8.1115, - "step": 283000 - }, - { - "epoch": 1.22, - "learning_rate": 0.009924777965208591, - "loss": 8.1151, - "step": 283200 - }, - { - "epoch": 1.22, - "learning_rate": 0.009926540402421326, - "loss": 8.1221, - "step": 283400 - }, - { - "epoch": 1.22, - "learning_rate": 0.00992826479218945, - "loss": 8.1216, - "step": 283600 - }, - { - "epoch": 1.22, - "learning_rate": 0.009929985963092967, - "loss": 8.1149, - "step": 283800 - }, - { - "epoch": 1.22, - "learning_rate": 0.009931686385825624, - "loss": 8.1164, - "step": 284000 - }, - { - "epoch": 1.22, - "learning_rate": 0.009933366052928484, - "loss": 8.1185, - "step": 284200 - }, - { - "epoch": 1.22, - "learning_rate": 0.009935024957033656, - "loss": 8.1081, - "step": 284400 - }, - { - "epoch": 1.22, - "learning_rate": 0.009936663090864328, - "loss": 8.1121, - "step": 284600 - }, - { - "epoch": 1.22, - "learning_rate": 0.009938280447234797, - "loss": 8.1176, - "step": 284800 - }, - { - "epoch": 1.22, - "learning_rate": 0.009939877019050497, - "loss": 8.1054, - "step": 285000 - }, - { - "epoch": 1.23, - "learning_rate": 0.009941452799308036, - "loss": 8.1126, - "step": 285200 - }, - { - "epoch": 1.23, - "learning_rate": 0.009943007781095227, - "loss": 8.129, - "step": 285400 - }, - { - "epoch": 1.23, - "learning_rate": 0.00994454195759111, - "loss": 8.1091, - "step": 285600 - }, - { - "epoch": 1.23, - "learning_rate": 0.009946055322065996, - "loss": 8.1113, - "step": 285800 - }, - { - "epoch": 1.23, - "learning_rate": 0.00994754786788148, - "loss": 8.1119, - "step": 286000 - }, - { - "epoch": 1.23, - "learning_rate": 0.009949019588490484, - "loss": 8.131, - "step": 286200 - }, - { - "epoch": 1.23, - "learning_rate": 0.009950470477437278, - "loss": 8.1115, - "step": 286400 - }, - { - "epoch": 1.23, - "learning_rate": 0.009951900528357512, - "loss": 8.1081, - "step": 286600 - }, - { - "epoch": 1.23, - "learning_rate": 0.009953309734978238, - "loss": 8.1129, - "step": 286800 - }, - { - "epoch": 1.23, - "learning_rate": 0.00995469809111795, - "loss": 8.1179, - "step": 287000 - }, - { - "epoch": 1.23, - "learning_rate": 0.009956065590686593, - "loss": 8.1144, - "step": 287200 - }, - { - "epoch": 1.24, - "learning_rate": 0.009957412227685607, - "loss": 8.108, - "step": 287400 - }, - { - "epoch": 1.24, - "learning_rate": 0.009958731419285327, - "loss": 8.1174, - "step": 287600 - }, - { - "epoch": 1.24, - "learning_rate": 0.009960036417901256, - "loss": 8.1122, - "step": 287800 - }, - { - "epoch": 1.24, - "learning_rate": 0.009961320536529447, - "loss": 8.1073, - "step": 288000 - }, - { - "epoch": 1.24, - "learning_rate": 0.009962583769537093, - "loss": 8.1126, - "step": 288200 - }, - { - "epoch": 1.24, - "learning_rate": 0.009963826111382999, - "loss": 8.1208, - "step": 288400 - }, - { - "epoch": 1.24, - "learning_rate": 0.009965047556617607, - "loss": 8.1068, - "step": 288600 - }, - { - "epoch": 1.24, - "learning_rate": 0.009966248099883028, - "loss": 8.1157, - "step": 288800 - }, - { - "epoch": 1.24, - "learning_rate": 0.009967427735913055, - "loss": 8.1101, - "step": 289000 - }, - { - "epoch": 1.24, - "learning_rate": 0.009968586459533193, - "loss": 8.1252, - "step": 289200 - }, - { - "epoch": 1.24, - "learning_rate": 0.00996972426566068, - "loss": 8.1127, - "step": 289400 - }, - { - "epoch": 1.24, - "learning_rate": 0.009970835616939108, - "loss": 8.1273, - "step": 289600 - }, - { - "epoch": 1.25, - "learning_rate": 0.009971931677848996, - "loss": 8.1141, - "step": 289800 - }, - { - "epoch": 1.25, - "learning_rate": 0.009973006806592369, - "loss": 8.1118, - "step": 290000 - }, - { - "epoch": 1.25, - "learning_rate": 0.009974060998453157, - "loss": 8.1112, - "step": 290200 - }, - { - "epoch": 1.25, - "learning_rate": 0.009975094248807126, - "loss": 8.121, - "step": 290400 - }, - { - "epoch": 1.25, - "learning_rate": 0.009976106553121908, - "loss": 8.1051, - "step": 290600 - }, - { - "epoch": 1.25, - "learning_rate": 0.009977097906957009, - "loss": 8.1083, - "step": 290800 - }, - { - "epoch": 1.25, - "learning_rate": 0.009978068305963834, - "loss": 8.1075, - "step": 291000 - }, - { - "epoch": 1.25, - "learning_rate": 0.009979017745885714, - "loss": 8.1134, - "step": 291200 - }, - { - "epoch": 1.25, - "learning_rate": 0.009979946222557913, - "loss": 8.1196, - "step": 291400 - }, - { - "epoch": 1.25, - "learning_rate": 0.009980849246523739, - "loss": 8.1114, - "step": 291600 - }, - { - "epoch": 1.25, - "learning_rate": 0.009981735889436474, - "loss": 8.1118, - "step": 291800 - }, - { - "epoch": 1.25, - "learning_rate": 0.00998260155717634, - "loss": 8.1038, - "step": 292000 - }, - { - "epoch": 1.26, - "learning_rate": 0.009983446245946069, - "loss": 8.1087, - "step": 292200 - }, - { - "epoch": 1.26, - "learning_rate": 0.00998426995204042, - "loss": 8.0971, - "step": 292400 - }, - { - "epoch": 1.26, - "learning_rate": 0.009985072671846187, - "loss": 8.1061, - "step": 292600 - }, - { - "epoch": 1.26, - "learning_rate": 0.00998585440184223, - "loss": 8.108, - "step": 292800 - }, - { - "epoch": 1.26, - "learning_rate": 0.009986615138599473, - "loss": 8.1036, - "step": 293000 - }, - { - "epoch": 1.26, - "learning_rate": 0.009987354878780929, - "loss": 8.1177, - "step": 293200 - }, - { - "epoch": 1.26, - "learning_rate": 0.009988073619141717, - "loss": 8.1136, - "step": 293400 - }, - { - "epoch": 1.26, - "learning_rate": 0.009988767920092128, - "loss": 8.1232, - "step": 293600 - }, - { - "epoch": 1.26, - "learning_rate": 0.009989444756483034, - "loss": 8.1003, - "step": 293800 - }, - { - "epoch": 1.26, - "learning_rate": 0.009990100583885983, - "loss": 8.1102, - "step": 294000 - }, - { - "epoch": 1.26, - "learning_rate": 0.009990735399424178, - "loss": 8.1173, - "step": 294200 - }, - { - "epoch": 1.27, - "learning_rate": 0.009991349200312985, - "loss": 8.1126, - "step": 294400 - }, - { - "epoch": 1.27, - "learning_rate": 0.00999194198385996, - "loss": 8.1186, - "step": 294600 - }, - { - "epoch": 1.27, - "learning_rate": 0.009992513747464842, - "loss": 8.1033, - "step": 294800 - }, - { - "epoch": 1.27, - "learning_rate": 0.009993064488619582, - "loss": 8.099, - "step": 295000 - }, - { - "epoch": 1.27, - "learning_rate": 0.009993594204908344, - "loss": 8.1082, - "step": 295200 - }, - { - "epoch": 1.27, - "learning_rate": 0.009994100402870885, - "loss": 8.1142, - "step": 295400 - }, - { - "epoch": 1.27, - "learning_rate": 0.009994588167701598, - "loss": 8.1204, - "step": 295600 - }, - { - "epoch": 1.27, - "learning_rate": 0.009995054900982684, - "loss": 8.1004, - "step": 295800 - }, - { - "epoch": 1.27, - "learning_rate": 0.009995500600666814, - "loss": 8.1129, - "step": 296000 - }, - { - "epoch": 1.27, - "learning_rate": 0.009995925264798913, - "loss": 8.1096, - "step": 296200 - }, - { - "epoch": 1.27, - "learning_rate": 0.009996328891516186, - "loss": 8.1105, - "step": 296400 - }, - { - "epoch": 1.27, - "learning_rate": 0.009996711479048117, - "loss": 8.1001, - "step": 296600 - }, - { - "epoch": 1.28, - "learning_rate": 0.009997073025716478, - "loss": 8.1125, - "step": 296800 - }, - { - "epoch": 1.28, - "learning_rate": 0.009997413529935339, - "loss": 8.1103, - "step": 297000 - }, - { - "epoch": 1.28, - "learning_rate": 0.009997732990211072, - "loss": 8.1048, - "step": 297200 - }, - { - "epoch": 1.28, - "learning_rate": 0.009998031405142356, - "loss": 8.1077, - "step": 297400 - }, - { - "epoch": 1.28, - "learning_rate": 0.009998308773420192, - "loss": 8.1068, - "step": 297600 - }, - { - "epoch": 1.28, - "learning_rate": 0.009998565093827898, - "loss": 8.1048, - "step": 297800 - }, - { - "epoch": 1.28, - "learning_rate": 0.009998800365241116, - "loss": 8.1018, - "step": 298000 - }, - { - "epoch": 1.28, - "learning_rate": 0.009999014586627828, - "loss": 8.0989, - "step": 298200 - }, - { - "epoch": 1.28, - "learning_rate": 0.009999207757048347, - "loss": 8.1034, - "step": 298400 - }, - { - "epoch": 1.28, - "learning_rate": 0.009999379875655327, - "loss": 8.1058, - "step": 298600 - }, - { - "epoch": 1.28, - "learning_rate": 0.009999530941693766, - "loss": 8.0988, - "step": 298800 - }, - { - "epoch": 1.28, - "learning_rate": 0.00999966095450101, - "loss": 8.1038, - "step": 299000 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999769913506757, - "loss": 8.1068, - "step": 299200 - }, - { - "epoch": 1.29, - "learning_rate": 0.00999985743108262, - "loss": 8.1005, - "step": 299400 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999924386418004, - "loss": 8.1009, - "step": 299600 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999970286796342, - "loss": 8.105, - "step": 299800 - }, - { - "epoch": 1.29, - "learning_rate": 0.00999999513201629, - "loss": 8.1075, - "step": 300000 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999999793746236, - "loss": 8.1018, - "step": 300200 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999996490463693, - "loss": 8.1058, - "step": 300400 - }, - { - "epoch": 1.29, - "learning_rate": 0.0099999891587904, - "loss": 8.1008, - "step": 300600 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999977798732266, - "loss": 8.0989, - "step": 300800 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999962410298444, - "loss": 8.0856, - "step": 301000 - }, - { - "epoch": 1.29, - "learning_rate": 0.009999942993501333, - "loss": 8.1074, - "step": 301200 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999919675602783, - "loss": 8.0999, - "step": 301400 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999892222270858, - "loss": 8.106, - "step": 301600 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999860740632194, - "loss": 8.0983, - "step": 301800 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999825230712151, - "loss": 8.107, - "step": 302000 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999785692539342, - "loss": 8.109, - "step": 302200 - }, - { - "epoch": 1.3, - "learning_rate": 0.00999974212614562, - "loss": 8.1154, - "step": 302400 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999694531566091, - "loss": 8.0995, - "step": 302600 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999642908839092, - "loss": 8.0961, - "step": 302800 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999587258006221, - "loss": 8.103, - "step": 303000 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999527579112315, - "loss": 8.1004, - "step": 303200 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999464200759586, - "loss": 8.1009, - "step": 303400 - }, - { - "epoch": 1.3, - "learning_rate": 0.009999396486030772, - "loss": 8.116, - "step": 303600 - }, - { - "epoch": 1.31, - "learning_rate": 0.009999324743394622, - "loss": 8.1079, - "step": 303800 - }, - { - "epoch": 1.31, - "learning_rate": 0.00999924897290894, - "loss": 8.0983, - "step": 304000 - }, - { - "epoch": 1.31, - "learning_rate": 0.00999916917463477, - "loss": 8.0989, - "step": 304200 - }, - { - "epoch": 1.31, - "learning_rate": 0.009999085348636408, - "loss": 8.1003, - "step": 304400 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998997494981387, - "loss": 8.113, - "step": 304600 - }, - { - "epoch": 1.31, - "learning_rate": 0.00999890561374049, - "loss": 8.1073, - "step": 304800 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998809704987744, - "loss": 8.0997, - "step": 305000 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998709768800422, - "loss": 8.088, - "step": 305200 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998606335094657, - "loss": 8.0992, - "step": 305400 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998498364419114, - "loss": 8.1141, - "step": 305600 - }, - { - "epoch": 1.31, - "learning_rate": 0.00999838636655984, - "loss": 8.0966, - "step": 305800 - }, - { - "epoch": 1.31, - "learning_rate": 0.009998270341607064, - "loss": 8.0962, - "step": 306000 - }, - { - "epoch": 1.32, - "learning_rate": 0.009998150289654267, - "loss": 8.1001, - "step": 306200 - }, - { - "epoch": 1.32, - "learning_rate": 0.009998026210798172, - "loss": 8.1026, - "step": 306400 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997898105138748, - "loss": 8.1068, - "step": 306600 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997765972779208, - "loss": 8.0862, - "step": 306800 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997630504636746, - "loss": 8.1038, - "step": 307000 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997490339331727, - "loss": 8.1069, - "step": 307200 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997346147655121, - "loss": 8.0951, - "step": 307400 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997197929723102, - "loss": 8.1007, - "step": 307600 - }, - { - "epoch": 1.32, - "learning_rate": 0.009997045685655084, - "loss": 8.1087, - "step": 307800 - }, - { - "epoch": 1.32, - "learning_rate": 0.009996889415573728, - "loss": 8.0993, - "step": 308000 - }, - { - "epoch": 1.32, - "learning_rate": 0.009996729119604938, - "loss": 8.0918, - "step": 308200 - }, - { - "epoch": 1.33, - "learning_rate": 0.009996564797877862, - "loss": 8.1026, - "step": 308400 - }, - { - "epoch": 1.33, - "learning_rate": 0.009996396450524891, - "loss": 8.1062, - "step": 308600 - }, - { - "epoch": 1.33, - "learning_rate": 0.009996224077681658, - "loss": 8.1046, - "step": 308800 - }, - { - "epoch": 1.33, - "learning_rate": 0.009996047679487044, - "loss": 8.0895, - "step": 309000 - }, - { - "epoch": 1.33, - "learning_rate": 0.009995867256083165, - "loss": 8.0895, - "step": 309200 - }, - { - "epoch": 1.33, - "learning_rate": 0.00999568280761539, - "loss": 8.092, - "step": 309400 - }, - { - "epoch": 1.33, - "learning_rate": 0.009995494334232323, - "loss": 8.0908, - "step": 309600 - }, - { - "epoch": 1.33, - "learning_rate": 0.009995301836085813, - "loss": 8.1025, - "step": 309800 - }, - { - "epoch": 1.33, - "learning_rate": 0.009995105313330954, - "loss": 8.1121, - "step": 310000 - }, - { - "epoch": 1.33, - "learning_rate": 0.00999490476612608, - "loss": 8.0992, - "step": 310200 - }, - { - "epoch": 1.33, - "learning_rate": 0.009994700194632769, - "loss": 8.1112, - "step": 310400 - }, - { - "epoch": 1.33, - "learning_rate": 0.009994491599015838, - "loss": 8.1, - "step": 310600 - }, - { - "epoch": 1.34, - "learning_rate": 0.009994278979443352, - "loss": 8.102, - "step": 310800 - }, - { - "epoch": 1.34, - "learning_rate": 0.009994063429312274, - "loss": 8.1109, - "step": 311000 - }, - { - "epoch": 1.34, - "learning_rate": 0.009993842782463437, - "loss": 8.0873, - "step": 311200 - }, - { - "epoch": 1.34, - "learning_rate": 0.009993618112181784, - "loss": 8.1026, - "step": 311400 - }, - { - "epoch": 1.34, - "learning_rate": 0.009993389418648328, - "loss": 8.1025, - "step": 311600 - }, - { - "epoch": 1.34, - "learning_rate": 0.009993156702047321, - "loss": 8.094, - "step": 311800 - }, - { - "epoch": 1.34, - "learning_rate": 0.009992919962566259, - "loss": 8.0995, - "step": 312000 - }, - { - "epoch": 1.34, - "learning_rate": 0.009992679200395881, - "loss": 8.0974, - "step": 312200 - }, - { - "epoch": 1.34, - "learning_rate": 0.009992434415730164, - "loss": 8.0904, - "step": 312400 - }, - { - "epoch": 1.34, - "learning_rate": 0.009992185608766326, - "loss": 8.103, - "step": 312600 - }, - { - "epoch": 1.34, - "learning_rate": 0.009991932779704827, - "loss": 8.0962, - "step": 312800 - }, - { - "epoch": 1.35, - "learning_rate": 0.009991675928749366, - "loss": 8.0972, - "step": 313000 - }, - { - "epoch": 1.35, - "learning_rate": 0.009991415056106883, - "loss": 8.1061, - "step": 313200 - }, - { - "epoch": 1.35, - "learning_rate": 0.009991150161987558, - "loss": 8.1075, - "step": 313400 - }, - { - "epoch": 1.35, - "learning_rate": 0.009990881246604811, - "loss": 8.1173, - "step": 313600 - }, - { - "epoch": 1.35, - "learning_rate": 0.009990608310175307, - "loss": 8.0983, - "step": 313800 - }, - { - "epoch": 1.35, - "learning_rate": 0.009990331352918939, - "loss": 8.0938, - "step": 314000 - }, - { - "epoch": 1.35, - "learning_rate": 0.00999005037505885, - "loss": 8.102, - "step": 314200 - }, - { - "epoch": 1.35, - "learning_rate": 0.00998976537682142, - "loss": 8.103, - "step": 314400 - }, - { - "epoch": 1.35, - "learning_rate": 0.009989476358436266, - "loss": 8.1057, - "step": 314600 - }, - { - "epoch": 1.35, - "learning_rate": 0.009989183320136244, - "loss": 8.0909, - "step": 314800 - }, - { - "epoch": 1.35, - "learning_rate": 0.0099888877574459, - "loss": 8.0904, - "step": 315000 - }, - { - "epoch": 1.35, - "learning_rate": 0.009988586700124263, - "loss": 8.0995, - "step": 315200 - }, - { - "epoch": 1.36, - "learning_rate": 0.009988281623604541, - "loss": 8.0974, - "step": 315400 - }, - { - "epoch": 1.36, - "learning_rate": 0.009987972528132529, - "loss": 8.1009, - "step": 315600 - }, - { - "epoch": 1.36, - "learning_rate": 0.009987659413957259, - "loss": 8.0922, - "step": 315800 - }, - { - "epoch": 1.36, - "learning_rate": 0.009987342281330998, - "loss": 8.0904, - "step": 316000 - }, - { - "epoch": 1.36, - "learning_rate": 0.009987021130509258, - "loss": 8.0976, - "step": 316200 - }, - { - "epoch": 1.36, - "learning_rate": 0.009986695961750783, - "loss": 8.0903, - "step": 316400 - }, - { - "epoch": 1.36, - "learning_rate": 0.009986368431243252, - "loss": 8.1025, - "step": 316600 - }, - { - "epoch": 1.36, - "learning_rate": 0.009986035247486874, - "loss": 8.0848, - "step": 316800 - }, - { - "epoch": 1.36, - "learning_rate": 0.00998569804658807, - "loss": 8.0876, - "step": 317000 - }, - { - "epoch": 1.36, - "learning_rate": 0.009985356828818516, - "loss": 8.0889, - "step": 317200 - }, - { - "epoch": 1.36, - "learning_rate": 0.009985011594453126, - "loss": 8.0949, - "step": 317400 - }, - { - "epoch": 1.36, - "learning_rate": 0.009984662343770047, - "loss": 8.1025, - "step": 317600 - }, - { - "epoch": 1.37, - "learning_rate": 0.009984309077050666, - "loss": 8.096, - "step": 317800 - }, - { - "epoch": 1.37, - "learning_rate": 0.009983951794579601, - "loss": 8.1032, - "step": 318000 - }, - { - "epoch": 1.37, - "learning_rate": 0.009983590496644712, - "loss": 8.0975, - "step": 318200 - }, - { - "epoch": 1.37, - "learning_rate": 0.009983225183537087, - "loss": 8.1073, - "step": 318400 - }, - { - "epoch": 1.37, - "learning_rate": 0.009982855855551052, - "loss": 8.0863, - "step": 318600 - }, - { - "epoch": 1.37, - "learning_rate": 0.009982482512984172, - "loss": 8.0961, - "step": 318800 - }, - { - "epoch": 1.37, - "learning_rate": 0.009982105156137235, - "loss": 8.0901, - "step": 319000 - }, - { - "epoch": 1.37, - "learning_rate": 0.009981723785314279, - "loss": 8.0988, - "step": 319200 - }, - { - "epoch": 1.37, - "learning_rate": 0.009981338400822562, - "loss": 8.0925, - "step": 319400 - }, - { - "epoch": 1.37, - "learning_rate": 0.009980949002972582, - "loss": 8.0846, - "step": 319600 - }, - { - "epoch": 1.37, - "learning_rate": 0.009980557569114468, - "loss": 8.1035, - "step": 319800 - }, - { - "epoch": 1.38, - "learning_rate": 0.009980160165555235, - "loss": 8.0954, - "step": 320000 - }, - { - "epoch": 1.38, - "learning_rate": 0.009979758749587023, - "loss": 8.0963, - "step": 320200 - }, - { - "epoch": 1.38, - "learning_rate": 0.009979353321533243, - "loss": 8.09, - "step": 320400 - }, - { - "epoch": 1.38, - "learning_rate": 0.009978943881720544, - "loss": 8.0953, - "step": 320600 - }, - { - "epoch": 1.38, - "learning_rate": 0.009978530430478805, - "loss": 8.085, - "step": 320800 - }, - { - "epoch": 1.38, - "learning_rate": 0.009978112968141134, - "loss": 8.101, - "step": 321000 - }, - { - "epoch": 1.38, - "learning_rate": 0.009977691495043874, - "loss": 8.0878, - "step": 321200 - }, - { - "epoch": 1.38, - "learning_rate": 0.009977266011526598, - "loss": 8.0985, - "step": 321400 - }, - { - "epoch": 1.38, - "learning_rate": 0.00997683651793211, - "loss": 8.0875, - "step": 321600 - }, - { - "epoch": 1.38, - "learning_rate": 0.009976403014606445, - "loss": 8.093, - "step": 321800 - }, - { - "epoch": 1.38, - "learning_rate": 0.009975965501898871, - "loss": 8.0876, - "step": 322000 - }, - { - "epoch": 1.38, - "learning_rate": 0.00997552398016188, - "loss": 8.095, - "step": 322200 - }, - { - "epoch": 1.39, - "learning_rate": 0.009975078449751203, - "loss": 8.0878, - "step": 322400 - }, - { - "epoch": 1.39, - "learning_rate": 0.009974628911025792, - "loss": 8.0976, - "step": 322600 - }, - { - "epoch": 1.39, - "learning_rate": 0.009974175364347835, - "loss": 8.0771, - "step": 322800 - }, - { - "epoch": 1.39, - "learning_rate": 0.009973717810082744, - "loss": 8.0817, - "step": 323000 - }, - { - "epoch": 1.39, - "learning_rate": 0.009973256248599164, - "loss": 8.0844, - "step": 323200 - }, - { - "epoch": 1.39, - "learning_rate": 0.009972790680268967, - "loss": 8.0957, - "step": 323400 - }, - { - "epoch": 1.39, - "learning_rate": 0.009972321105467249, - "loss": 8.0952, - "step": 323600 - }, - { - "epoch": 1.39, - "learning_rate": 0.009971849902441344, - "loss": 8.0897, - "step": 323800 - }, - { - "epoch": 1.39, - "learning_rate": 0.009971372335862407, - "loss": 8.1089, - "step": 324000 - }, - { - "epoch": 1.39, - "learning_rate": 0.009970890763954684, - "loss": 8.0873, - "step": 324200 - }, - { - "epoch": 1.39, - "learning_rate": 0.009970405187106172, - "loss": 8.1065, - "step": 324400 - }, - { - "epoch": 1.39, - "learning_rate": 0.009969915605708092, - "loss": 8.0983, - "step": 324600 - }, - { - "epoch": 1.4, - "learning_rate": 0.00996942202015489, - "loss": 8.1041, - "step": 324800 - }, - { - "epoch": 1.4, - "learning_rate": 0.009968924430844236, - "loss": 8.0898, - "step": 325000 - }, - { - "epoch": 1.4, - "learning_rate": 0.009968422838177034, - "loss": 8.0878, - "step": 325200 - }, - { - "epoch": 1.4, - "learning_rate": 0.009967917242557404, - "loss": 8.0889, - "step": 325400 - }, - { - "epoch": 1.4, - "learning_rate": 0.009967407644392696, - "loss": 8.0861, - "step": 325600 - }, - { - "epoch": 1.4, - "learning_rate": 0.00996689662204961, - "loss": 8.0901, - "step": 325800 - }, - { - "epoch": 1.4, - "learning_rate": 0.009966379040037263, - "loss": 8.0967, - "step": 326000 - }, - { - "epoch": 1.4, - "learning_rate": 0.009965860074588287, - "loss": 8.0895, - "step": 326200 - }, - { - "epoch": 1.4, - "learning_rate": 0.009965334510387997, - "loss": 8.0918, - "step": 326400 - }, - { - "epoch": 1.4, - "learning_rate": 0.00996480494572349, - "loss": 8.0844, - "step": 326600 - }, - { - "epoch": 1.4, - "learning_rate": 0.009964271381021428, - "loss": 8.0876, - "step": 326800 - }, - { - "epoch": 1.41, - "learning_rate": 0.009963733816711694, - "loss": 8.0963, - "step": 327000 - }, - { - "epoch": 1.41, - "learning_rate": 0.009963192253227391, - "loss": 8.0937, - "step": 327200 - }, - { - "epoch": 1.41, - "learning_rate": 0.009962646691004851, - "loss": 8.0976, - "step": 327400 - }, - { - "epoch": 1.41, - "learning_rate": 0.009962097130483622, - "loss": 8.0971, - "step": 327600 - }, - { - "epoch": 1.41, - "learning_rate": 0.009961543572106473, - "loss": 8.0938, - "step": 327800 - }, - { - "epoch": 1.41, - "learning_rate": 0.0099609860163194, - "loss": 8.0926, - "step": 328000 - }, - { - "epoch": 1.41, - "learning_rate": 0.009960424463571612, - "loss": 8.095, - "step": 328200 - }, - { - "epoch": 1.41, - "learning_rate": 0.009959858914315544, - "loss": 8.0891, - "step": 328400 - }, - { - "epoch": 1.41, - "learning_rate": 0.009959289369006849, - "loss": 8.09, - "step": 328600 - }, - { - "epoch": 1.41, - "learning_rate": 0.009958715828104397, - "loss": 8.107, - "step": 328800 - }, - { - "epoch": 1.41, - "learning_rate": 0.009958138292070283, - "loss": 8.0798, - "step": 329000 - }, - { - "epoch": 1.41, - "learning_rate": 0.009957556761369814, - "loss": 8.0843, - "step": 329200 - }, - { - "epoch": 1.42, - "learning_rate": 0.00995697123647152, - "loss": 8.0952, - "step": 329400 - }, - { - "epoch": 1.42, - "learning_rate": 0.00995638171784715, - "loss": 8.0914, - "step": 329600 - }, - { - "epoch": 1.42, - "learning_rate": 0.009955788205971663, - "loss": 8.0849, - "step": 329800 - }, - { - "epoch": 1.42, - "learning_rate": 0.009955190701323247, - "loss": 8.0951, - "step": 330000 - }, - { - "epoch": 1.42, - "learning_rate": 0.009954592221798023, - "loss": 8.0825, - "step": 330200 - }, - { - "epoch": 1.42, - "learning_rate": 0.00995398675300898, - "loss": 8.0914, - "step": 330400 - }, - { - "epoch": 1.42, - "learning_rate": 0.009953377292898403, - "loss": 8.0895, - "step": 330600 - }, - { - "epoch": 1.42, - "learning_rate": 0.009952763841957323, - "loss": 8.0972, - "step": 330800 - }, - { - "epoch": 1.42, - "learning_rate": 0.009952146400679988, - "loss": 8.0956, - "step": 331000 - }, - { - "epoch": 1.42, - "learning_rate": 0.009951524969563856, - "loss": 8.0915, - "step": 331200 - }, - { - "epoch": 1.42, - "learning_rate": 0.009950899549109608, - "loss": 8.1049, - "step": 331400 - }, - { - "epoch": 1.42, - "learning_rate": 0.00995027013982113, - "loss": 8.0997, - "step": 331600 - }, - { - "epoch": 1.43, - "learning_rate": 0.009949636742205524, - "loss": 8.0824, - "step": 331800 - }, - { - "epoch": 1.43, - "learning_rate": 0.009948999356773113, - "loss": 8.0942, - "step": 332000 - }, - { - "epoch": 1.43, - "learning_rate": 0.009948361200818663, - "loss": 8.0954, - "step": 332200 - }, - { - "epoch": 1.43, - "learning_rate": 0.009947715861229079, - "loss": 8.0814, - "step": 332400 - }, - { - "epoch": 1.43, - "learning_rate": 0.009947066535370305, - "loss": 8.0816, - "step": 332600 - }, - { - "epoch": 1.43, - "learning_rate": 0.00994641322376549, - "loss": 8.0701, - "step": 332800 - }, - { - "epoch": 1.43, - "learning_rate": 0.009945755926941, - "loss": 8.074, - "step": 333000 - }, - { - "epoch": 1.43, - "learning_rate": 0.009945094645426403, - "loss": 8.0864, - "step": 333200 - }, - { - "epoch": 1.43, - "learning_rate": 0.009944429379754482, - "loss": 8.088, - "step": 333400 - }, - { - "epoch": 1.43, - "learning_rate": 0.009943760130461232, - "loss": 8.0898, - "step": 333600 - }, - { - "epoch": 1.43, - "learning_rate": 0.009943086898085854, - "loss": 8.0827, - "step": 333800 - }, - { - "epoch": 1.44, - "learning_rate": 0.009942409683170757, - "loss": 8.104, - "step": 334000 - }, - { - "epoch": 1.44, - "learning_rate": 0.009941728486261563, - "loss": 8.0832, - "step": 334200 - }, - { - "epoch": 1.44, - "learning_rate": 0.009941046743701807, - "loss": 8.0839, - "step": 334400 - }, - { - "epoch": 1.44, - "learning_rate": 0.009940357604357198, - "loss": 8.0845, - "step": 334600 - }, - { - "epoch": 1.44, - "learning_rate": 0.009939664484671814, - "loss": 8.09, - "step": 334800 - }, - { - "epoch": 1.44, - "learning_rate": 0.00993896738520409, - "loss": 8.0982, - "step": 335000 - }, - { - "epoch": 1.44, - "learning_rate": 0.009938266306515665, - "loss": 8.0898, - "step": 335200 - }, - { - "epoch": 1.44, - "learning_rate": 0.009937561249171388, - "loss": 8.0974, - "step": 335400 - }, - { - "epoch": 1.44, - "learning_rate": 0.009936852213739309, - "loss": 8.0903, - "step": 335600 - }, - { - "epoch": 1.44, - "learning_rate": 0.009936139200790683, - "loss": 8.0852, - "step": 335800 - }, - { - "epoch": 1.44, - "learning_rate": 0.009935422210899976, - "loss": 8.0816, - "step": 336000 - }, - { - "epoch": 1.44, - "learning_rate": 0.00993470124464485, - "loss": 8.0841, - "step": 336200 - }, - { - "epoch": 1.45, - "learning_rate": 0.009933976302606179, - "loss": 8.0839, - "step": 336400 - }, - { - "epoch": 1.45, - "learning_rate": 0.00993324738536803, - "loss": 8.0811, - "step": 336600 - }, - { - "epoch": 1.45, - "learning_rate": 0.009932514493517683, - "loss": 8.0682, - "step": 336800 - }, - { - "epoch": 1.45, - "learning_rate": 0.009931777627645615, - "loss": 8.0825, - "step": 337000 - }, - { - "epoch": 1.45, - "learning_rate": 0.009931036788345502, - "loss": 8.0837, - "step": 337200 - }, - { - "epoch": 1.45, - "learning_rate": 0.00993029197621423, - "loss": 8.0969, - "step": 337400 - }, - { - "epoch": 1.45, - "learning_rate": 0.009929543191851877, - "loss": 8.0846, - "step": 337600 - }, - { - "epoch": 1.45, - "learning_rate": 0.009928790435861727, - "loss": 8.0924, - "step": 337800 - }, - { - "epoch": 1.45, - "learning_rate": 0.009928037502362225, - "loss": 8.1005, - "step": 338000 - }, - { - "epoch": 1.45, - "learning_rate": 0.00992727682478966, - "loss": 8.0887, - "step": 338200 - }, - { - "epoch": 1.45, - "learning_rate": 0.00992651217741527, - "loss": 8.0885, - "step": 338400 - }, - { - "epoch": 1.46, - "learning_rate": 0.009925743560855113, - "loss": 8.0877, - "step": 338600 - }, - { - "epoch": 1.46, - "learning_rate": 0.009924970975728453, - "loss": 8.0836, - "step": 338800 - }, - { - "epoch": 1.46, - "learning_rate": 0.009924194422657748, - "loss": 8.092, - "step": 339000 - }, - { - "epoch": 1.46, - "learning_rate": 0.009923413902268651, - "loss": 8.0908, - "step": 339200 - }, - { - "epoch": 1.46, - "learning_rate": 0.009922629415190015, - "loss": 8.0852, - "step": 339400 - }, - { - "epoch": 1.46, - "learning_rate": 0.009921840962053886, - "loss": 8.0839, - "step": 339600 - }, - { - "epoch": 1.46, - "learning_rate": 0.009921048543495506, - "loss": 8.0914, - "step": 339800 - }, - { - "epoch": 1.46, - "learning_rate": 0.009920252160153313, - "loss": 8.075, - "step": 340000 - }, - { - "epoch": 1.46, - "learning_rate": 0.00991945181266894, - "loss": 8.0905, - "step": 340200 - }, - { - "epoch": 1.46, - "learning_rate": 0.00991864750168721, - "loss": 8.0911, - "step": 340400 - }, - { - "epoch": 1.46, - "learning_rate": 0.009917839227856143, - "loss": 8.0812, - "step": 340600 - }, - { - "epoch": 1.46, - "learning_rate": 0.009917031062861983, - "loss": 8.0929, - "step": 340800 - }, - { - "epoch": 1.47, - "learning_rate": 0.009916214885095155, - "loss": 8.0897, - "step": 341000 - }, - { - "epoch": 1.47, - "learning_rate": 0.009915394746438903, - "loss": 8.0862, - "step": 341200 - }, - { - "epoch": 1.47, - "learning_rate": 0.009914570647554003, - "loss": 8.0814, - "step": 341400 - }, - { - "epoch": 1.47, - "learning_rate": 0.009913742589104413, - "loss": 8.0861, - "step": 341600 - }, - { - "epoch": 1.47, - "learning_rate": 0.009912910571757286, - "loss": 8.0776, - "step": 341800 - }, - { - "epoch": 1.47, - "learning_rate": 0.009912074596182962, - "loss": 8.085, - "step": 342000 - }, - { - "epoch": 1.47, - "learning_rate": 0.009911234663054972, - "loss": 8.0756, - "step": 342200 - }, - { - "epoch": 1.47, - "learning_rate": 0.009910390773050035, - "loss": 8.0854, - "step": 342400 - }, - { - "epoch": 1.47, - "learning_rate": 0.009909542926848059, - "loss": 8.0887, - "step": 342600 - }, - { - "epoch": 1.47, - "learning_rate": 0.009908691125132136, - "loss": 8.0783, - "step": 342800 - }, - { - "epoch": 1.47, - "learning_rate": 0.009907835368588549, - "loss": 8.0819, - "step": 343000 - }, - { - "epoch": 1.47, - "learning_rate": 0.009906975657906767, - "loss": 8.0974, - "step": 343200 - }, - { - "epoch": 1.48, - "learning_rate": 0.00990611199377944, - "loss": 8.0691, - "step": 343400 - }, - { - "epoch": 1.48, - "learning_rate": 0.00990524437690241, - "loss": 8.0828, - "step": 343600 - }, - { - "epoch": 1.48, - "learning_rate": 0.009904372807974697, - "loss": 8.0893, - "step": 343800 - }, - { - "epoch": 1.48, - "learning_rate": 0.00990349728769851, - "loss": 8.084, - "step": 344000 - }, - { - "epoch": 1.48, - "learning_rate": 0.009902617816779239, - "loss": 8.086, - "step": 344200 - }, - { - "epoch": 1.48, - "learning_rate": 0.009901738822854014, - "loss": 8.0903, - "step": 344400 - }, - { - "epoch": 1.48, - "learning_rate": 0.00990085147252182, - "loss": 8.0967, - "step": 344600 - }, - { - "epoch": 1.48, - "learning_rate": 0.009899960173678227, - "loss": 8.0674, - "step": 344800 - }, - { - "epoch": 1.48, - "learning_rate": 0.009899064927041342, - "loss": 8.0838, - "step": 345000 - }, - { - "epoch": 1.48, - "learning_rate": 0.009898165733332444, - "loss": 8.0805, - "step": 345200 - }, - { - "epoch": 1.48, - "learning_rate": 0.009897262593276, - "loss": 8.0759, - "step": 345400 - }, - { - "epoch": 1.49, - "learning_rate": 0.009896355507599654, - "loss": 8.0858, - "step": 345600 - }, - { - "epoch": 1.49, - "learning_rate": 0.009895444477034228, - "loss": 8.0832, - "step": 345800 - }, - { - "epoch": 1.49, - "learning_rate": 0.009894529502313724, - "loss": 8.0974, - "step": 346000 - }, - { - "epoch": 1.49, - "learning_rate": 0.009893610584175318, - "loss": 8.0923, - "step": 346200 - }, - { - "epoch": 1.49, - "learning_rate": 0.009892687723359366, - "loss": 8.0984, - "step": 346400 - }, - { - "epoch": 1.49, - "learning_rate": 0.009891760920609404, - "loss": 8.0771, - "step": 346600 - }, - { - "epoch": 1.49, - "learning_rate": 0.009890830176672137, - "loss": 8.096, - "step": 346800 - }, - { - "epoch": 1.49, - "learning_rate": 0.009889895492297449, - "loss": 8.0893, - "step": 347000 - }, - { - "epoch": 1.49, - "learning_rate": 0.009888956868238399, - "loss": 8.092, - "step": 347200 - }, - { - "epoch": 1.49, - "learning_rate": 0.009888014305251217, - "loss": 8.0782, - "step": 347400 - }, - { - "epoch": 1.49, - "learning_rate": 0.009887067804095312, - "loss": 8.0812, - "step": 347600 - }, - { - "epoch": 1.49, - "learning_rate": 0.009886117365533263, - "loss": 8.0825, - "step": 347800 - }, - { - "epoch": 1.5, - "learning_rate": 0.00988516299033082, - "loss": 8.083, - "step": 348000 - }, - { - "epoch": 1.5, - "learning_rate": 0.00988420948060148, - "loss": 8.1012, - "step": 348200 - }, - { - "epoch": 1.5, - "learning_rate": 0.00988324725410176, - "loss": 8.0922, - "step": 348400 - }, - { - "epoch": 1.5, - "learning_rate": 0.009882281093274041, - "loss": 8.0844, - "step": 348600 - }, - { - "epoch": 1.5, - "learning_rate": 0.009881310998896746, - "loss": 8.0971, - "step": 348800 - }, - { - "epoch": 1.5, - "learning_rate": 0.009880336971751459, - "loss": 8.0875, - "step": 349000 - }, - { - "epoch": 1.5, - "learning_rate": 0.009879359012622936, - "loss": 8.0866, - "step": 349200 - }, - { - "epoch": 1.5, - "learning_rate": 0.009878377122299102, - "loss": 8.0735, - "step": 349400 - }, - { - "epoch": 1.5, - "learning_rate": 0.00987739130157105, - "loss": 8.1036, - "step": 349600 - }, - { - "epoch": 1.5, - "learning_rate": 0.009876401551233037, - "loss": 8.0714, - "step": 349800 - }, - { - "epoch": 1.5, - "learning_rate": 0.009875407872082484, - "loss": 8.0715, - "step": 350000 - }, - { - "epoch": 1.5, - "learning_rate": 0.009874410264919982, - "loss": 8.087, - "step": 350200 - }, - { - "epoch": 1.51, - "learning_rate": 0.009873413747988738, - "loss": 8.0952, - "step": 350400 - }, - { - "epoch": 1.51, - "learning_rate": 0.009872408306846759, - "loss": 8.0772, - "step": 350600 - }, - { - "epoch": 1.51, - "learning_rate": 0.009871398940109528, - "loss": 8.0841, - "step": 350800 - }, - { - "epoch": 1.51, - "learning_rate": 0.009870385648590274, - "loss": 8.0837, - "step": 351000 - }, - { - "epoch": 1.51, - "learning_rate": 0.009869368433105385, - "loss": 8.0923, - "step": 351200 - }, - { - "epoch": 1.51, - "learning_rate": 0.009868347294474415, - "loss": 8.0955, - "step": 351400 - }, - { - "epoch": 1.51, - "learning_rate": 0.009867322233520076, - "loss": 8.0797, - "step": 351600 - }, - { - "epoch": 1.51, - "learning_rate": 0.009866293251068243, - "loss": 8.0893, - "step": 351800 - }, - { - "epoch": 1.51, - "learning_rate": 0.009865260347947944, - "loss": 8.0866, - "step": 352000 - }, - { - "epoch": 1.51, - "learning_rate": 0.009864223524991375, - "loss": 8.1012, - "step": 352200 - }, - { - "epoch": 1.51, - "learning_rate": 0.009863182783033884, - "loss": 8.0747, - "step": 352400 - }, - { - "epoch": 1.52, - "learning_rate": 0.009862138122913979, - "loss": 8.0762, - "step": 352600 - }, - { - "epoch": 1.52, - "learning_rate": 0.009861089545473321, - "loss": 8.0828, - "step": 352800 - }, - { - "epoch": 1.52, - "learning_rate": 0.009860042323767148, - "loss": 8.0836, - "step": 353000 - }, - { - "epoch": 1.52, - "learning_rate": 0.00985898593379863, - "loss": 8.0835, - "step": 353200 - }, - { - "epoch": 1.52, - "learning_rate": 0.00985792562904902, - "loss": 8.086, - "step": 353400 - }, - { - "epoch": 1.52, - "learning_rate": 0.00985686141037259, - "loss": 8.0757, - "step": 353600 - }, - { - "epoch": 1.52, - "learning_rate": 0.009855793278626762, - "loss": 8.0811, - "step": 353800 - }, - { - "epoch": 1.52, - "learning_rate": 0.009854721234672108, - "loss": 8.0791, - "step": 354000 - }, - { - "epoch": 1.52, - "learning_rate": 0.009853645279372356, - "loss": 8.0775, - "step": 354200 - }, - { - "epoch": 1.52, - "learning_rate": 0.009852565413594383, - "loss": 8.077, - "step": 354400 - }, - { - "epoch": 1.52, - "learning_rate": 0.00985148163820822, - "loss": 8.0828, - "step": 354600 - }, - { - "epoch": 1.52, - "learning_rate": 0.00985039395408704, - "loss": 8.073, - "step": 354800 - }, - { - "epoch": 1.53, - "learning_rate": 0.009849302362107173, - "loss": 8.0769, - "step": 355000 - }, - { - "epoch": 1.53, - "learning_rate": 0.009848206863148098, - "loss": 8.0768, - "step": 355200 - }, - { - "epoch": 1.53, - "learning_rate": 0.009847107458092435, - "loss": 8.0851, - "step": 355400 - }, - { - "epoch": 1.53, - "learning_rate": 0.009846004147825957, - "loss": 8.0638, - "step": 355600 - }, - { - "epoch": 1.53, - "learning_rate": 0.009844896933237578, - "loss": 8.0695, - "step": 355800 - }, - { - "epoch": 1.53, - "learning_rate": 0.009843785815219363, - "loss": 8.0958, - "step": 356000 - }, - { - "epoch": 1.53, - "learning_rate": 0.009842670794666521, - "loss": 8.092, - "step": 356200 - }, - { - "epoch": 1.53, - "learning_rate": 0.009841557476792178, - "loss": 8.0753, - "step": 356400 - }, - { - "epoch": 1.53, - "learning_rate": 0.009840434673369704, - "loss": 8.0763, - "step": 356600 - }, - { - "epoch": 1.53, - "learning_rate": 0.009839307970112557, - "loss": 8.0814, - "step": 356800 - }, - { - "epoch": 1.53, - "learning_rate": 0.009838177367928498, - "loss": 8.0819, - "step": 357000 - }, - { - "epoch": 1.53, - "learning_rate": 0.009837042867728438, - "loss": 8.0816, - "step": 357200 - }, - { - "epoch": 1.54, - "learning_rate": 0.00983590447042642, - "loss": 8.0816, - "step": 357400 - }, - { - "epoch": 1.54, - "learning_rate": 0.00983476217693963, - "loss": 8.0857, - "step": 357600 - }, - { - "epoch": 1.54, - "learning_rate": 0.009833615988188394, - "loss": 8.094, - "step": 357800 - }, - { - "epoch": 1.54, - "learning_rate": 0.009832465905096177, - "loss": 8.0852, - "step": 358000 - }, - { - "epoch": 1.54, - "learning_rate": 0.009831311928589576, - "loss": 8.0922, - "step": 358200 - }, - { - "epoch": 1.54, - "learning_rate": 0.009830154059598334, - "loss": 8.0923, - "step": 358400 - }, - { - "epoch": 1.54, - "learning_rate": 0.009828992299055322, - "loss": 8.0725, - "step": 358600 - }, - { - "epoch": 1.54, - "learning_rate": 0.00982782664789655, - "loss": 8.0766, - "step": 358800 - }, - { - "epoch": 1.54, - "learning_rate": 0.009826662964439353, - "loss": 8.0756, - "step": 359000 - }, - { - "epoch": 1.54, - "learning_rate": 0.009825489554310951, - "loss": 8.091, - "step": 359200 - }, - { - "epoch": 1.54, - "learning_rate": 0.00982431225638889, - "loss": 8.0829, - "step": 359400 - }, - { - "epoch": 1.55, - "learning_rate": 0.00982313107162169, - "loss": 8.0782, - "step": 359600 - }, - { - "epoch": 1.55, - "learning_rate": 0.00982194600096102, - "loss": 8.0819, - "step": 359800 - }, - { - "epoch": 1.55, - "learning_rate": 0.009820757045361662, - "loss": 8.087, - "step": 360000 - }, - { - "epoch": 1.55, - "learning_rate": 0.009819564205781542, - "loss": 8.1015, - "step": 360200 - }, - { - "epoch": 1.55, - "learning_rate": 0.009818367483181706, - "loss": 8.0882, - "step": 360400 - }, - { - "epoch": 1.55, - "learning_rate": 0.009817166878526332, - "loss": 8.061, - "step": 360600 - }, - { - "epoch": 1.55, - "learning_rate": 0.009815962392782726, - "loss": 8.0644, - "step": 360800 - }, - { - "epoch": 1.55, - "learning_rate": 0.009814754026921321, - "loss": 8.0788, - "step": 361000 - }, - { - "epoch": 1.55, - "learning_rate": 0.009813541781915677, - "loss": 8.0696, - "step": 361200 - }, - { - "epoch": 1.55, - "learning_rate": 0.009812325658742472, - "loss": 8.0897, - "step": 361400 - }, - { - "epoch": 1.55, - "learning_rate": 0.009811105658381522, - "loss": 8.073, - "step": 361600 - }, - { - "epoch": 1.55, - "learning_rate": 0.009809881781815754, - "loss": 8.0717, - "step": 361800 - }, - { - "epoch": 1.56, - "learning_rate": 0.009808654030031224, - "loss": 8.0708, - "step": 362000 - }, - { - "epoch": 1.56, - "learning_rate": 0.009807422404017113, - "loss": 8.0896, - "step": 362200 - }, - { - "epoch": 1.56, - "learning_rate": 0.009806186904765715, - "loss": 8.0632, - "step": 362400 - }, - { - "epoch": 1.56, - "learning_rate": 0.009804947533272452, - "loss": 8.0801, - "step": 362600 - }, - { - "epoch": 1.56, - "learning_rate": 0.009803704290535863, - "loss": 8.0869, - "step": 362800 - }, - { - "epoch": 1.56, - "learning_rate": 0.009802463422748065, - "loss": 8.0947, - "step": 363000 - }, - { - "epoch": 1.56, - "learning_rate": 0.009801218724314024, - "loss": 8.085, - "step": 363200 - }, - { - "epoch": 1.56, - "learning_rate": 0.00979996391254717, - "loss": 8.0846, - "step": 363400 - }, - { - "epoch": 1.56, - "learning_rate": 0.009798705233552201, - "loss": 8.0767, - "step": 363600 - }, - { - "epoch": 1.56, - "learning_rate": 0.009797442688343216, - "loss": 8.065, - "step": 363800 - }, - { - "epoch": 1.56, - "learning_rate": 0.009796176277937422, - "loss": 8.0647, - "step": 364000 - }, - { - "epoch": 1.57, - "learning_rate": 0.009794906003355142, - "loss": 8.0868, - "step": 364200 - }, - { - "epoch": 1.57, - "learning_rate": 0.009793631865619818, - "loss": 8.0789, - "step": 364400 - }, - { - "epoch": 1.57, - "learning_rate": 0.009792353865757993, - "loss": 8.0747, - "step": 364600 - }, - { - "epoch": 1.57, - "learning_rate": 0.009791072004799336, - "loss": 8.0772, - "step": 364800 - }, - { - "epoch": 1.57, - "learning_rate": 0.009789786283776613, - "loss": 8.0783, - "step": 365000 - }, - { - "epoch": 1.57, - "learning_rate": 0.00978849670372571, - "loss": 8.0915, - "step": 365200 - }, - { - "epoch": 1.57, - "learning_rate": 0.009787203265685617, - "loss": 8.0795, - "step": 365400 - }, - { - "epoch": 1.57, - "learning_rate": 0.009785912466765799, - "loss": 8.0803, - "step": 365600 - }, - { - "epoch": 1.57, - "learning_rate": 0.009784611335153636, - "loss": 8.078, - "step": 365800 - }, - { - "epoch": 1.57, - "learning_rate": 0.009783306348682656, - "loss": 8.0823, - "step": 366000 - }, - { - "epoch": 1.57, - "learning_rate": 0.00978199750840426, - "loss": 8.0735, - "step": 366200 - }, - { - "epoch": 1.57, - "learning_rate": 0.00978068481537296, - "loss": 8.0883, - "step": 366400 - }, - { - "epoch": 1.58, - "learning_rate": 0.009779368270646367, - "loss": 8.0765, - "step": 366600 - }, - { - "epoch": 1.58, - "learning_rate": 0.009778047875285197, - "loss": 8.0648, - "step": 366800 - }, - { - "epoch": 1.58, - "learning_rate": 0.009776723630353267, - "loss": 8.0831, - "step": 367000 - }, - { - "epoch": 1.58, - "learning_rate": 0.009775395536917504, - "loss": 8.0691, - "step": 367200 - }, - { - "epoch": 1.58, - "learning_rate": 0.009774063596047922, - "loss": 8.0736, - "step": 367400 - }, - { - "epoch": 1.58, - "learning_rate": 0.009772727808817645, - "loss": 8.0737, - "step": 367600 - }, - { - "epoch": 1.58, - "learning_rate": 0.009771388176302893, - "loss": 8.0864, - "step": 367800 - }, - { - "epoch": 1.58, - "learning_rate": 0.00977004469958298, - "loss": 8.0775, - "step": 368000 - }, - { - "epoch": 1.58, - "learning_rate": 0.009768697379740326, - "loss": 8.0772, - "step": 368200 - }, - { - "epoch": 1.58, - "learning_rate": 0.00976734621786044, - "loss": 8.0798, - "step": 368400 - }, - { - "epoch": 1.58, - "learning_rate": 0.009765991215031926, - "loss": 8.075, - "step": 368600 - }, - { - "epoch": 1.58, - "learning_rate": 0.00976463237234649, - "loss": 8.0738, - "step": 368800 - }, - { - "epoch": 1.59, - "learning_rate": 0.009763269690898925, - "loss": 8.0761, - "step": 369000 - }, - { - "epoch": 1.59, - "learning_rate": 0.009761903171787116, - "loss": 8.0791, - "step": 369200 - }, - { - "epoch": 1.59, - "learning_rate": 0.009760539677432048, - "loss": 8.0838, - "step": 369400 - }, - { - "epoch": 1.59, - "learning_rate": 0.009759165505472333, - "loss": 8.0653, - "step": 369600 - }, - { - "epoch": 1.59, - "learning_rate": 0.009757787499155046, - "loss": 8.0759, - "step": 369800 - }, - { - "epoch": 1.59, - "learning_rate": 0.009756405659590418, - "loss": 8.0687, - "step": 370000 - }, - { - "epoch": 1.59, - "learning_rate": 0.009755019987891776, - "loss": 8.0934, - "step": 370200 - }, - { - "epoch": 1.59, - "learning_rate": 0.00975363048517553, - "loss": 8.0745, - "step": 370400 - }, - { - "epoch": 1.59, - "learning_rate": 0.009752237152561174, - "loss": 8.0812, - "step": 370600 - }, - { - "epoch": 1.59, - "learning_rate": 0.009750839991171295, - "loss": 8.0748, - "step": 370800 - }, - { - "epoch": 1.59, - "learning_rate": 0.009749439002131561, - "loss": 8.0891, - "step": 371000 - }, - { - "epoch": 1.6, - "learning_rate": 0.00974803418657072, - "loss": 8.0898, - "step": 371200 - }, - { - "epoch": 1.6, - "learning_rate": 0.00974662554562061, - "loss": 8.0657, - "step": 371400 - }, - { - "epoch": 1.6, - "learning_rate": 0.009745213080416143, - "loss": 8.0746, - "step": 371600 - }, - { - "epoch": 1.6, - "learning_rate": 0.00974379679209532, - "loss": 8.0735, - "step": 371800 - }, - { - "epoch": 1.6, - "learning_rate": 0.00974237668179922, - "loss": 8.0785, - "step": 372000 - }, - { - "epoch": 1.6, - "learning_rate": 0.009740952750671997, - "loss": 8.0767, - "step": 372200 - }, - { - "epoch": 1.6, - "learning_rate": 0.009739532148114509, - "loss": 8.0943, - "step": 372400 - }, - { - "epoch": 1.6, - "learning_rate": 0.00973810059785963, - "loss": 8.0791, - "step": 372600 - }, - { - "epoch": 1.6, - "learning_rate": 0.009736665230218794, - "loss": 8.0772, - "step": 372800 - }, - { - "epoch": 1.6, - "learning_rate": 0.00973522604634845, - "loss": 8.0755, - "step": 373000 - }, - { - "epoch": 1.6, - "learning_rate": 0.009733783047408122, - "loss": 8.0696, - "step": 373200 - }, - { - "epoch": 1.6, - "learning_rate": 0.009732336234560409, - "loss": 8.0696, - "step": 373400 - }, - { - "epoch": 1.61, - "learning_rate": 0.009730885608970983, - "loss": 8.0741, - "step": 373600 - }, - { - "epoch": 1.61, - "learning_rate": 0.009729431171808583, - "loss": 8.0732, - "step": 373800 - }, - { - "epoch": 1.61, - "learning_rate": 0.009727972924245026, - "loss": 8.0652, - "step": 374000 - }, - { - "epoch": 1.61, - "learning_rate": 0.009726510867455195, - "loss": 8.072, - "step": 374200 - }, - { - "epoch": 1.61, - "learning_rate": 0.009725052341411803, - "loss": 8.0702, - "step": 374400 - }, - { - "epoch": 1.61, - "learning_rate": 0.009723582688737744, - "loss": 8.0745, - "step": 374600 - }, - { - "epoch": 1.61, - "learning_rate": 0.009722109230374545, - "loss": 8.0754, - "step": 374800 - }, - { - "epoch": 1.61, - "learning_rate": 0.009720631967509344, - "loss": 8.0892, - "step": 375000 - }, - { - "epoch": 1.61, - "learning_rate": 0.009719150901332347, - "loss": 8.084, - "step": 375200 - }, - { - "epoch": 1.61, - "learning_rate": 0.009717666033036818, - "loss": 8.068, - "step": 375400 - }, - { - "epoch": 1.61, - "learning_rate": 0.009716177363819092, - "loss": 8.0921, - "step": 375600 - }, - { - "epoch": 1.61, - "learning_rate": 0.009714684894878563, - "loss": 8.063, - "step": 375800 - }, - { - "epoch": 1.62, - "learning_rate": 0.009713188627417683, - "loss": 8.0848, - "step": 376000 - }, - { - "epoch": 1.62, - "learning_rate": 0.009711688562641973, - "loss": 8.0661, - "step": 376200 - }, - { - "epoch": 1.62, - "learning_rate": 0.009710184701760003, - "loss": 8.0686, - "step": 376400 - }, - { - "epoch": 1.62, - "learning_rate": 0.009708684593700084, - "loss": 8.0732, - "step": 376600 - }, - { - "epoch": 1.62, - "learning_rate": 0.00970717316320893, - "loss": 8.0787, - "step": 376800 - }, - { - "epoch": 1.62, - "learning_rate": 0.009705657940249492, - "loss": 8.0698, - "step": 377000 - }, - { - "epoch": 1.62, - "learning_rate": 0.009704138926042561, - "loss": 8.067, - "step": 377200 - }, - { - "epoch": 1.62, - "learning_rate": 0.009702616121811978, - "loss": 8.0735, - "step": 377400 - }, - { - "epoch": 1.62, - "learning_rate": 0.009701089528784639, - "loss": 8.0744, - "step": 377600 - }, - { - "epoch": 1.62, - "learning_rate": 0.009699559148190491, - "loss": 8.0665, - "step": 377800 - }, - { - "epoch": 1.62, - "learning_rate": 0.009698024981262536, - "loss": 8.0717, - "step": 378000 - }, - { - "epoch": 1.63, - "learning_rate": 0.009696487029236825, - "loss": 8.0809, - "step": 378200 - }, - { - "epoch": 1.63, - "learning_rate": 0.009694945293352455, - "loss": 8.0874, - "step": 378400 - }, - { - "epoch": 1.63, - "learning_rate": 0.009693399774851577, - "loss": 8.0598, - "step": 378600 - }, - { - "epoch": 1.63, - "learning_rate": 0.009691850474979386, - "loss": 8.0725, - "step": 378800 - }, - { - "epoch": 1.63, - "learning_rate": 0.009690297394984127, - "loss": 8.0874, - "step": 379000 - }, - { - "epoch": 1.63, - "learning_rate": 0.00968874053611709, - "loss": 8.0871, - "step": 379200 - }, - { - "epoch": 1.63, - "learning_rate": 0.009687179899632602, - "loss": 8.0759, - "step": 379400 - }, - { - "epoch": 1.63, - "learning_rate": 0.009685615486788044, - "loss": 8.0568, - "step": 379600 - }, - { - "epoch": 1.63, - "learning_rate": 0.009684047298843836, - "loss": 8.076, - "step": 379800 - }, - { - "epoch": 1.63, - "learning_rate": 0.009682475337063436, - "loss": 8.08, - "step": 380000 - }, - { - "epoch": 1.63, - "learning_rate": 0.009680899602713344, - "loss": 8.0757, - "step": 380200 - }, - { - "epoch": 1.63, - "learning_rate": 0.009679320097063106, - "loss": 8.0779, - "step": 380400 - }, - { - "epoch": 1.64, - "learning_rate": 0.00967774474713952, - "loss": 8.0765, - "step": 380600 - }, - { - "epoch": 1.64, - "learning_rate": 0.00967615772155034, - "loss": 8.0866, - "step": 380800 - }, - { - "epoch": 1.64, - "learning_rate": 0.009674566928481457, - "loss": 8.0756, - "step": 381000 - }, - { - "epoch": 1.64, - "learning_rate": 0.009672972369214548, - "loss": 8.0703, - "step": 381200 - }, - { - "epoch": 1.64, - "learning_rate": 0.009671374045034318, - "loss": 8.0833, - "step": 381400 - }, - { - "epoch": 1.64, - "learning_rate": 0.009669771957228509, - "loss": 8.085, - "step": 381600 - }, - { - "epoch": 1.64, - "learning_rate": 0.009668166107087895, - "loss": 8.0639, - "step": 381800 - }, - { - "epoch": 1.64, - "learning_rate": 0.009666556495906276, - "loss": 8.0714, - "step": 382000 - }, - { - "epoch": 1.64, - "learning_rate": 0.009664943124980493, - "loss": 8.0766, - "step": 382200 - }, - { - "epoch": 1.64, - "learning_rate": 0.009663325995610405, - "loss": 8.0745, - "step": 382400 - }, - { - "epoch": 1.64, - "learning_rate": 0.009661713222875195, - "loss": 8.0584, - "step": 382600 - }, - { - "epoch": 1.64, - "learning_rate": 0.009660088599304128, - "loss": 8.059, - "step": 382800 - }, - { - "epoch": 1.65, - "learning_rate": 0.009658460221199962, - "loss": 8.061, - "step": 383000 - }, - { - "epoch": 1.65, - "learning_rate": 0.00965682808987465, - "loss": 8.072, - "step": 383200 - }, - { - "epoch": 1.65, - "learning_rate": 0.009655192206643175, - "loss": 8.0822, - "step": 383400 - }, - { - "epoch": 1.65, - "learning_rate": 0.009653552572823534, - "loss": 8.0664, - "step": 383600 - }, - { - "epoch": 1.65, - "learning_rate": 0.00965190918973675, - "loss": 8.0711, - "step": 383800 - }, - { - "epoch": 1.65, - "learning_rate": 0.00965026205870687, - "loss": 8.0875, - "step": 384000 - }, - { - "epoch": 1.65, - "learning_rate": 0.00964861118106096, - "loss": 8.0885, - "step": 384200 - }, - { - "epoch": 1.65, - "learning_rate": 0.009646956558129097, - "loss": 8.0865, - "step": 384400 - }, - { - "epoch": 1.65, - "learning_rate": 0.009645298191244382, - "loss": 8.075, - "step": 384600 - }, - { - "epoch": 1.65, - "learning_rate": 0.009643644401597984, - "loss": 8.0795, - "step": 384800 - }, - { - "epoch": 1.65, - "learning_rate": 0.009641978569521984, - "loss": 8.0749, - "step": 385000 - }, - { - "epoch": 1.66, - "learning_rate": 0.009640308997503807, - "loss": 8.073, - "step": 385200 - }, - { - "epoch": 1.66, - "learning_rate": 0.0096386356868886, - "loss": 8.0784, - "step": 385400 - }, - { - "epoch": 1.66, - "learning_rate": 0.009636958639024517, - "loss": 8.0846, - "step": 385600 - }, - { - "epoch": 1.66, - "learning_rate": 0.009635277855262725, - "loss": 8.0809, - "step": 385800 - }, - { - "epoch": 1.66, - "learning_rate": 0.009633593336957402, - "loss": 8.0836, - "step": 386000 - }, - { - "epoch": 1.66, - "learning_rate": 0.009631905085465733, - "loss": 8.0736, - "step": 386200 - }, - { - "epoch": 1.66, - "learning_rate": 0.009630213102147914, - "loss": 8.0727, - "step": 386400 - }, - { - "epoch": 1.66, - "learning_rate": 0.009628517388367142, - "loss": 8.0831, - "step": 386600 - }, - { - "epoch": 1.66, - "learning_rate": 0.009626817945489627, - "loss": 8.0727, - "step": 386800 - }, - { - "epoch": 1.66, - "learning_rate": 0.009625114774884577, - "loss": 8.0769, - "step": 387000 - }, - { - "epoch": 1.66, - "learning_rate": 0.009623407877924206, - "loss": 8.072, - "step": 387200 - }, - { - "epoch": 1.66, - "learning_rate": 0.009621705818357041, - "loss": 8.072, - "step": 387400 - }, - { - "epoch": 1.67, - "learning_rate": 0.009619991491429253, - "loss": 8.0711, - "step": 387600 - }, - { - "epoch": 1.67, - "learning_rate": 0.00961827344227388, - "loss": 8.0685, - "step": 387800 - }, - { - "epoch": 1.67, - "learning_rate": 0.009616551672275127, - "loss": 8.0567, - "step": 388000 - }, - { - "epoch": 1.67, - "learning_rate": 0.00961482618282019, - "loss": 8.0581, - "step": 388200 - }, - { - "epoch": 1.67, - "learning_rate": 0.009613096975299263, - "loss": 8.0792, - "step": 388400 - }, - { - "epoch": 1.67, - "learning_rate": 0.009611364051105539, - "loss": 8.0678, - "step": 388600 - }, - { - "epoch": 1.67, - "learning_rate": 0.009609627411635206, - "loss": 8.0801, - "step": 388800 - }, - { - "epoch": 1.67, - "learning_rate": 0.009607887058287438, - "loss": 8.0731, - "step": 389000 - }, - { - "epoch": 1.67, - "learning_rate": 0.009606142992464413, - "loss": 8.0718, - "step": 389200 - }, - { - "epoch": 1.67, - "learning_rate": 0.009604403963684711, - "loss": 8.0696, - "step": 389400 - }, - { - "epoch": 1.67, - "learning_rate": 0.009602652495674442, - "loss": 8.0689, - "step": 389600 - }, - { - "epoch": 1.68, - "learning_rate": 0.009600897319406304, - "loss": 8.0755, - "step": 389800 - }, - { - "epoch": 1.68, - "learning_rate": 0.00959913843629441, - "loss": 8.0722, - "step": 390000 - }, - { - "epoch": 1.68, - "learning_rate": 0.009597375847755866, - "loss": 8.0656, - "step": 390200 - }, - { - "epoch": 1.68, - "learning_rate": 0.00959560955521075, - "loss": 8.0653, - "step": 390400 - }, - { - "epoch": 1.68, - "learning_rate": 0.009593839560082137, - "loss": 8.0651, - "step": 390600 - }, - { - "epoch": 1.68, - "learning_rate": 0.009592065863796078, - "loss": 8.0715, - "step": 390800 - }, - { - "epoch": 1.68, - "learning_rate": 0.00959028846778161, - "loss": 8.0753, - "step": 391000 - }, - { - "epoch": 1.68, - "learning_rate": 0.009588507373470747, - "loss": 8.067, - "step": 391200 - }, - { - "epoch": 1.68, - "learning_rate": 0.009586731515447913, - "loss": 8.067, - "step": 391400 - }, - { - "epoch": 1.68, - "learning_rate": 0.009584943047325762, - "loss": 8.0735, - "step": 391600 - }, - { - "epoch": 1.68, - "learning_rate": 0.009583150885213925, - "loss": 8.0731, - "step": 391800 - }, - { - "epoch": 1.68, - "learning_rate": 0.009581355030556316, - "loss": 8.0585, - "step": 392000 - }, - { - "epoch": 1.69, - "learning_rate": 0.009579555484799822, - "loss": 8.0655, - "step": 392200 - }, - { - "epoch": 1.69, - "learning_rate": 0.009577752249394302, - "loss": 8.0852, - "step": 392400 - }, - { - "epoch": 1.69, - "learning_rate": 0.009575945325792593, - "loss": 8.0726, - "step": 392600 - }, - { - "epoch": 1.69, - "learning_rate": 0.0095741347154505, - "loss": 8.0765, - "step": 392800 - }, - { - "epoch": 1.69, - "learning_rate": 0.0095723204198268, - "loss": 8.0895, - "step": 393000 - }, - { - "epoch": 1.69, - "learning_rate": 0.009570502440383238, - "loss": 8.0821, - "step": 393200 - }, - { - "epoch": 1.69, - "learning_rate": 0.009568680778584528, - "loss": 8.0698, - "step": 393400 - }, - { - "epoch": 1.69, - "learning_rate": 0.009566855435898346, - "loss": 8.0677, - "step": 393600 - }, - { - "epoch": 1.69, - "learning_rate": 0.00956502641379534, - "loss": 8.0795, - "step": 393800 - }, - { - "epoch": 1.69, - "learning_rate": 0.009563193713749123, - "loss": 8.0762, - "step": 394000 - }, - { - "epoch": 1.69, - "learning_rate": 0.009561366528261592, - "loss": 8.0688, - "step": 394200 - }, - { - "epoch": 1.69, - "learning_rate": 0.009559526495132878, - "loss": 8.0634, - "step": 394400 - }, - { - "epoch": 1.7, - "learning_rate": 0.009557682788492136, - "loss": 8.0791, - "step": 394600 - }, - { - "epoch": 1.7, - "learning_rate": 0.009555835409824806, - "loss": 8.0791, - "step": 394800 - }, - { - "epoch": 1.7, - "learning_rate": 0.00955398436061929, - "loss": 8.0698, - "step": 395000 - }, - { - "epoch": 1.7, - "learning_rate": 0.009552129642366942, - "loss": 8.0618, - "step": 395200 - }, - { - "epoch": 1.7, - "learning_rate": 0.009550271256562074, - "loss": 8.0751, - "step": 395400 - }, - { - "epoch": 1.7, - "learning_rate": 0.009548409204701958, - "loss": 8.0636, - "step": 395600 - }, - { - "epoch": 1.7, - "learning_rate": 0.009546543488286813, - "loss": 8.0713, - "step": 395800 - }, - { - "epoch": 1.7, - "learning_rate": 0.009544674108819814, - "loss": 8.0672, - "step": 396000 - }, - { - "epoch": 1.7, - "learning_rate": 0.009542801067807086, - "loss": 8.0699, - "step": 396200 - }, - { - "epoch": 1.7, - "learning_rate": 0.009540924366757705, - "loss": 8.0642, - "step": 396400 - }, - { - "epoch": 1.7, - "learning_rate": 0.009539053418079637, - "loss": 8.0812, - "step": 396600 - }, - { - "epoch": 1.71, - "learning_rate": 0.009537169419777242, - "loss": 8.0756, - "step": 396800 - }, - { - "epoch": 1.71, - "learning_rate": 0.00953528176597551, - "loss": 8.0737, - "step": 397000 - }, - { - "epoch": 1.71, - "learning_rate": 0.00953339045819529, - "loss": 8.0737, - "step": 397200 - }, - { - "epoch": 1.71, - "learning_rate": 0.009531495497960375, - "loss": 8.07, - "step": 397400 - }, - { - "epoch": 1.71, - "learning_rate": 0.0095295968867975, - "loss": 8.0719, - "step": 397600 - }, - { - "epoch": 1.71, - "learning_rate": 0.009527694626236339, - "loss": 8.065, - "step": 397800 - }, - { - "epoch": 1.71, - "learning_rate": 0.009525788717809514, - "loss": 8.0647, - "step": 398000 - }, - { - "epoch": 1.71, - "learning_rate": 0.009523879163052576, - "loss": 8.0758, - "step": 398200 - }, - { - "epoch": 1.71, - "learning_rate": 0.009521965963504022, - "loss": 8.0686, - "step": 398400 - }, - { - "epoch": 1.71, - "learning_rate": 0.00952004912070528, - "loss": 8.0701, - "step": 398600 - }, - { - "epoch": 1.71, - "learning_rate": 0.00951812863620072, - "loss": 8.0538, - "step": 398800 - }, - { - "epoch": 1.71, - "learning_rate": 0.009516214141213283, - "loss": 8.0736, - "step": 399000 - }, - { - "epoch": 1.72, - "learning_rate": 0.009514286396131092, - "loss": 8.0691, - "step": 399200 - }, - { - "epoch": 1.72, - "learning_rate": 0.009512355013986002, - "loss": 8.0671, - "step": 399400 - }, - { - "epoch": 1.72, - "learning_rate": 0.009510419996334094, - "loss": 8.0678, - "step": 399600 - }, - { - "epoch": 1.72, - "learning_rate": 0.009508481344734376, - "loss": 8.0627, - "step": 399800 - }, - { - "epoch": 1.72, - "learning_rate": 0.009506539060748786, - "loss": 8.0724, - "step": 400000 - }, - { - "epoch": 1.72, - "learning_rate": 0.009504593145942186, - "loss": 8.0741, - "step": 400200 - }, - { - "epoch": 1.72, - "learning_rate": 0.009502643601882365, - "loss": 8.0706, - "step": 400400 - }, - { - "epoch": 1.72, - "learning_rate": 0.009500690430140036, - "loss": 8.0797, - "step": 400600 - }, - { - "epoch": 1.72, - "learning_rate": 0.009498733632288831, - "loss": 8.0741, - "step": 400800 - }, - { - "epoch": 1.72, - "learning_rate": 0.00949678302103064, - "loss": 8.0668, - "step": 401000 - }, - { - "epoch": 1.72, - "learning_rate": 0.00949481899380511, - "loss": 8.0695, - "step": 401200 - }, - { - "epoch": 1.72, - "learning_rate": 0.009492851345201215, - "loss": 8.08, - "step": 401400 - }, - { - "epoch": 1.73, - "learning_rate": 0.009490880076804258, - "loss": 8.0839, - "step": 401600 - }, - { - "epoch": 1.73, - "learning_rate": 0.00948890519020245, - "loss": 8.0675, - "step": 401800 - }, - { - "epoch": 1.73, - "learning_rate": 0.009486926686986924, - "loss": 8.0652, - "step": 402000 - }, - { - "epoch": 1.73, - "learning_rate": 0.009484944568751723, - "loss": 8.0629, - "step": 402200 - }, - { - "epoch": 1.73, - "learning_rate": 0.009482958837093805, - "loss": 8.0866, - "step": 402400 - }, - { - "epoch": 1.73, - "learning_rate": 0.009480969493613036, - "loss": 8.0683, - "step": 402600 - }, - { - "epoch": 1.73, - "learning_rate": 0.009478976539912197, - "loss": 8.0662, - "step": 402800 - }, - { - "epoch": 1.73, - "learning_rate": 0.009476979977596974, - "loss": 8.0768, - "step": 403000 - }, - { - "epoch": 1.73, - "learning_rate": 0.009474989818092326, - "loss": 8.0691, - "step": 403200 - }, - { - "epoch": 1.73, - "learning_rate": 0.009472986061399982, - "loss": 8.0622, - "step": 403400 - }, - { - "epoch": 1.73, - "learning_rate": 0.009470978700919671, - "loss": 8.0695, - "step": 403600 - }, - { - "epoch": 1.74, - "learning_rate": 0.00946896773826869, - "loss": 8.062, - "step": 403800 - }, - { - "epoch": 1.74, - "learning_rate": 0.009466953175067233, - "loss": 8.0726, - "step": 404000 - }, - { - "epoch": 1.74, - "learning_rate": 0.009464935012938405, - "loss": 8.0685, - "step": 404200 - }, - { - "epoch": 1.74, - "learning_rate": 0.009462913253508193, - "loss": 8.0697, - "step": 404400 - }, - { - "epoch": 1.74, - "learning_rate": 0.009460887898405497, - "loss": 8.0716, - "step": 404600 - }, - { - "epoch": 1.74, - "learning_rate": 0.009458858949262109, - "loss": 8.051, - "step": 404800 - }, - { - "epoch": 1.74, - "learning_rate": 0.009456826407712716, - "loss": 8.0683, - "step": 405000 - }, - { - "epoch": 1.74, - "learning_rate": 0.009454790275394899, - "loss": 8.0548, - "step": 405200 - }, - { - "epoch": 1.74, - "learning_rate": 0.009452750553949134, - "loss": 8.0699, - "step": 405400 - }, - { - "epoch": 1.74, - "learning_rate": 0.009450717470484584, - "loss": 8.0683, - "step": 405600 - }, - { - "epoch": 1.74, - "learning_rate": 0.009448670593641007, - "loss": 8.0635, - "step": 405800 - }, - { - "epoch": 1.74, - "learning_rate": 0.009446620132599996, - "loss": 8.072, - "step": 406000 - }, - { - "epoch": 1.75, - "learning_rate": 0.009444566089013576, - "loss": 8.0622, - "step": 406200 - }, - { - "epoch": 1.75, - "learning_rate": 0.009442508464536647, - "loss": 8.0743, - "step": 406400 - }, - { - "epoch": 1.75, - "learning_rate": 0.009440447260827005, - "loss": 8.0731, - "step": 406600 - }, - { - "epoch": 1.75, - "learning_rate": 0.009438382479545324, - "loss": 8.0728, - "step": 406800 - }, - { - "epoch": 1.75, - "learning_rate": 0.009436314122355158, - "loss": 8.0648, - "step": 407000 - }, - { - "epoch": 1.75, - "learning_rate": 0.009434242190922948, - "loss": 8.0697, - "step": 407200 - }, - { - "epoch": 1.75, - "learning_rate": 0.009432166686918009, - "loss": 8.0681, - "step": 407400 - }, - { - "epoch": 1.75, - "learning_rate": 0.009430098016266913, - "loss": 8.0571, - "step": 407600 - }, - { - "epoch": 1.75, - "learning_rate": 0.009428015389977938, - "loss": 8.0632, - "step": 407800 - }, - { - "epoch": 1.75, - "learning_rate": 0.00942592919613306, - "loss": 8.0725, - "step": 408000 - }, - { - "epoch": 1.75, - "learning_rate": 0.009423839436413083, - "loss": 8.0615, - "step": 408200 - }, - { - "epoch": 1.75, - "learning_rate": 0.00942174611250169, - "loss": 8.0723, - "step": 408400 - }, - { - "epoch": 1.76, - "learning_rate": 0.009419649226085436, - "loss": 8.0706, - "step": 408600 - }, - { - "epoch": 1.76, - "learning_rate": 0.00941754877885374, - "loss": 8.0684, - "step": 408800 - }, - { - "epoch": 1.76, - "learning_rate": 0.009415444772498898, - "loss": 8.0564, - "step": 409000 - }, - { - "epoch": 1.76, - "learning_rate": 0.009413337208716068, - "loss": 8.0693, - "step": 409200 - }, - { - "epoch": 1.76, - "learning_rate": 0.009411226089203278, - "loss": 8.0635, - "step": 409400 - }, - { - "epoch": 1.76, - "learning_rate": 0.009409111415661418, - "loss": 8.0774, - "step": 409600 - }, - { - "epoch": 1.76, - "learning_rate": 0.009406993189794242, - "loss": 8.0759, - "step": 409800 - }, - { - "epoch": 1.76, - "learning_rate": 0.009404882031020132, - "loss": 8.0788, - "step": 410000 - }, - { - "epoch": 1.76, - "learning_rate": 0.009402756723365322, - "loss": 8.0631, - "step": 410200 - }, - { - "epoch": 1.76, - "learning_rate": 0.00940062786850506, - "loss": 8.0681, - "step": 410400 - }, - { - "epoch": 1.76, - "learning_rate": 0.009398495468154518, - "loss": 8.0717, - "step": 410600 - }, - { - "epoch": 1.77, - "learning_rate": 0.00939635952403174, - "loss": 8.068, - "step": 410800 - }, - { - "epoch": 1.77, - "learning_rate": 0.009394220037857616, - "loss": 8.072, - "step": 411000 - }, - { - "epoch": 1.77, - "learning_rate": 0.00939207701135589, - "loss": 8.0664, - "step": 411200 - }, - { - "epoch": 1.77, - "learning_rate": 0.009389930446253159, - "loss": 8.0715, - "step": 411400 - }, - { - "epoch": 1.77, - "learning_rate": 0.009387780344278873, - "loss": 8.0616, - "step": 411600 - }, - { - "epoch": 1.77, - "learning_rate": 0.009385626707165329, - "loss": 8.0785, - "step": 411800 - }, - { - "epoch": 1.77, - "learning_rate": 0.009383480331286732, - "loss": 8.0687, - "step": 412000 - }, - { - "epoch": 1.77, - "learning_rate": 0.009381319646756967, - "loss": 8.0716, - "step": 412200 - }, - { - "epoch": 1.77, - "learning_rate": 0.009379155432293208, - "loss": 8.0631, - "step": 412400 - }, - { - "epoch": 1.77, - "learning_rate": 0.009376987689639129, - "loss": 8.0665, - "step": 412600 - }, - { - "epoch": 1.77, - "learning_rate": 0.00937481642054124, - "loss": 8.0671, - "step": 412800 - }, - { - "epoch": 1.77, - "learning_rate": 0.009372641626748895, - "loss": 8.0723, - "step": 413000 - }, - { - "epoch": 1.78, - "learning_rate": 0.009370463310014282, - "loss": 8.0738, - "step": 413200 - }, - { - "epoch": 1.78, - "learning_rate": 0.009368281472092434, - "loss": 8.0658, - "step": 413400 - }, - { - "epoch": 1.78, - "learning_rate": 0.009366096114741219, - "loss": 8.0535, - "step": 413600 - }, - { - "epoch": 1.78, - "learning_rate": 0.009363907239721341, - "loss": 8.0718, - "step": 413800 - }, - { - "epoch": 1.78, - "learning_rate": 0.009361714848796332, - "loss": 8.0638, - "step": 414000 - }, - { - "epoch": 1.78, - "learning_rate": 0.00935952993199638, - "loss": 8.0731, - "step": 414200 - }, - { - "epoch": 1.78, - "learning_rate": 0.0093573305321205, - "loss": 8.0716, - "step": 414400 - }, - { - "epoch": 1.78, - "learning_rate": 0.009355127621638227, - "loss": 8.0609, - "step": 414600 - }, - { - "epoch": 1.78, - "learning_rate": 0.009352921202324404, - "loss": 8.0659, - "step": 414800 - }, - { - "epoch": 1.78, - "learning_rate": 0.009350711275956707, - "loss": 8.0471, - "step": 415000 - }, - { - "epoch": 1.78, - "learning_rate": 0.00934849784431563, - "loss": 8.0587, - "step": 415200 - }, - { - "epoch": 1.79, - "learning_rate": 0.009346280909184499, - "loss": 8.0631, - "step": 415400 - }, - { - "epoch": 1.79, - "learning_rate": 0.009344060472349454, - "loss": 8.071, - "step": 415600 - }, - { - "epoch": 1.79, - "learning_rate": 0.009341836535599465, - "loss": 8.0537, - "step": 415800 - }, - { - "epoch": 1.79, - "learning_rate": 0.009339609100726315, - "loss": 8.0733, - "step": 416000 - }, - { - "epoch": 1.79, - "learning_rate": 0.009337378169524612, - "loss": 8.0621, - "step": 416200 - }, - { - "epoch": 1.79, - "learning_rate": 0.00933514374379177, - "loss": 8.0716, - "step": 416400 - }, - { - "epoch": 1.79, - "learning_rate": 0.00933290582532803, - "loss": 8.0698, - "step": 416600 - }, - { - "epoch": 1.79, - "learning_rate": 0.0093306756316641, - "loss": 8.0785, - "step": 416800 - }, - { - "epoch": 1.79, - "learning_rate": 0.00932843075059164, - "loss": 8.0517, - "step": 417000 - }, - { - "epoch": 1.79, - "learning_rate": 0.009326182382196818, - "loss": 8.062, - "step": 417200 - }, - { - "epoch": 1.79, - "learning_rate": 0.009323930528291106, - "loss": 8.0614, - "step": 417400 - }, - { - "epoch": 1.79, - "learning_rate": 0.009321675190688782, - "loss": 8.0655, - "step": 417600 - }, - { - "epoch": 1.8, - "learning_rate": 0.009319416371206927, - "loss": 8.0631, - "step": 417800 - }, - { - "epoch": 1.8, - "learning_rate": 0.009317154071665433, - "loss": 8.0678, - "step": 418000 - }, - { - "epoch": 1.8, - "learning_rate": 0.009314888293886997, - "loss": 8.0718, - "step": 418200 - }, - { - "epoch": 1.8, - "learning_rate": 0.009312619039697111, - "loss": 8.072, - "step": 418400 - }, - { - "epoch": 1.8, - "learning_rate": 0.009310346310924072, - "loss": 8.0645, - "step": 418600 - }, - { - "epoch": 1.8, - "learning_rate": 0.009308081499042043, - "loss": 8.0658, - "step": 418800 - }, - { - "epoch": 1.8, - "learning_rate": 0.009305801843948816, - "loss": 8.0696, - "step": 419000 - }, - { - "epoch": 1.8, - "learning_rate": 0.009303518719764928, - "loss": 8.0549, - "step": 419200 - }, - { - "epoch": 1.8, - "learning_rate": 0.009301232128329852, - "loss": 8.0735, - "step": 419400 - }, - { - "epoch": 1.8, - "learning_rate": 0.009298942071485851, - "loss": 8.06, - "step": 419600 - }, - { - "epoch": 1.8, - "learning_rate": 0.009296648551077988, - "loss": 8.0655, - "step": 419800 - }, - { - "epoch": 1.8, - "learning_rate": 0.009294351568954107, - "loss": 8.0701, - "step": 420000 - }, - { - "epoch": 1.81, - "learning_rate": 0.009292051126964846, - "loss": 8.0697, - "step": 420200 - }, - { - "epoch": 1.81, - "learning_rate": 0.009289747226963628, - "loss": 8.0588, - "step": 420400 - }, - { - "epoch": 1.81, - "learning_rate": 0.009287439870806666, - "loss": 8.0627, - "step": 420600 - }, - { - "epoch": 1.81, - "learning_rate": 0.009285129060352955, - "loss": 8.0683, - "step": 420800 - }, - { - "epoch": 1.81, - "learning_rate": 0.009282814797464272, - "loss": 8.0649, - "step": 421000 - }, - { - "epoch": 1.81, - "learning_rate": 0.009280497084005176, - "loss": 8.0674, - "step": 421200 - }, - { - "epoch": 1.81, - "learning_rate": 0.009278187536229377, - "loss": 8.0571, - "step": 421400 - }, - { - "epoch": 1.81, - "learning_rate": 0.009275862944463765, - "loss": 8.0673, - "step": 421600 - }, - { - "epoch": 1.81, - "learning_rate": 0.009273534907728724, - "loss": 8.072, - "step": 421800 - }, - { - "epoch": 1.81, - "learning_rate": 0.00927120342789991, - "loss": 8.0654, - "step": 422000 - }, - { - "epoch": 1.81, - "learning_rate": 0.009268868506855755, - "loss": 8.0683, - "step": 422200 - }, - { - "epoch": 1.82, - "learning_rate": 0.009266530146477462, - "loss": 8.0635, - "step": 422400 - }, - { - "epoch": 1.82, - "learning_rate": 0.009264188348649009, - "loss": 8.0591, - "step": 422600 - }, - { - "epoch": 1.82, - "learning_rate": 0.009261843115257134, - "loss": 8.0593, - "step": 422800 - }, - { - "epoch": 1.82, - "learning_rate": 0.009259494448191356, - "loss": 8.068, - "step": 423000 - }, - { - "epoch": 1.82, - "learning_rate": 0.009257142349343949, - "loss": 8.062, - "step": 423200 - }, - { - "epoch": 1.82, - "learning_rate": 0.009254798606782333, - "loss": 8.0635, - "step": 423400 - }, - { - "epoch": 1.82, - "learning_rate": 0.009252439667194783, - "loss": 8.0633, - "step": 423600 - }, - { - "epoch": 1.82, - "learning_rate": 0.009250077301509516, - "loss": 8.0656, - "step": 423800 - }, - { - "epoch": 1.82, - "learning_rate": 0.009247711511629847, - "loss": 8.0529, - "step": 424000 - }, - { - "epoch": 1.82, - "learning_rate": 0.009245342299461848, - "loss": 8.0638, - "step": 424200 - }, - { - "epoch": 1.82, - "learning_rate": 0.009242969666914352, - "loss": 8.066, - "step": 424400 - }, - { - "epoch": 1.82, - "learning_rate": 0.009240593615898948, - "loss": 8.0633, - "step": 424600 - }, - { - "epoch": 1.83, - "learning_rate": 0.009238214148329973, - "loss": 8.0704, - "step": 424800 - }, - { - "epoch": 1.83, - "learning_rate": 0.009235831266124525, - "loss": 8.0745, - "step": 425000 - }, - { - "epoch": 1.83, - "learning_rate": 0.009233444971202449, - "loss": 8.0573, - "step": 425200 - }, - { - "epoch": 1.83, - "learning_rate": 0.009231055265486338, - "loss": 8.0593, - "step": 425400 - }, - { - "epoch": 1.83, - "learning_rate": 0.009228674124950832, - "loss": 8.0793, - "step": 425600 - }, - { - "epoch": 1.83, - "learning_rate": 0.00922627762045533, - "loss": 8.0497, - "step": 425800 - }, - { - "epoch": 1.83, - "learning_rate": 0.009223877710940395, - "loss": 8.0627, - "step": 426000 - }, - { - "epoch": 1.83, - "learning_rate": 0.009221474398339598, - "loss": 8.06, - "step": 426200 - }, - { - "epoch": 1.83, - "learning_rate": 0.009219067684589237, - "loss": 8.0518, - "step": 426400 - }, - { - "epoch": 1.83, - "learning_rate": 0.009216657571628366, - "loss": 8.059, - "step": 426600 - }, - { - "epoch": 1.83, - "learning_rate": 0.009214244061398763, - "loss": 8.069, - "step": 426800 - }, - { - "epoch": 1.83, - "learning_rate": 0.009211827155844952, - "loss": 8.0616, - "step": 427000 - }, - { - "epoch": 1.84, - "learning_rate": 0.00920940685691419, - "loss": 8.0549, - "step": 427200 - }, - { - "epoch": 1.84, - "learning_rate": 0.009206983166556467, - "loss": 8.0636, - "step": 427400 - }, - { - "epoch": 1.84, - "learning_rate": 0.00920455608672451, - "loss": 8.0596, - "step": 427600 - }, - { - "epoch": 1.84, - "learning_rate": 0.009202137780133738, - "loss": 8.0772, - "step": 427800 - }, - { - "epoch": 1.84, - "learning_rate": 0.009199703944145326, - "loss": 8.068, - "step": 428000 - }, - { - "epoch": 1.84, - "learning_rate": 0.009197266724547416, - "loss": 8.0613, - "step": 428200 - }, - { - "epoch": 1.84, - "learning_rate": 0.009194826123303633, - "loss": 8.0632, - "step": 428400 - }, - { - "epoch": 1.84, - "learning_rate": 0.009192382142380322, - "loss": 8.0728, - "step": 428600 - }, - { - "epoch": 1.84, - "learning_rate": 0.009189934783746556, - "loss": 8.0667, - "step": 428800 - }, - { - "epoch": 1.84, - "learning_rate": 0.009187484049374127, - "loss": 8.0503, - "step": 429000 - }, - { - "epoch": 1.84, - "learning_rate": 0.00918502994123755, - "loss": 8.0759, - "step": 429200 - }, - { - "epoch": 1.85, - "learning_rate": 0.009182572461314053, - "loss": 8.0513, - "step": 429400 - }, - { - "epoch": 1.85, - "learning_rate": 0.009180111611583584, - "loss": 8.0673, - "step": 429600 - }, - { - "epoch": 1.85, - "learning_rate": 0.009177659723490758, - "loss": 8.0551, - "step": 429800 - }, - { - "epoch": 1.85, - "learning_rate": 0.009175192156921296, - "loss": 8.062, - "step": 430000 - }, - { - "epoch": 1.85, - "learning_rate": 0.00917272122649104, - "loss": 8.0633, - "step": 430200 - }, - { - "epoch": 1.85, - "learning_rate": 0.009170246934190773, - "loss": 8.0681, - "step": 430400 - }, - { - "epoch": 1.85, - "learning_rate": 0.00916776928201399, - "loss": 8.0692, - "step": 430600 - }, - { - "epoch": 1.85, - "learning_rate": 0.009165288271956888, - "loss": 8.0702, - "step": 430800 - }, - { - "epoch": 1.85, - "learning_rate": 0.009162803906018374, - "loss": 8.0562, - "step": 431000 - }, - { - "epoch": 1.85, - "learning_rate": 0.009160316186200055, - "loss": 8.055, - "step": 431200 - }, - { - "epoch": 1.85, - "learning_rate": 0.009157825114506243, - "loss": 8.0539, - "step": 431400 - }, - { - "epoch": 1.85, - "learning_rate": 0.009155330692943948, - "loss": 8.0579, - "step": 431600 - }, - { - "epoch": 1.86, - "learning_rate": 0.009152832923522883, - "loss": 8.068, - "step": 431800 - }, - { - "epoch": 1.86, - "learning_rate": 0.009150344322151251, - "loss": 8.0719, - "step": 432000 - }, - { - "epoch": 1.86, - "learning_rate": 0.009147839879766704, - "loss": 8.0655, - "step": 432200 - }, - { - "epoch": 1.86, - "learning_rate": 0.009145332095558593, - "loss": 8.0704, - "step": 432400 - }, - { - "epoch": 1.86, - "learning_rate": 0.009142820971547403, - "loss": 8.0631, - "step": 432600 - }, - { - "epoch": 1.86, - "learning_rate": 0.009140306509756299, - "loss": 8.0535, - "step": 432800 - }, - { - "epoch": 1.86, - "learning_rate": 0.009137788712211135, - "loss": 8.0644, - "step": 433000 - }, - { - "epoch": 1.86, - "learning_rate": 0.009135267580940456, - "loss": 8.0631, - "step": 433200 - }, - { - "epoch": 1.86, - "learning_rate": 0.009132743117975494, - "loss": 8.0657, - "step": 433400 - }, - { - "epoch": 1.86, - "learning_rate": 0.00913021532535016, - "loss": 8.0609, - "step": 433600 - }, - { - "epoch": 1.86, - "learning_rate": 0.009127684205101055, - "loss": 8.061, - "step": 433800 - }, - { - "epoch": 1.87, - "learning_rate": 0.009125162439765635, - "loss": 8.0641, - "step": 434000 - }, - { - "epoch": 1.87, - "learning_rate": 0.009122624687002125, - "loss": 8.0728, - "step": 434200 - }, - { - "epoch": 1.87, - "learning_rate": 0.00912008361273048, - "loss": 8.0673, - "step": 434400 - }, - { - "epoch": 1.87, - "learning_rate": 0.009117539218998001, - "loss": 8.0604, - "step": 434600 - }, - { - "epoch": 1.87, - "learning_rate": 0.009114991507854659, - "loss": 8.0459, - "step": 434800 - }, - { - "epoch": 1.87, - "learning_rate": 0.009112440481353097, - "loss": 8.069, - "step": 435000 - }, - { - "epoch": 1.87, - "learning_rate": 0.009109886141548632, - "loss": 8.0726, - "step": 435200 - }, - { - "epoch": 1.87, - "learning_rate": 0.009107328490499252, - "loss": 8.058, - "step": 435400 - }, - { - "epoch": 1.87, - "learning_rate": 0.009104767530265607, - "loss": 8.0697, - "step": 435600 - }, - { - "epoch": 1.87, - "learning_rate": 0.009102203262911017, - "loss": 8.0644, - "step": 435800 - }, - { - "epoch": 1.87, - "learning_rate": 0.009099635690501467, - "loss": 8.052, - "step": 436000 - }, - { - "epoch": 1.87, - "learning_rate": 0.009097077677695334, - "loss": 8.0775, - "step": 436200 - }, - { - "epoch": 1.88, - "learning_rate": 0.009094503517883882, - "loss": 8.0567, - "step": 436400 - }, - { - "epoch": 1.88, - "learning_rate": 0.009091926059221015, - "loss": 8.0705, - "step": 436600 - }, - { - "epoch": 1.88, - "learning_rate": 0.009089345303783346, - "loss": 8.0543, - "step": 436800 - }, - { - "epoch": 1.88, - "learning_rate": 0.00908676125365014, - "loss": 8.051, - "step": 437000 - }, - { - "epoch": 1.88, - "learning_rate": 0.009084173910903322, - "loss": 8.0623, - "step": 437200 - }, - { - "epoch": 1.88, - "learning_rate": 0.009081583277627467, - "loss": 8.067, - "step": 437400 - }, - { - "epoch": 1.88, - "learning_rate": 0.0090789893559098, - "loss": 8.0624, - "step": 437600 - }, - { - "epoch": 1.88, - "learning_rate": 0.0090763921478402, - "loss": 8.0666, - "step": 437800 - }, - { - "epoch": 1.88, - "learning_rate": 0.009073791655511186, - "loss": 8.0582, - "step": 438000 - }, - { - "epoch": 1.88, - "learning_rate": 0.009071187881017932, - "loss": 8.0632, - "step": 438200 - }, - { - "epoch": 1.88, - "learning_rate": 0.00906859386988674, - "loss": 8.0788, - "step": 438400 - }, - { - "epoch": 1.88, - "learning_rate": 0.009065983553745688, - "loss": 8.0552, - "step": 438600 - }, - { - "epoch": 1.89, - "learning_rate": 0.00906336996173124, - "loss": 8.0724, - "step": 438800 - }, - { - "epoch": 1.89, - "learning_rate": 0.009060753095949121, - "loss": 8.0587, - "step": 439000 - }, - { - "epoch": 1.89, - "learning_rate": 0.009058132958507693, - "loss": 8.0561, - "step": 439200 - }, - { - "epoch": 1.89, - "learning_rate": 0.009055509551517953, - "loss": 8.0621, - "step": 439400 - }, - { - "epoch": 1.89, - "learning_rate": 0.00905288287709353, - "loss": 8.0568, - "step": 439600 - }, - { - "epoch": 1.89, - "learning_rate": 0.009050252937350694, - "loss": 8.0648, - "step": 439800 - }, - { - "epoch": 1.89, - "learning_rate": 0.009047619734408333, - "loss": 8.0748, - "step": 440000 - }, - { - "epoch": 1.89, - "learning_rate": 0.009044983270387975, - "loss": 8.0722, - "step": 440200 - }, - { - "epoch": 1.89, - "learning_rate": 0.009042356754131772, - "loss": 8.0602, - "step": 440400 - }, - { - "epoch": 1.89, - "learning_rate": 0.009039713790609341, - "loss": 8.0593, - "step": 440600 - }, - { - "epoch": 1.89, - "learning_rate": 0.009037067572378588, - "loss": 8.0647, - "step": 440800 - }, - { - "epoch": 1.9, - "learning_rate": 0.009034418101571523, - "loss": 8.0668, - "step": 441000 - }, - { - "epoch": 1.9, - "learning_rate": 0.009031765380322777, - "loss": 8.0619, - "step": 441200 - }, - { - "epoch": 1.9, - "learning_rate": 0.009029109410769603, - "loss": 8.0601, - "step": 441400 - }, - { - "epoch": 1.9, - "learning_rate": 0.009026450195051862, - "loss": 8.0811, - "step": 441600 - }, - { - "epoch": 1.9, - "learning_rate": 0.00902378773531204, - "loss": 8.0717, - "step": 441800 - }, - { - "epoch": 1.9, - "learning_rate": 0.009021122033695232, - "loss": 8.0618, - "step": 442000 - }, - { - "epoch": 1.9, - "learning_rate": 0.009018453092349143, - "loss": 8.0627, - "step": 442200 - }, - { - "epoch": 1.9, - "learning_rate": 0.009015780913424096, - "loss": 8.0519, - "step": 442400 - }, - { - "epoch": 1.9, - "learning_rate": 0.009013118884189326, - "loss": 8.082, - "step": 442600 - }, - { - "epoch": 1.9, - "learning_rate": 0.009010440252728728, - "loss": 8.0636, - "step": 442800 - }, - { - "epoch": 1.9, - "learning_rate": 0.00900775839014497, - "loss": 8.0581, - "step": 443000 - }, - { - "epoch": 1.9, - "learning_rate": 0.009005073298598778, - "loss": 8.0609, - "step": 443200 - }, - { - "epoch": 1.91, - "learning_rate": 0.009002384980253485, - "loss": 8.0708, - "step": 443400 - }, - { - "epoch": 1.91, - "learning_rate": 0.008999693437275016, - "loss": 8.0631, - "step": 443600 - }, - { - "epoch": 1.91, - "learning_rate": 0.008996998671831903, - "loss": 8.0715, - "step": 443800 - }, - { - "epoch": 1.91, - "learning_rate": 0.00899430068609527, - "loss": 8.0681, - "step": 444000 - }, - { - "epoch": 1.91, - "learning_rate": 0.008991599482238832, - "loss": 8.0685, - "step": 444200 - }, - { - "epoch": 1.91, - "learning_rate": 0.0089888950624389, - "loss": 8.0653, - "step": 444400 - }, - { - "epoch": 1.91, - "learning_rate": 0.008986200975032835, - "loss": 8.0611, - "step": 444600 - }, - { - "epoch": 1.91, - "learning_rate": 0.0089834901459377, - "loss": 8.0501, - "step": 444800 - }, - { - "epoch": 1.91, - "learning_rate": 0.00898077610743262, - "loss": 8.075, - "step": 445000 - }, - { - "epoch": 1.91, - "learning_rate": 0.008978058861704245, - "loss": 8.0547, - "step": 445200 - }, - { - "epoch": 1.91, - "learning_rate": 0.008975338410941811, - "loss": 8.0537, - "step": 445400 - }, - { - "epoch": 1.91, - "learning_rate": 0.008972614757337136, - "loss": 8.0578, - "step": 445600 - }, - { - "epoch": 1.92, - "learning_rate": 0.008969887903084617, - "loss": 8.0619, - "step": 445800 - }, - { - "epoch": 1.92, - "learning_rate": 0.008967157850381234, - "loss": 8.0667, - "step": 446000 - }, - { - "epoch": 1.92, - "learning_rate": 0.00896442460142654, - "loss": 8.0649, - "step": 446200 - }, - { - "epoch": 1.92, - "learning_rate": 0.008961688158422666, - "loss": 8.0701, - "step": 446400 - }, - { - "epoch": 1.92, - "learning_rate": 0.008958948523574312, - "loss": 8.0738, - "step": 446600 - }, - { - "epoch": 1.92, - "learning_rate": 0.008956205699088754, - "loss": 8.0563, - "step": 446800 - }, - { - "epoch": 1.92, - "learning_rate": 0.008953473425160466, - "loss": 8.0561, - "step": 447000 - }, - { - "epoch": 1.92, - "learning_rate": 0.008950724243953167, - "loss": 8.0646, - "step": 447200 - }, - { - "epoch": 1.92, - "learning_rate": 0.008947971879734815, - "loss": 8.0451, - "step": 447400 - }, - { - "epoch": 1.92, - "learning_rate": 0.008945216334722941, - "loss": 8.0552, - "step": 447600 - }, - { - "epoch": 1.92, - "learning_rate": 0.008942457611137641, - "loss": 8.0564, - "step": 447800 - }, - { - "epoch": 1.93, - "learning_rate": 0.008939695711201567, - "loss": 8.0651, - "step": 448000 - }, - { - "epoch": 1.93, - "learning_rate": 0.008936930637139932, - "loss": 8.0516, - "step": 448200 - }, - { - "epoch": 1.93, - "learning_rate": 0.008934162391180504, - "loss": 8.0555, - "step": 448400 - }, - { - "epoch": 1.93, - "learning_rate": 0.008931390975553611, - "loss": 8.0659, - "step": 448600 - }, - { - "epoch": 1.93, - "learning_rate": 0.008928616392492133, - "loss": 8.0648, - "step": 448800 - }, - { - "epoch": 1.93, - "learning_rate": 0.008925852540842535, - "loss": 8.0575, - "step": 449000 - }, - { - "epoch": 1.93, - "learning_rate": 0.008923071645429962, - "loss": 8.0673, - "step": 449200 - }, - { - "epoch": 1.93, - "learning_rate": 0.00892028758928554, - "loss": 8.0642, - "step": 449400 - }, - { - "epoch": 1.93, - "learning_rate": 0.008917500374652328, - "loss": 8.0617, - "step": 449600 - }, - { - "epoch": 1.93, - "learning_rate": 0.008914710003775937, - "loss": 8.0451, - "step": 449800 - }, - { - "epoch": 1.93, - "learning_rate": 0.008911916478904519, - "loss": 8.0684, - "step": 450000 - }, - { - "epoch": 1.93, - "learning_rate": 0.008909119802288763, - "loss": 8.0641, - "step": 450200 - }, - { - "epoch": 1.94, - "learning_rate": 0.008906319976181904, - "loss": 8.0579, - "step": 450400 - }, - { - "epoch": 1.94, - "learning_rate": 0.00890351700283971, - "loss": 8.0687, - "step": 450600 - }, - { - "epoch": 1.94, - "learning_rate": 0.008900710884520487, - "loss": 8.0738, - "step": 450800 - }, - { - "epoch": 1.94, - "learning_rate": 0.008897901623485074, - "loss": 8.0655, - "step": 451000 - }, - { - "epoch": 1.94, - "learning_rate": 0.00889510329181241, - "loss": 8.0576, - "step": 451200 - }, - { - "epoch": 1.94, - "learning_rate": 0.008892287767822553, - "loss": 8.0631, - "step": 451400 - }, - { - "epoch": 1.94, - "learning_rate": 0.008889469107902857, - "loss": 8.0608, - "step": 451600 - }, - { - "epoch": 1.94, - "learning_rate": 0.008886647314324266, - "loss": 8.0718, - "step": 451800 - }, - { - "epoch": 1.94, - "learning_rate": 0.008883822389360248, - "loss": 8.0601, - "step": 452000 - }, - { - "epoch": 1.94, - "learning_rate": 0.008880994335286796, - "loss": 8.0643, - "step": 452200 - }, - { - "epoch": 1.94, - "learning_rate": 0.008878163154382421, - "loss": 8.063, - "step": 452400 - }, - { - "epoch": 1.94, - "learning_rate": 0.008875328848928155, - "loss": 8.047, - "step": 452600 - }, - { - "epoch": 1.95, - "learning_rate": 0.008872491421207546, - "loss": 8.0731, - "step": 452800 - }, - { - "epoch": 1.95, - "learning_rate": 0.008869650873506656, - "loss": 8.0687, - "step": 453000 - }, - { - "epoch": 1.95, - "learning_rate": 0.008866821434192498, - "loss": 8.0769, - "step": 453200 - }, - { - "epoch": 1.95, - "learning_rate": 0.008863974668970598, - "loss": 8.0573, - "step": 453400 - }, - { - "epoch": 1.95, - "learning_rate": 0.008861124790630212, - "loss": 8.0704, - "step": 453600 - }, - { - "epoch": 1.95, - "learning_rate": 0.008858271801467435, - "loss": 8.0528, - "step": 453800 - }, - { - "epoch": 1.95, - "learning_rate": 0.00885541570378087, - "loss": 8.0439, - "step": 454000 - }, - { - "epoch": 1.95, - "learning_rate": 0.008852556499871624, - "loss": 8.0616, - "step": 454200 - }, - { - "epoch": 1.95, - "learning_rate": 0.008849694192043307, - "loss": 8.0479, - "step": 454400 - }, - { - "epoch": 1.95, - "learning_rate": 0.008846828782602024, - "loss": 8.0554, - "step": 454600 - }, - { - "epoch": 1.95, - "learning_rate": 0.008843960273856389, - "loss": 8.0571, - "step": 454800 - }, - { - "epoch": 1.96, - "learning_rate": 0.008841088668117504, - "loss": 8.0589, - "step": 455000 - }, - { - "epoch": 1.96, - "learning_rate": 0.008838228348895248, - "loss": 8.0584, - "step": 455200 - }, - { - "epoch": 1.96, - "learning_rate": 0.008835350571569214, - "loss": 8.0739, - "step": 455400 - }, - { - "epoch": 1.96, - "learning_rate": 0.008832469704186613, - "loss": 8.0697, - "step": 455600 - }, - { - "epoch": 1.96, - "learning_rate": 0.008829585749068508, - "loss": 8.0652, - "step": 455800 - }, - { - "epoch": 1.96, - "learning_rate": 0.008826698708538448, - "loss": 8.0609, - "step": 456000 - }, - { - "epoch": 1.96, - "learning_rate": 0.008823808584922472, - "loss": 8.0522, - "step": 456200 - }, - { - "epoch": 1.96, - "learning_rate": 0.008820915380549098, - "loss": 8.0458, - "step": 456400 - }, - { - "epoch": 1.96, - "learning_rate": 0.00881801909774933, - "loss": 8.0483, - "step": 456600 - }, - { - "epoch": 1.96, - "learning_rate": 0.00881511973885665, - "loss": 8.0619, - "step": 456800 - }, - { - "epoch": 1.96, - "learning_rate": 0.008812217306207019, - "loss": 8.0606, - "step": 457000 - }, - { - "epoch": 1.96, - "learning_rate": 0.008809311802138876, - "loss": 8.0602, - "step": 457200 - }, - { - "epoch": 1.97, - "learning_rate": 0.008806417779489316, - "loss": 8.0684, - "step": 457400 - }, - { - "epoch": 1.97, - "learning_rate": 0.008803506154937192, - "loss": 8.0663, - "step": 457600 - }, - { - "epoch": 1.97, - "learning_rate": 0.008800591465984975, - "loss": 8.0614, - "step": 457800 - }, - { - "epoch": 1.97, - "learning_rate": 0.008797673714980973, - "loss": 8.0678, - "step": 458000 - }, - { - "epoch": 1.97, - "learning_rate": 0.008794752904275964, - "loss": 8.0544, - "step": 458200 - }, - { - "epoch": 1.97, - "learning_rate": 0.008791829036223199, - "loss": 8.058, - "step": 458400 - }, - { - "epoch": 1.97, - "learning_rate": 0.00878890211317838, - "loss": 8.0647, - "step": 458600 - }, - { - "epoch": 1.97, - "learning_rate": 0.00878597213749968, - "loss": 8.0671, - "step": 458800 - }, - { - "epoch": 1.97, - "learning_rate": 0.008783039111547724, - "loss": 8.0737, - "step": 459000 - }, - { - "epoch": 1.97, - "learning_rate": 0.008780103037685599, - "loss": 8.0631, - "step": 459200 - }, - { - "epoch": 1.97, - "learning_rate": 0.008777178621447754, - "loss": 8.0521, - "step": 459400 - }, - { - "epoch": 1.98, - "learning_rate": 0.00877423647407436, - "loss": 8.0571, - "step": 459600 - }, - { - "epoch": 1.98, - "learning_rate": 0.008771291285882924, - "loss": 8.0642, - "step": 459800 - }, - { - "epoch": 1.98, - "learning_rate": 0.008768343059246328, - "loss": 8.0576, - "step": 460000 - }, - { - "epoch": 1.98, - "learning_rate": 0.008765391796539908, - "loss": 8.0651, - "step": 460200 - }, - { - "epoch": 1.98, - "learning_rate": 0.008762437500141444, - "loss": 8.0529, - "step": 460400 - }, - { - "epoch": 1.98, - "learning_rate": 0.008759480172431154, - "loss": 8.0715, - "step": 460600 - }, - { - "epoch": 1.98, - "learning_rate": 0.008756519815791711, - "loss": 8.0711, - "step": 460800 - }, - { - "epoch": 1.98, - "learning_rate": 0.008753556432608214, - "loss": 8.0669, - "step": 461000 - }, - { - "epoch": 1.98, - "learning_rate": 0.008750590025268212, - "loss": 8.0668, - "step": 461200 - }, - { - "epoch": 1.98, - "learning_rate": 0.008747635450819902, - "loss": 8.0661, - "step": 461400 - }, - { - "epoch": 1.98, - "learning_rate": 0.00874466301743018, - "loss": 8.0558, - "step": 461600 - }, - { - "epoch": 1.98, - "learning_rate": 0.008741687567049211, - "loss": 8.0522, - "step": 461800 - }, - { - "epoch": 1.99, - "learning_rate": 0.008738709102074272, - "loss": 8.054, - "step": 462000 - }, - { - "epoch": 1.99, - "learning_rate": 0.008735727624905047, - "loss": 8.0596, - "step": 462200 - }, - { - "epoch": 1.99, - "learning_rate": 0.008732743137943665, - "loss": 8.0545, - "step": 462400 - }, - { - "epoch": 1.99, - "learning_rate": 0.008729755643594671, - "loss": 8.0642, - "step": 462600 - }, - { - "epoch": 1.99, - "learning_rate": 0.008726765144265037, - "loss": 8.0487, - "step": 462800 - }, - { - "epoch": 1.99, - "learning_rate": 0.008723771642364155, - "loss": 8.06, - "step": 463000 - }, - { - "epoch": 1.99, - "learning_rate": 0.008720775140303831, - "loss": 8.0604, - "step": 463200 - }, - { - "epoch": 1.99, - "learning_rate": 0.008717790645450219, - "loss": 8.0491, - "step": 463400 - }, - { - "epoch": 1.99, - "learning_rate": 0.008714788165286744, - "loss": 8.0542, - "step": 463600 - }, - { - "epoch": 1.99, - "learning_rate": 0.008711782692201655, - "loss": 8.0458, - "step": 463800 - }, - { - "epoch": 1.99, - "learning_rate": 0.008708774228616404, - "loss": 8.06, - "step": 464000 - }, - { - "epoch": 1.99, - "learning_rate": 0.008705762776954862, - "loss": 8.0543, - "step": 464200 - }, - { - "epoch": 2.0, - "learning_rate": 0.0087027483396433, - "loss": 8.0536, - "step": 464400 - }, - { - "epoch": 2.0, - "learning_rate": 0.008699730919110391, - "loss": 8.0519, - "step": 464600 - }, - { - "epoch": 2.0, - "learning_rate": 0.008696710517787221, - "loss": 8.0539, - "step": 464800 - }, - { - "epoch": 2.0, - "learning_rate": 0.008693687138107269, - "loss": 8.0562, - "step": 465000 - }, - { - "epoch": 2.0, - "learning_rate": 0.008690660782506422, - "loss": 8.0549, - "step": 465200 - }, - { - "epoch": 2.0, - "learning_rate": 0.008687631453422957, - "loss": 8.053, - "step": 465400 - }, - { - "epoch": 2.0, - "learning_rate": 0.008684614322184602, - "loss": 8.0627, - "step": 465600 - }, - { - "epoch": 2.0, - "learning_rate": 0.008681579068297236, - "loss": 8.0574, - "step": 465800 - }, - { - "epoch": 2.0, - "learning_rate": 0.008678540848244225, - "loss": 8.0589, - "step": 466000 - }, - { - "epoch": 2.0, - "learning_rate": 0.00867549966447341, - "loss": 8.0641, - "step": 466200 - }, - { - "epoch": 2.0, - "learning_rate": 0.008672455519435015, - "loss": 8.0538, - "step": 466400 - }, - { - "epoch": 2.01, - "learning_rate": 0.008669408415581652, - "loss": 8.057, - "step": 466600 - }, - { - "epoch": 2.01, - "learning_rate": 0.008666358355368324, - "loss": 8.0626, - "step": 466800 - }, - { - "epoch": 2.01, - "learning_rate": 0.008663305341252403, - "loss": 8.05, - "step": 467000 - }, - { - "epoch": 2.01, - "learning_rate": 0.00866024937569365, - "loss": 8.0658, - "step": 467200 - }, - { - "epoch": 2.01, - "learning_rate": 0.0086571904611542, - "loss": 8.0435, - "step": 467400 - }, - { - "epoch": 2.01, - "learning_rate": 0.008654143916729223, - "loss": 8.0602, - "step": 467600 - }, - { - "epoch": 2.01, - "learning_rate": 0.008651079126338399, - "loss": 8.0699, - "step": 467800 - }, - { - "epoch": 2.01, - "learning_rate": 0.008648011394355181, - "loss": 8.0599, - "step": 468000 - }, - { - "epoch": 2.01, - "learning_rate": 0.008644940723251187, - "loss": 8.0604, - "step": 468200 - }, - { - "epoch": 2.01, - "learning_rate": 0.008641867115500402, - "loss": 8.0582, - "step": 468400 - }, - { - "epoch": 2.01, - "learning_rate": 0.008638790573579176, - "loss": 8.0779, - "step": 468600 - }, - { - "epoch": 2.01, - "learning_rate": 0.008635711099966222, - "loss": 8.0685, - "step": 468800 - }, - { - "epoch": 2.02, - "learning_rate": 0.008632628697142618, - "loss": 8.0626, - "step": 469000 - }, - { - "epoch": 2.02, - "learning_rate": 0.008629543367591799, - "loss": 8.0441, - "step": 469200 - }, - { - "epoch": 2.02, - "learning_rate": 0.00862645511379956, - "loss": 8.063, - "step": 469400 - }, - { - "epoch": 2.02, - "learning_rate": 0.008623379401395523, - "loss": 8.0813, - "step": 469600 - }, - { - "epoch": 2.02, - "learning_rate": 0.008620285321177363, - "loss": 8.0654, - "step": 469800 - }, - { - "epoch": 2.02, - "learning_rate": 0.008617188324176824, - "loss": 8.0628, - "step": 470000 - }, - { - "epoch": 2.02, - "learning_rate": 0.0086140884128891, - "loss": 8.0623, - "step": 470200 - }, - { - "epoch": 2.02, - "learning_rate": 0.008610985589811732, - "loss": 8.0548, - "step": 470400 - }, - { - "epoch": 2.02, - "learning_rate": 0.008607879857444612, - "loss": 8.0624, - "step": 470600 - }, - { - "epoch": 2.02, - "learning_rate": 0.008604771218289972, - "loss": 8.0589, - "step": 470800 - }, - { - "epoch": 2.02, - "learning_rate": 0.008601659674852383, - "loss": 8.0595, - "step": 471000 - }, - { - "epoch": 2.02, - "learning_rate": 0.008598545229638762, - "loss": 8.0542, - "step": 471200 - }, - { - "epoch": 2.03, - "learning_rate": 0.00859542788515836, - "loss": 8.0644, - "step": 471400 - }, - { - "epoch": 2.03, - "learning_rate": 0.008592323252330468, - "loss": 8.073, - "step": 471600 - }, - { - "epoch": 2.03, - "learning_rate": 0.008589200131318551, - "loss": 8.0527, - "step": 471800 - }, - { - "epoch": 2.03, - "learning_rate": 0.008586074118569034, - "loss": 8.0575, - "step": 472000 - }, - { - "epoch": 2.03, - "learning_rate": 0.008582945216600484, - "loss": 8.0683, - "step": 472200 - }, - { - "epoch": 2.03, - "learning_rate": 0.008579813427933803, - "loss": 8.0577, - "step": 472400 - }, - { - "epoch": 2.03, - "learning_rate": 0.008576678755092217, - "loss": 8.0482, - "step": 472600 - }, - { - "epoch": 2.03, - "learning_rate": 0.008573541200601272, - "loss": 8.0567, - "step": 472800 - }, - { - "epoch": 2.03, - "learning_rate": 0.008570400766988843, - "loss": 8.0521, - "step": 473000 - }, - { - "epoch": 2.03, - "learning_rate": 0.008567257456785117, - "loss": 8.0681, - "step": 473200 - }, - { - "epoch": 2.03, - "learning_rate": 0.008564111272522606, - "loss": 8.0555, - "step": 473400 - }, - { - "epoch": 2.04, - "learning_rate": 0.00856096221673613, - "loss": 8.057, - "step": 473600 - }, - { - "epoch": 2.04, - "learning_rate": 0.008557826058719102, - "loss": 8.066, - "step": 473800 - }, - { - "epoch": 2.04, - "learning_rate": 0.008554671281824341, - "loss": 8.0537, - "step": 474000 - }, - { - "epoch": 2.04, - "learning_rate": 0.008551513641011246, - "loss": 8.0566, - "step": 474200 - }, - { - "epoch": 2.04, - "learning_rate": 0.008548353138823874, - "loss": 8.0678, - "step": 474400 - }, - { - "epoch": 2.04, - "learning_rate": 0.008545189777808581, - "loss": 8.0551, - "step": 474600 - }, - { - "epoch": 2.04, - "learning_rate": 0.008542023560514032, - "loss": 8.0596, - "step": 474800 - }, - { - "epoch": 2.04, - "learning_rate": 0.008538854489491191, - "loss": 8.0553, - "step": 475000 - }, - { - "epoch": 2.04, - "learning_rate": 0.00853568256729332, - "loss": 8.062, - "step": 475200 - }, - { - "epoch": 2.04, - "learning_rate": 0.00853250779647598, - "loss": 8.0612, - "step": 475400 - }, - { - "epoch": 2.04, - "learning_rate": 0.008529330179597028, - "loss": 8.0547, - "step": 475600 - }, - { - "epoch": 2.04, - "learning_rate": 0.008526165628587486, - "loss": 8.0544, - "step": 475800 - }, - { - "epoch": 2.05, - "learning_rate": 0.00852298234146636, - "loss": 8.0699, - "step": 476000 - }, - { - "epoch": 2.05, - "learning_rate": 0.008519796215958109, - "loss": 8.0531, - "step": 476200 - }, - { - "epoch": 2.05, - "learning_rate": 0.008516607254629735, - "loss": 8.0724, - "step": 476400 - }, - { - "epoch": 2.05, - "learning_rate": 0.008513415460050527, - "loss": 8.0621, - "step": 476600 - }, - { - "epoch": 2.05, - "learning_rate": 0.008510220834792056, - "loss": 8.0526, - "step": 476800 - }, - { - "epoch": 2.05, - "learning_rate": 0.008507023381428176, - "loss": 8.0543, - "step": 477000 - }, - { - "epoch": 2.05, - "learning_rate": 0.008503823102535014, - "loss": 8.0676, - "step": 477200 - }, - { - "epoch": 2.05, - "learning_rate": 0.00850062000069098, - "loss": 8.0542, - "step": 477400 - }, - { - "epoch": 2.05, - "learning_rate": 0.008497414078476753, - "loss": 8.0594, - "step": 477600 - }, - { - "epoch": 2.05, - "learning_rate": 0.008494221389180264, - "loss": 8.0712, - "step": 477800 - }, - { - "epoch": 2.05, - "learning_rate": 0.00849100984804636, - "loss": 8.0652, - "step": 478000 - }, - { - "epoch": 2.05, - "learning_rate": 0.00848779549428499, - "loss": 8.0539, - "step": 478200 - }, - { - "epoch": 2.06, - "learning_rate": 0.0084845783304859, - "loss": 8.0691, - "step": 478400 - }, - { - "epoch": 2.06, - "learning_rate": 0.008481358359241103, - "loss": 8.0572, - "step": 478600 - }, - { - "epoch": 2.06, - "learning_rate": 0.008478135583144872, - "loss": 8.0511, - "step": 478800 - }, - { - "epoch": 2.06, - "learning_rate": 0.008474910004793735, - "loss": 8.0542, - "step": 479000 - }, - { - "epoch": 2.06, - "learning_rate": 0.008471681626786486, - "loss": 8.0691, - "step": 479200 - }, - { - "epoch": 2.06, - "learning_rate": 0.00846845045172417, - "loss": 8.0427, - "step": 479400 - }, - { - "epoch": 2.06, - "learning_rate": 0.008465216482210087, - "loss": 8.0548, - "step": 479600 - }, - { - "epoch": 2.06, - "learning_rate": 0.008461995911596992, - "loss": 8.0622, - "step": 479800 - }, - { - "epoch": 2.06, - "learning_rate": 0.008458756374937978, - "loss": 8.0602, - "step": 480000 - }, - { - "epoch": 2.06, - "learning_rate": 0.008455514051637537, - "loss": 8.0486, - "step": 480200 - }, - { - "epoch": 2.06, - "learning_rate": 0.008452268944307954, - "loss": 8.0664, - "step": 480400 - }, - { - "epoch": 2.07, - "learning_rate": 0.00844902105556375, - "loss": 8.0589, - "step": 480600 - }, - { - "epoch": 2.07, - "learning_rate": 0.008445770388021695, - "loss": 8.0578, - "step": 480800 - }, - { - "epoch": 2.07, - "learning_rate": 0.008442516944300786, - "loss": 8.0654, - "step": 481000 - }, - { - "epoch": 2.07, - "learning_rate": 0.00843926072702227, - "loss": 8.056, - "step": 481200 - }, - { - "epoch": 2.07, - "learning_rate": 0.00843600173880962, - "loss": 8.0699, - "step": 481400 - }, - { - "epoch": 2.07, - "learning_rate": 0.008432739982288544, - "loss": 8.0566, - "step": 481600 - }, - { - "epoch": 2.07, - "learning_rate": 0.00842949178957327, - "loss": 8.0648, - "step": 481800 - }, - { - "epoch": 2.07, - "learning_rate": 0.008426224518130092, - "loss": 8.0582, - "step": 482000 - }, - { - "epoch": 2.07, - "learning_rate": 0.008422954486255817, - "loss": 8.0405, - "step": 482200 - }, - { - "epoch": 2.07, - "learning_rate": 0.008419681696585054, - "loss": 8.0592, - "step": 482400 - }, - { - "epoch": 2.07, - "learning_rate": 0.008416406151754628, - "loss": 8.0622, - "step": 482600 - }, - { - "epoch": 2.07, - "learning_rate": 0.008413127854403585, - "loss": 8.0523, - "step": 482800 - }, - { - "epoch": 2.08, - "learning_rate": 0.008409846807173193, - "loss": 8.0749, - "step": 483000 - }, - { - "epoch": 2.08, - "learning_rate": 0.008406563012706933, - "loss": 8.0473, - "step": 483200 - }, - { - "epoch": 2.08, - "learning_rate": 0.008403276473650497, - "loss": 8.0529, - "step": 483400 - }, - { - "epoch": 2.08, - "learning_rate": 0.008399987192651788, - "loss": 8.0579, - "step": 483600 - }, - { - "epoch": 2.08, - "learning_rate": 0.008396695172360923, - "loss": 8.0548, - "step": 483800 - }, - { - "epoch": 2.08, - "learning_rate": 0.00839341689601788, - "loss": 8.0475, - "step": 484000 - }, - { - "epoch": 2.08, - "learning_rate": 0.00839011941876519, - "loss": 8.0671, - "step": 484200 - }, - { - "epoch": 2.08, - "learning_rate": 0.008386819210170635, - "loss": 8.054, - "step": 484400 - }, - { - "epoch": 2.08, - "learning_rate": 0.008383516272893127, - "loss": 8.0621, - "step": 484600 - }, - { - "epoch": 2.08, - "learning_rate": 0.008380210609593784, - "loss": 8.0582, - "step": 484800 - }, - { - "epoch": 2.08, - "learning_rate": 0.008376902222935922, - "loss": 8.0399, - "step": 485000 - }, - { - "epoch": 2.09, - "learning_rate": 0.008373591115585046, - "loss": 8.0449, - "step": 485200 - }, - { - "epoch": 2.09, - "learning_rate": 0.008370277290208858, - "loss": 8.0558, - "step": 485400 - }, - { - "epoch": 2.09, - "learning_rate": 0.008366960749477242, - "loss": 8.0748, - "step": 485600 - }, - { - "epoch": 2.09, - "learning_rate": 0.008363641496062281, - "loss": 8.0552, - "step": 485800 - }, - { - "epoch": 2.09, - "learning_rate": 0.008360336149192077, - "loss": 8.0466, - "step": 486000 - }, - { - "epoch": 2.09, - "learning_rate": 0.008357011491965391, - "loss": 8.0548, - "step": 486200 - }, - { - "epoch": 2.09, - "learning_rate": 0.008353684130071298, - "loss": 8.0689, - "step": 486400 - }, - { - "epoch": 2.09, - "learning_rate": 0.008350354066190589, - "loss": 8.0607, - "step": 486600 - }, - { - "epoch": 2.09, - "learning_rate": 0.008347021303006236, - "loss": 8.053, - "step": 486800 - }, - { - "epoch": 2.09, - "learning_rate": 0.008343685843203387, - "loss": 8.0633, - "step": 487000 - }, - { - "epoch": 2.09, - "learning_rate": 0.008340347689469363, - "loss": 8.0456, - "step": 487200 - }, - { - "epoch": 2.09, - "learning_rate": 0.008337006844493653, - "loss": 8.0444, - "step": 487400 - }, - { - "epoch": 2.1, - "learning_rate": 0.008333663310967916, - "loss": 8.0622, - "step": 487600 - }, - { - "epoch": 2.1, - "learning_rate": 0.008330317091585975, - "loss": 8.0546, - "step": 487800 - }, - { - "epoch": 2.1, - "learning_rate": 0.00832698494022643, - "loss": 8.062, - "step": 488000 - }, - { - "epoch": 2.1, - "learning_rate": 0.008323633370617808, - "loss": 8.0589, - "step": 488200 - }, - { - "epoch": 2.1, - "learning_rate": 0.008320279123233923, - "loss": 8.0628, - "step": 488400 - }, - { - "epoch": 2.1, - "learning_rate": 0.008316922200777233, - "loss": 8.0536, - "step": 488600 - }, - { - "epoch": 2.1, - "learning_rate": 0.00831356260595235, - "loss": 8.0577, - "step": 488800 - }, - { - "epoch": 2.1, - "learning_rate": 0.008310200341466041, - "loss": 8.0567, - "step": 489000 - }, - { - "epoch": 2.1, - "learning_rate": 0.00830683541002722, - "loss": 8.0471, - "step": 489200 - }, - { - "epoch": 2.1, - "learning_rate": 0.008303467814346951, - "loss": 8.0503, - "step": 489400 - }, - { - "epoch": 2.1, - "learning_rate": 0.008300097557138446, - "loss": 8.0463, - "step": 489600 - }, - { - "epoch": 2.1, - "learning_rate": 0.008296724641117059, - "loss": 8.0448, - "step": 489800 - }, - { - "epoch": 2.11, - "learning_rate": 0.00829336595346341, - "loss": 8.0557, - "step": 490000 - }, - { - "epoch": 2.11, - "learning_rate": 0.008289987741231004, - "loss": 8.0481, - "step": 490200 - }, - { - "epoch": 2.11, - "learning_rate": 0.00828660687833101, - "loss": 8.0683, - "step": 490400 - }, - { - "epoch": 2.11, - "learning_rate": 0.008283223367487335, - "loss": 8.06, - "step": 490600 - }, - { - "epoch": 2.11, - "learning_rate": 0.008279837211426005, - "loss": 8.0565, - "step": 490800 - }, - { - "epoch": 2.11, - "learning_rate": 0.00827644841287519, - "loss": 8.0492, - "step": 491000 - }, - { - "epoch": 2.11, - "learning_rate": 0.008273056974565183, - "loss": 8.0633, - "step": 491200 - }, - { - "epoch": 2.11, - "learning_rate": 0.008269662899228402, - "loss": 8.0614, - "step": 491400 - }, - { - "epoch": 2.11, - "learning_rate": 0.008266266189599395, - "loss": 8.0501, - "step": 491600 - }, - { - "epoch": 2.11, - "learning_rate": 0.008262866848414827, - "loss": 8.0607, - "step": 491800 - }, - { - "epoch": 2.11, - "learning_rate": 0.008259481894798144, - "loss": 8.049, - "step": 492000 - }, - { - "epoch": 2.12, - "learning_rate": 0.008256077311844499, - "loss": 8.0659, - "step": 492200 - }, - { - "epoch": 2.12, - "learning_rate": 0.008252670105544287, - "loss": 8.0552, - "step": 492400 - }, - { - "epoch": 2.12, - "learning_rate": 0.008249260278642636, - "loss": 8.0688, - "step": 492600 - }, - { - "epoch": 2.12, - "learning_rate": 0.008245847833886781, - "loss": 8.0582, - "step": 492800 - }, - { - "epoch": 2.12, - "learning_rate": 0.008242432774026066, - "loss": 8.0645, - "step": 493000 - }, - { - "epoch": 2.12, - "learning_rate": 0.008239015101811943, - "loss": 8.051, - "step": 493200 - }, - { - "epoch": 2.12, - "learning_rate": 0.008235594819997973, - "loss": 8.0558, - "step": 493400 - }, - { - "epoch": 2.12, - "learning_rate": 0.008232171931339809, - "loss": 8.0563, - "step": 493600 - }, - { - "epoch": 2.12, - "learning_rate": 0.008228746438595214, - "loss": 8.0536, - "step": 493800 - }, - { - "epoch": 2.12, - "learning_rate": 0.008225335491460635, - "loss": 8.0634, - "step": 494000 - }, - { - "epoch": 2.12, - "learning_rate": 0.008221904811810794, - "loss": 8.0534, - "step": 494200 - }, - { - "epoch": 2.12, - "learning_rate": 0.008218471536346557, - "loss": 8.0547, - "step": 494400 - }, - { - "epoch": 2.13, - "learning_rate": 0.00821503566783405, - "loss": 8.051, - "step": 494600 - }, - { - "epoch": 2.13, - "learning_rate": 0.008211597209041488, - "loss": 8.0676, - "step": 494800 - }, - { - "epoch": 2.13, - "learning_rate": 0.008208156162739177, - "loss": 8.0616, - "step": 495000 - }, - { - "epoch": 2.13, - "learning_rate": 0.008204712531699505, - "loss": 8.0586, - "step": 495200 - }, - { - "epoch": 2.13, - "learning_rate": 0.008201266318696944, - "loss": 8.0673, - "step": 495400 - }, - { - "epoch": 2.13, - "learning_rate": 0.008197817526508045, - "loss": 8.0551, - "step": 495600 - }, - { - "epoch": 2.13, - "learning_rate": 0.00819436615791144, - "loss": 8.0549, - "step": 495800 - }, - { - "epoch": 2.13, - "learning_rate": 0.008190929491796246, - "loss": 8.0587, - "step": 496000 - }, - { - "epoch": 2.13, - "learning_rate": 0.008187472991575708, - "loss": 8.0532, - "step": 496200 - }, - { - "epoch": 2.13, - "learning_rate": 0.008184013923281868, - "loss": 8.0526, - "step": 496400 - }, - { - "epoch": 2.13, - "learning_rate": 0.008180552289701633, - "loss": 8.0503, - "step": 496600 - }, - { - "epoch": 2.13, - "learning_rate": 0.008177088093623979, - "loss": 8.0536, - "step": 496800 - }, - { - "epoch": 2.14, - "learning_rate": 0.00817362133783995, - "loss": 8.0617, - "step": 497000 - }, - { - "epoch": 2.14, - "learning_rate": 0.008170152025142641, - "loss": 8.0507, - "step": 497200 - }, - { - "epoch": 2.14, - "learning_rate": 0.008166680158327224, - "loss": 8.0418, - "step": 497400 - }, - { - "epoch": 2.14, - "learning_rate": 0.00816320574019091, - "loss": 8.0658, - "step": 497600 - }, - { - "epoch": 2.14, - "learning_rate": 0.00815972877353298, - "loss": 8.0592, - "step": 497800 - }, - { - "epoch": 2.14, - "learning_rate": 0.008156266665044497, - "loss": 8.0616, - "step": 498000 - }, - { - "epoch": 2.14, - "learning_rate": 0.008152784622456982, - "loss": 8.0606, - "step": 498200 - }, - { - "epoch": 2.14, - "learning_rate": 0.008149300039743953, - "loss": 8.0599, - "step": 498400 - }, - { - "epoch": 2.14, - "learning_rate": 0.008145812919712881, - "loss": 8.0483, - "step": 498600 - }, - { - "epoch": 2.14, - "learning_rate": 0.008142323265173274, - "loss": 8.0556, - "step": 498800 - }, - { - "epoch": 2.14, - "learning_rate": 0.008138831078936681, - "loss": 8.0436, - "step": 499000 - }, - { - "epoch": 2.15, - "learning_rate": 0.008135336363816695, - "loss": 8.0577, - "step": 499200 - }, - { - "epoch": 2.15, - "learning_rate": 0.008131839122628949, - "loss": 8.0577, - "step": 499400 - }, - { - "epoch": 2.15, - "learning_rate": 0.0081283393581911, - "loss": 8.0686, - "step": 499600 - }, - { - "epoch": 2.15, - "learning_rate": 0.008124837073322849, - "loss": 8.0739, - "step": 499800 - }, - { - "epoch": 2.15, - "learning_rate": 0.00812134980111619, - "loss": 8.0646, - "step": 500000 - }, - { - "epoch": 2.15, - "learning_rate": 0.008117842496421241, - "loss": 8.0409, - "step": 500200 - }, - { - "epoch": 2.15, - "learning_rate": 0.008114332679753023, - "loss": 8.0567, - "step": 500400 - }, - { - "epoch": 2.15, - "learning_rate": 0.00811082035393933, - "loss": 8.0455, - "step": 500600 - }, - { - "epoch": 2.15, - "learning_rate": 0.00810730552180998, - "loss": 8.0603, - "step": 500800 - }, - { - "epoch": 2.15, - "learning_rate": 0.008103788186196809, - "loss": 8.0453, - "step": 501000 - }, - { - "epoch": 2.15, - "learning_rate": 0.008100268349933671, - "loss": 8.0641, - "step": 501200 - }, - { - "epoch": 2.15, - "learning_rate": 0.008096746015856434, - "loss": 8.0563, - "step": 501400 - }, - { - "epoch": 2.16, - "learning_rate": 0.00809322118680298, - "loss": 8.0563, - "step": 501600 - }, - { - "epoch": 2.16, - "learning_rate": 0.008089693865613201, - "loss": 8.0511, - "step": 501800 - }, - { - "epoch": 2.16, - "learning_rate": 0.008086181710368831, - "loss": 8.058, - "step": 502000 - }, - { - "epoch": 2.16, - "learning_rate": 0.008082649425859276, - "loss": 8.0534, - "step": 502200 - }, - { - "epoch": 2.16, - "learning_rate": 0.008079114657730873, - "loss": 8.0495, - "step": 502400 - }, - { - "epoch": 2.16, - "learning_rate": 0.008075577408831518, - "loss": 8.0515, - "step": 502600 - }, - { - "epoch": 2.16, - "learning_rate": 0.00807203768201111, - "loss": 8.0477, - "step": 502800 - }, - { - "epoch": 2.16, - "learning_rate": 0.008068495480121542, - "loss": 8.0624, - "step": 503000 - }, - { - "epoch": 2.16, - "learning_rate": 0.008064950806016703, - "loss": 8.0546, - "step": 503200 - }, - { - "epoch": 2.16, - "learning_rate": 0.008061403662552475, - "loss": 8.0696, - "step": 503400 - }, - { - "epoch": 2.16, - "learning_rate": 0.008057854052586723, - "loss": 8.0545, - "step": 503600 - }, - { - "epoch": 2.16, - "learning_rate": 0.008054301978979307, - "loss": 8.0468, - "step": 503800 - }, - { - "epoch": 2.17, - "learning_rate": 0.008050765223380456, - "loss": 8.0641, - "step": 504000 - }, - { - "epoch": 2.17, - "learning_rate": 0.008047208243359673, - "loss": 8.058, - "step": 504200 - }, - { - "epoch": 2.17, - "learning_rate": 0.008043648808274358, - "loss": 8.0517, - "step": 504400 - }, - { - "epoch": 2.17, - "learning_rate": 0.008040086920992286, - "loss": 8.0437, - "step": 504600 - }, - { - "epoch": 2.17, - "learning_rate": 0.008036522584383206, - "loss": 8.0534, - "step": 504800 - }, - { - "epoch": 2.17, - "learning_rate": 0.00803295580131884, - "loss": 8.0587, - "step": 505000 - }, - { - "epoch": 2.17, - "learning_rate": 0.008029386574672882, - "loss": 8.0654, - "step": 505200 - }, - { - "epoch": 2.17, - "learning_rate": 0.008025814907320992, - "loss": 8.0425, - "step": 505400 - }, - { - "epoch": 2.17, - "learning_rate": 0.008022240802140795, - "loss": 8.0616, - "step": 505600 - }, - { - "epoch": 2.17, - "learning_rate": 0.008018664262011886, - "loss": 8.0656, - "step": 505800 - }, - { - "epoch": 2.17, - "learning_rate": 0.008015103190721797, - "loss": 8.0421, - "step": 506000 - }, - { - "epoch": 2.18, - "learning_rate": 0.008011521801480829, - "loss": 8.0526, - "step": 506200 - }, - { - "epoch": 2.18, - "learning_rate": 0.00800793798592725, - "loss": 8.0468, - "step": 506400 - }, - { - "epoch": 2.18, - "learning_rate": 0.00800435174694848, - "loss": 8.0609, - "step": 506600 - }, - { - "epoch": 2.18, - "learning_rate": 0.00800076308743389, - "loss": 8.0673, - "step": 506800 - }, - { - "epoch": 2.18, - "learning_rate": 0.00799717201027479, - "loss": 8.048, - "step": 507000 - }, - { - "epoch": 2.18, - "learning_rate": 0.007993578518364452, - "loss": 8.0501, - "step": 507200 - }, - { - "epoch": 2.18, - "learning_rate": 0.007989982614598086, - "loss": 8.0524, - "step": 507400 - }, - { - "epoch": 2.18, - "learning_rate": 0.007986384301872845, - "loss": 8.0614, - "step": 507600 - }, - { - "epoch": 2.18, - "learning_rate": 0.00798278358308783, - "loss": 8.0584, - "step": 507800 - }, - { - "epoch": 2.18, - "learning_rate": 0.007979180461144068, - "loss": 8.0619, - "step": 508000 - }, - { - "epoch": 2.18, - "learning_rate": 0.007975592972521362, - "loss": 8.0469, - "step": 508200 - }, - { - "epoch": 2.18, - "learning_rate": 0.007971985064950483, - "loss": 8.0553, - "step": 508400 - }, - { - "epoch": 2.19, - "learning_rate": 0.007968374762921032, - "loss": 8.0502, - "step": 508600 - }, - { - "epoch": 2.19, - "learning_rate": 0.007964762069341763, - "loss": 8.0619, - "step": 508800 - }, - { - "epoch": 2.19, - "learning_rate": 0.00796114698712336, - "loss": 8.0313, - "step": 509000 - }, - { - "epoch": 2.19, - "learning_rate": 0.007957529519178426, - "loss": 8.0639, - "step": 509200 - }, - { - "epoch": 2.19, - "learning_rate": 0.007953909668421492, - "loss": 8.0625, - "step": 509400 - }, - { - "epoch": 2.19, - "learning_rate": 0.007950287437769005, - "loss": 8.057, - "step": 509600 - }, - { - "epoch": 2.19, - "learning_rate": 0.00794666283013933, - "loss": 8.0623, - "step": 509800 - }, - { - "epoch": 2.19, - "learning_rate": 0.007943035848452754, - "loss": 8.0629, - "step": 510000 - }, - { - "epoch": 2.19, - "learning_rate": 0.007939424648288932, - "loss": 8.0683, - "step": 510200 - }, - { - "epoch": 2.19, - "learning_rate": 0.007935792939090809, - "loss": 8.0642, - "step": 510400 - }, - { - "epoch": 2.19, - "learning_rate": 0.007932158864593458, - "loss": 8.0651, - "step": 510600 - }, - { - "epoch": 2.2, - "learning_rate": 0.007928522427724781, - "loss": 8.0599, - "step": 510800 - }, - { - "epoch": 2.2, - "learning_rate": 0.007924883631414595, - "loss": 8.0681, - "step": 511000 - }, - { - "epoch": 2.2, - "learning_rate": 0.007921242478594612, - "loss": 8.0563, - "step": 511200 - }, - { - "epoch": 2.2, - "learning_rate": 0.007917598972198441, - "loss": 8.0611, - "step": 511400 - }, - { - "epoch": 2.2, - "learning_rate": 0.00791395311516159, - "loss": 8.0411, - "step": 511600 - }, - { - "epoch": 2.2, - "learning_rate": 0.007910304910421459, - "loss": 8.0605, - "step": 511800 - }, - { - "epoch": 2.2, - "learning_rate": 0.00790665436091734, - "loss": 8.0456, - "step": 512000 - }, - { - "epoch": 2.2, - "learning_rate": 0.007903019739867472, - "loss": 8.0691, - "step": 512200 - }, - { - "epoch": 2.2, - "learning_rate": 0.00789936452134789, - "loss": 8.0604, - "step": 512400 - }, - { - "epoch": 2.2, - "learning_rate": 0.007895706966878793, - "loss": 8.0499, - "step": 512600 - }, - { - "epoch": 2.2, - "learning_rate": 0.007892047079407011, - "loss": 8.0632, - "step": 512800 - }, - { - "epoch": 2.2, - "learning_rate": 0.007888384861881249, - "loss": 8.0635, - "step": 513000 - }, - { - "epoch": 2.21, - "learning_rate": 0.007884720317252085, - "loss": 8.0445, - "step": 513200 - }, - { - "epoch": 2.21, - "learning_rate": 0.00788105344847198, - "loss": 8.046, - "step": 513400 - }, - { - "epoch": 2.21, - "learning_rate": 0.007877384258495266, - "loss": 8.0584, - "step": 513600 - }, - { - "epoch": 2.21, - "learning_rate": 0.007873712750278138, - "loss": 8.0635, - "step": 513800 - }, - { - "epoch": 2.21, - "learning_rate": 0.007870038926778667, - "loss": 8.0542, - "step": 514000 - }, - { - "epoch": 2.21, - "learning_rate": 0.007866381177382899, - "loss": 8.062, - "step": 514200 - }, - { - "epoch": 2.21, - "learning_rate": 0.007862702743739835, - "loss": 8.0479, - "step": 514400 - }, - { - "epoch": 2.21, - "learning_rate": 0.00785902200368499, - "loss": 8.051, - "step": 514600 - }, - { - "epoch": 2.21, - "learning_rate": 0.00785533896018387, - "loss": 8.047, - "step": 514800 - }, - { - "epoch": 2.21, - "learning_rate": 0.00785165361620384, - "loss": 8.0597, - "step": 515000 - }, - { - "epoch": 2.21, - "learning_rate": 0.00784796597471411, - "loss": 8.0495, - "step": 515200 - }, - { - "epoch": 2.21, - "learning_rate": 0.00784427603868575, - "loss": 8.0553, - "step": 515400 - }, - { - "epoch": 2.22, - "learning_rate": 0.007840583811091671, - "loss": 8.0636, - "step": 515600 - }, - { - "epoch": 2.22, - "learning_rate": 0.007836889294906637, - "loss": 8.053, - "step": 515800 - }, - { - "epoch": 2.22, - "learning_rate": 0.007833192493107252, - "loss": 8.0674, - "step": 516000 - }, - { - "epoch": 2.22, - "learning_rate": 0.007829511909767266, - "loss": 8.0599, - "step": 516200 - }, - { - "epoch": 2.22, - "learning_rate": 0.007825810557067221, - "loss": 8.0524, - "step": 516400 - }, - { - "epoch": 2.22, - "learning_rate": 0.00782210692767876, - "loss": 8.0722, - "step": 516600 - }, - { - "epoch": 2.22, - "learning_rate": 0.007818401024585838, - "loss": 8.0615, - "step": 516800 - }, - { - "epoch": 2.22, - "learning_rate": 0.007814692850774229, - "loss": 8.0587, - "step": 517000 - }, - { - "epoch": 2.22, - "learning_rate": 0.007810982409231544, - "loss": 8.0685, - "step": 517200 - }, - { - "epoch": 2.22, - "learning_rate": 0.007807269702947217, - "loss": 8.0593, - "step": 517400 - }, - { - "epoch": 2.22, - "learning_rate": 0.00780355473491251, - "loss": 8.0389, - "step": 517600 - }, - { - "epoch": 2.23, - "learning_rate": 0.007799837508120505, - "loss": 8.0505, - "step": 517800 - }, - { - "epoch": 2.23, - "learning_rate": 0.007796118025566104, - "loss": 8.059, - "step": 518000 - }, - { - "epoch": 2.23, - "learning_rate": 0.007792414904521422, - "loss": 8.0633, - "step": 518200 - }, - { - "epoch": 2.23, - "learning_rate": 0.00778869093067558, - "loss": 8.0584, - "step": 518400 - }, - { - "epoch": 2.23, - "learning_rate": 0.007784964710047938, - "loss": 8.0651, - "step": 518600 - }, - { - "epoch": 2.23, - "learning_rate": 0.0077812362456406445, - "loss": 8.0578, - "step": 518800 - }, - { - "epoch": 2.23, - "learning_rate": 0.0077775055404576575, - "loss": 8.046, - "step": 519000 - }, - { - "epoch": 2.23, - "learning_rate": 0.007773772597504737, - "loss": 8.035, - "step": 519200 - }, - { - "epoch": 2.23, - "learning_rate": 0.007770037419789448, - "loss": 8.0539, - "step": 519400 - }, - { - "epoch": 2.23, - "learning_rate": 0.007766300010321159, - "loss": 8.0674, - "step": 519600 - }, - { - "epoch": 2.23, - "learning_rate": 0.007762560372111028, - "loss": 8.0553, - "step": 519800 - }, - { - "epoch": 2.23, - "learning_rate": 0.007758818508172019, - "loss": 8.0423, - "step": 520000 - }, - { - "epoch": 2.24, - "learning_rate": 0.007755093147476158, - "loss": 8.0423, - "step": 520200 - }, - { - "epoch": 2.24, - "learning_rate": 0.007751346852216416, - "loss": 8.06, - "step": 520400 - }, - { - "epoch": 2.24, - "learning_rate": 0.007747598340262324, - "loss": 8.0464, - "step": 520600 - }, - { - "epoch": 2.24, - "learning_rate": 0.00774384761463399, - "loss": 8.0593, - "step": 520800 - }, - { - "epoch": 2.24, - "learning_rate": 0.007740094678353307, - "loss": 8.046, - "step": 521000 - }, - { - "epoch": 2.24, - "learning_rate": 0.0077363395344439475, - "loss": 8.0528, - "step": 521200 - }, - { - "epoch": 2.24, - "learning_rate": 0.0077325821859313615, - "loss": 8.0495, - "step": 521400 - }, - { - "epoch": 2.24, - "learning_rate": 0.007728822635842779, - "loss": 8.0485, - "step": 521600 - }, - { - "epoch": 2.24, - "learning_rate": 0.0077250608872072015, - "loss": 8.0465, - "step": 521800 - }, - { - "epoch": 2.24, - "learning_rate": 0.0077212969430554, - "loss": 8.0649, - "step": 522000 - }, - { - "epoch": 2.24, - "learning_rate": 0.00771753080641992, - "loss": 8.0618, - "step": 522200 - }, - { - "epoch": 2.24, - "learning_rate": 0.0077137813274067235, - "loss": 8.069, - "step": 522400 - }, - { - "epoch": 2.25, - "learning_rate": 0.0077100108258330805, - "loss": 8.0435, - "step": 522600 - }, - { - "epoch": 2.25, - "learning_rate": 0.007706238140868777, - "loss": 8.0551, - "step": 522800 - }, - { - "epoch": 2.25, - "learning_rate": 0.007702463275553399, - "loss": 8.0543, - "step": 523000 - }, - { - "epoch": 2.25, - "learning_rate": 0.007698686232928286, - "loss": 8.0538, - "step": 523200 - }, - { - "epoch": 2.25, - "learning_rate": 0.007694907016036531, - "loss": 8.047, - "step": 523400 - }, - { - "epoch": 2.25, - "learning_rate": 0.007691125627922984, - "loss": 8.0647, - "step": 523600 - }, - { - "epoch": 2.25, - "learning_rate": 0.007687342071634238, - "loss": 8.0594, - "step": 523800 - }, - { - "epoch": 2.25, - "learning_rate": 0.00768355635021864, - "loss": 8.0481, - "step": 524000 - }, - { - "epoch": 2.25, - "learning_rate": 0.007679768466726274, - "loss": 8.0533, - "step": 524200 - }, - { - "epoch": 2.25, - "learning_rate": 0.0076759973797870815, - "loss": 8.0615, - "step": 524400 - }, - { - "epoch": 2.25, - "learning_rate": 0.007672205192070668, - "loss": 8.046, - "step": 524600 - }, - { - "epoch": 2.26, - "learning_rate": 0.007668410851422909, - "loss": 8.0672, - "step": 524800 - }, - { - "epoch": 2.26, - "learning_rate": 0.007664614360900839, - "loss": 8.0428, - "step": 525000 - }, - { - "epoch": 2.26, - "learning_rate": 0.007660815723563221, - "loss": 8.0568, - "step": 525200 - }, - { - "epoch": 2.26, - "learning_rate": 0.007657014942470549, - "loss": 8.0541, - "step": 525400 - }, - { - "epoch": 2.26, - "learning_rate": 0.0076532120206850435, - "loss": 8.0504, - "step": 525600 - }, - { - "epoch": 2.26, - "learning_rate": 0.0076494069612706465, - "loss": 8.0532, - "step": 525800 - }, - { - "epoch": 2.26, - "learning_rate": 0.00764559976729303, - "loss": 8.0586, - "step": 526000 - }, - { - "epoch": 2.26, - "learning_rate": 0.00764179044181958, - "loss": 8.0578, - "step": 526200 - }, - { - "epoch": 2.26, - "learning_rate": 0.007637998050478286, - "loss": 8.0657, - "step": 526400 - }, - { - "epoch": 2.26, - "learning_rate": 0.007634184481841334, - "loss": 8.0462, - "step": 526600 - }, - { - "epoch": 2.26, - "learning_rate": 0.00763036879090564, - "loss": 8.0575, - "step": 526800 - }, - { - "epoch": 2.26, - "learning_rate": 0.007626550980745435, - "loss": 8.0649, - "step": 527000 - }, - { - "epoch": 2.27, - "learning_rate": 0.007622731054436658, - "loss": 8.0599, - "step": 527200 - }, - { - "epoch": 2.27, - "learning_rate": 0.0076189090150569575, - "loss": 8.0574, - "step": 527400 - }, - { - "epoch": 2.27, - "learning_rate": 0.0076150848656856805, - "loss": 8.0584, - "step": 527600 - }, - { - "epoch": 2.27, - "learning_rate": 0.007611258609403875, - "loss": 8.0519, - "step": 527800 - }, - { - "epoch": 2.27, - "learning_rate": 0.007607430249294286, - "loss": 8.0777, - "step": 528000 - }, - { - "epoch": 2.27, - "learning_rate": 0.007603599788441354, - "loss": 8.0565, - "step": 528200 - }, - { - "epoch": 2.27, - "learning_rate": 0.00759978639793658, - "loss": 8.0577, - "step": 528400 - }, - { - "epoch": 2.27, - "learning_rate": 0.007595951755322216, - "loss": 8.057, - "step": 528600 - }, - { - "epoch": 2.27, - "learning_rate": 0.0075921150212125265, - "loss": 8.0629, - "step": 528800 - }, - { - "epoch": 2.27, - "learning_rate": 0.007588276198698697, - "loss": 8.0691, - "step": 529000 - }, - { - "epoch": 2.27, - "learning_rate": 0.007584435290873596, - "loss": 8.0709, - "step": 529200 - }, - { - "epoch": 2.27, - "learning_rate": 0.007580592300831776, - "loss": 8.0609, - "step": 529400 - }, - { - "epoch": 2.28, - "learning_rate": 0.007576747231669466, - "loss": 8.0577, - "step": 529600 - }, - { - "epoch": 2.28, - "learning_rate": 0.007572900086484563, - "loss": 8.0542, - "step": 529800 - }, - { - "epoch": 2.28, - "learning_rate": 0.007569050868376649, - "loss": 8.0608, - "step": 530000 - }, - { - "epoch": 2.28, - "learning_rate": 0.007565199580446966, - "loss": 8.071, - "step": 530200 - }, - { - "epoch": 2.28, - "learning_rate": 0.007561365497707499, - "loss": 8.0686, - "step": 530400 - }, - { - "epoch": 2.28, - "learning_rate": 0.007557510089755028, - "loss": 8.0486, - "step": 530600 - }, - { - "epoch": 2.28, - "learning_rate": 0.0075536526212789884, - "loss": 8.0422, - "step": 530800 - }, - { - "epoch": 2.28, - "learning_rate": 0.007549793095387269, - "loss": 8.0577, - "step": 531000 - }, - { - "epoch": 2.28, - "learning_rate": 0.007545931515189419, - "loss": 8.0578, - "step": 531200 - }, - { - "epoch": 2.28, - "learning_rate": 0.0075420678837966465, - "loss": 8.0559, - "step": 531400 - }, - { - "epoch": 2.28, - "learning_rate": 0.007538202204321812, - "loss": 8.048, - "step": 531600 - }, - { - "epoch": 2.29, - "learning_rate": 0.007534334479879418, - "loss": 8.0706, - "step": 531800 - }, - { - "epoch": 2.29, - "learning_rate": 0.0075304647135856235, - "loss": 8.0598, - "step": 532000 - }, - { - "epoch": 2.29, - "learning_rate": 0.007526592908558228, - "loss": 8.0552, - "step": 532200 - }, - { - "epoch": 2.29, - "learning_rate": 0.007522738442178312, - "loss": 8.0592, - "step": 532400 - }, - { - "epoch": 2.29, - "learning_rate": 0.007518862579198385, - "loss": 8.0659, - "step": 532600 - }, - { - "epoch": 2.29, - "learning_rate": 0.007514984686832488, - "loss": 8.0362, - "step": 532800 - }, - { - "epoch": 2.29, - "learning_rate": 0.007511104768204968, - "loss": 8.0657, - "step": 533000 - }, - { - "epoch": 2.29, - "learning_rate": 0.007507222826441807, - "loss": 8.0447, - "step": 533200 - }, - { - "epoch": 2.29, - "learning_rate": 0.007503338864670612, - "loss": 8.063, - "step": 533400 - }, - { - "epoch": 2.29, - "learning_rate": 0.007499452886020626, - "loss": 8.0543, - "step": 533600 - }, - { - "epoch": 2.29, - "learning_rate": 0.007495564893622708, - "loss": 8.0492, - "step": 533800 - }, - { - "epoch": 2.29, - "learning_rate": 0.0074916748906093435, - "loss": 8.0412, - "step": 534000 - }, - { - "epoch": 2.3, - "learning_rate": 0.007487782880114638, - "loss": 8.0544, - "step": 534200 - }, - { - "epoch": 2.3, - "learning_rate": 0.007483908340329137, - "loss": 8.045, - "step": 534400 - }, - { - "epoch": 2.3, - "learning_rate": 0.007480012334278768, - "loss": 8.0598, - "step": 534600 - }, - { - "epoch": 2.3, - "learning_rate": 0.00747611433014337, - "loss": 8.0592, - "step": 534800 - }, - { - "epoch": 2.3, - "learning_rate": 0.007472214331063492, - "loss": 8.061, - "step": 535000 - }, - { - "epoch": 2.3, - "learning_rate": 0.007468312340181297, - "loss": 8.0581, - "step": 535200 - }, - { - "epoch": 2.3, - "learning_rate": 0.007464408360640544, - "loss": 8.0575, - "step": 535400 - }, - { - "epoch": 2.3, - "learning_rate": 0.0074605023955866025, - "loss": 8.0651, - "step": 535600 - }, - { - "epoch": 2.3, - "learning_rate": 0.007456594448166435, - "loss": 8.056, - "step": 535800 - }, - { - "epoch": 2.3, - "learning_rate": 0.007452684521528607, - "loss": 8.0514, - "step": 536000 - }, - { - "epoch": 2.3, - "learning_rate": 0.007448772618823274, - "loss": 8.058, - "step": 536200 - }, - { - "epoch": 2.31, - "learning_rate": 0.0074448783174827044, - "loss": 8.0568, - "step": 536400 - }, - { - "epoch": 2.31, - "learning_rate": 0.007440962481940165, - "loss": 8.0577, - "step": 536600 - }, - { - "epoch": 2.31, - "learning_rate": 0.007437044679774357, - "loss": 8.0365, - "step": 536800 - }, - { - "epoch": 2.31, - "learning_rate": 0.007433124914141782, - "loss": 8.0606, - "step": 537000 - }, - { - "epoch": 2.31, - "learning_rate": 0.007429203188200526, - "loss": 8.0724, - "step": 537200 - }, - { - "epoch": 2.31, - "learning_rate": 0.00742527950511025, - "loss": 8.0591, - "step": 537400 - }, - { - "epoch": 2.31, - "learning_rate": 0.0074213538680321985, - "loss": 8.054, - "step": 537600 - }, - { - "epoch": 2.31, - "learning_rate": 0.007417426280129182, - "loss": 8.0479, - "step": 537800 - }, - { - "epoch": 2.31, - "learning_rate": 0.00741349674456559, - "loss": 8.0535, - "step": 538000 - }, - { - "epoch": 2.31, - "learning_rate": 0.007409565264507377, - "loss": 8.0499, - "step": 538200 - }, - { - "epoch": 2.31, - "learning_rate": 0.007405651515052803, - "loss": 8.0604, - "step": 538400 - }, - { - "epoch": 2.31, - "learning_rate": 0.007401716165192385, - "loss": 8.053, - "step": 538600 - }, - { - "epoch": 2.32, - "learning_rate": 0.007397778880328747, - "loss": 8.0523, - "step": 538800 - }, - { - "epoch": 2.32, - "learning_rate": 0.007393839663634087, - "loss": 8.0412, - "step": 539000 - }, - { - "epoch": 2.32, - "learning_rate": 0.0073898985182821635, - "loss": 8.0521, - "step": 539200 - }, - { - "epoch": 2.32, - "learning_rate": 0.007385955447448282, - "loss": 8.0661, - "step": 539400 - }, - { - "epoch": 2.32, - "learning_rate": 0.007382010454309307, - "loss": 8.0465, - "step": 539600 - }, - { - "epoch": 2.32, - "learning_rate": 0.007378063542043648, - "loss": 8.0493, - "step": 539800 - }, - { - "epoch": 2.32, - "learning_rate": 0.007374114713831257, - "loss": 8.0476, - "step": 540000 - }, - { - "epoch": 2.32, - "learning_rate": 0.007370163972853637, - "loss": 8.0469, - "step": 540200 - }, - { - "epoch": 2.32, - "learning_rate": 0.007366231090291448, - "loss": 8.0494, - "step": 540400 - }, - { - "epoch": 2.32, - "learning_rate": 0.007362276542858093, - "loss": 8.0534, - "step": 540600 - }, - { - "epoch": 2.32, - "learning_rate": 0.0073583200921973095, - "loss": 8.0557, - "step": 540800 - }, - { - "epoch": 2.32, - "learning_rate": 0.007354361741496737, - "loss": 8.0604, - "step": 541000 - }, - { - "epoch": 2.33, - "learning_rate": 0.00735040149394555, - "loss": 8.0618, - "step": 541200 - }, - { - "epoch": 2.33, - "learning_rate": 0.007346439352734444, - "loss": 8.078, - "step": 541400 - }, - { - "epoch": 2.33, - "learning_rate": 0.007342475321055648, - "loss": 8.0603, - "step": 541600 - }, - { - "epoch": 2.33, - "learning_rate": 0.00733850940210291, - "loss": 8.063, - "step": 541800 - }, - { - "epoch": 2.33, - "learning_rate": 0.007334541599071498, - "loss": 8.054, - "step": 542000 - }, - { - "epoch": 2.33, - "learning_rate": 0.007330571915158202, - "loss": 8.0419, - "step": 542200 - }, - { - "epoch": 2.33, - "learning_rate": 0.00732662021603475, - "loss": 8.0467, - "step": 542400 - }, - { - "epoch": 2.33, - "learning_rate": 0.00732264678931856, - "loss": 8.0508, - "step": 542600 - }, - { - "epoch": 2.33, - "learning_rate": 0.007318671491303916, - "loss": 8.0526, - "step": 542800 - }, - { - "epoch": 2.33, - "learning_rate": 0.007314694325193645, - "loss": 8.0437, - "step": 543000 - }, - { - "epoch": 2.33, - "learning_rate": 0.007310715294192078, - "loss": 8.0563, - "step": 543200 - }, - { - "epoch": 2.34, - "learning_rate": 0.0073067344015050485, - "loss": 8.0506, - "step": 543400 - }, - { - "epoch": 2.34, - "learning_rate": 0.007302751650339891, - "loss": 8.0595, - "step": 543600 - }, - { - "epoch": 2.34, - "learning_rate": 0.007298767043905435, - "loss": 8.0619, - "step": 543800 - }, - { - "epoch": 2.34, - "learning_rate": 0.007294780585412007, - "loss": 8.0447, - "step": 544000 - }, - { - "epoch": 2.34, - "learning_rate": 0.007290792278071423, - "loss": 8.051, - "step": 544200 - }, - { - "epoch": 2.34, - "learning_rate": 0.007286822080447558, - "loss": 8.0459, - "step": 544400 - }, - { - "epoch": 2.34, - "learning_rate": 0.007282830094258168, - "loss": 8.0581, - "step": 544600 - }, - { - "epoch": 2.34, - "learning_rate": 0.0072788362688499175, - "loss": 8.048, - "step": 544800 - }, - { - "epoch": 2.34, - "learning_rate": 0.007274840607440558, - "loss": 8.0587, - "step": 545000 - }, - { - "epoch": 2.34, - "learning_rate": 0.007270843113249326, - "loss": 8.0521, - "step": 545200 - }, - { - "epoch": 2.34, - "learning_rate": 0.007266843789496924, - "loss": 8.0527, - "step": 545400 - }, - { - "epoch": 2.34, - "learning_rate": 0.007262842639405542, - "loss": 8.0465, - "step": 545600 - }, - { - "epoch": 2.35, - "learning_rate": 0.007258839666198829, - "loss": 8.0383, - "step": 545800 - }, - { - "epoch": 2.35, - "learning_rate": 0.007254834873101911, - "loss": 8.06, - "step": 546000 - }, - { - "epoch": 2.35, - "learning_rate": 0.0072508282633413764, - "loss": 8.0645, - "step": 546200 - }, - { - "epoch": 2.35, - "learning_rate": 0.007246819840145278, - "loss": 8.057, - "step": 546400 - }, - { - "epoch": 2.35, - "learning_rate": 0.007242829662407685, - "loss": 8.064, - "step": 546600 - }, - { - "epoch": 2.35, - "learning_rate": 0.007238837695703593, - "loss": 8.0656, - "step": 546800 - }, - { - "epoch": 2.35, - "learning_rate": 0.007234823869605143, - "loss": 8.0502, - "step": 547000 - }, - { - "epoch": 2.35, - "learning_rate": 0.007230808242965579, - "loss": 8.067, - "step": 547200 - }, - { - "epoch": 2.35, - "learning_rate": 0.0072267908190202235, - "loss": 8.0442, - "step": 547400 - }, - { - "epoch": 2.35, - "learning_rate": 0.007222771601005837, - "loss": 8.0562, - "step": 547600 - }, - { - "epoch": 2.35, - "learning_rate": 0.0072187505921606325, - "loss": 8.0575, - "step": 547800 - }, - { - "epoch": 2.35, - "learning_rate": 0.0072147277957242675, - "loss": 8.06, - "step": 548000 - }, - { - "epoch": 2.36, - "learning_rate": 0.007210703214937832, - "loss": 8.0484, - "step": 548200 - }, - { - "epoch": 2.36, - "learning_rate": 0.007206676853043861, - "loss": 8.0623, - "step": 548400 - }, - { - "epoch": 2.36, - "learning_rate": 0.00720264871328632, - "loss": 8.0545, - "step": 548600 - }, - { - "epoch": 2.36, - "learning_rate": 0.007198618798910609, - "loss": 8.0536, - "step": 548800 - }, - { - "epoch": 2.36, - "learning_rate": 0.007194587113163557, - "loss": 8.0564, - "step": 549000 - }, - { - "epoch": 2.36, - "learning_rate": 0.007190553659293422, - "loss": 8.0463, - "step": 549200 - }, - { - "epoch": 2.36, - "learning_rate": 0.0071865184405498815, - "loss": 8.0577, - "step": 549400 - }, - { - "epoch": 2.36, - "learning_rate": 0.007182481460184041, - "loss": 8.0473, - "step": 549600 - }, - { - "epoch": 2.36, - "learning_rate": 0.007178442721448424, - "loss": 8.0636, - "step": 549800 - }, - { - "epoch": 2.36, - "learning_rate": 0.007174402227596965, - "loss": 8.0567, - "step": 550000 - }, - { - "epoch": 2.36, - "learning_rate": 0.007170359981885019, - "loss": 8.0657, - "step": 550200 - }, - { - "epoch": 2.37, - "learning_rate": 0.007166315987569351, - "loss": 8.0443, - "step": 550400 - }, - { - "epoch": 2.37, - "learning_rate": 0.007162270247908135, - "loss": 8.0611, - "step": 550600 - }, - { - "epoch": 2.37, - "learning_rate": 0.00715824300789773, - "loss": 8.061, - "step": 550800 - }, - { - "epoch": 2.37, - "learning_rate": 0.007154193796011567, - "loss": 8.0521, - "step": 551000 - }, - { - "epoch": 2.37, - "learning_rate": 0.007150142848546487, - "loss": 8.057, - "step": 551200 - }, - { - "epoch": 2.37, - "learning_rate": 0.007146090168766263, - "loss": 8.0605, - "step": 551400 - }, - { - "epoch": 2.37, - "learning_rate": 0.007142035759936066, - "loss": 8.0472, - "step": 551600 - }, - { - "epoch": 2.37, - "learning_rate": 0.007137979625322462, - "loss": 8.0535, - "step": 551800 - }, - { - "epoch": 2.37, - "learning_rate": 0.0071339217681934035, - "loss": 8.0519, - "step": 552000 - }, - { - "epoch": 2.37, - "learning_rate": 0.007129862191818233, - "loss": 8.0626, - "step": 552200 - }, - { - "epoch": 2.37, - "learning_rate": 0.007125800899467677, - "loss": 8.0612, - "step": 552400 - }, - { - "epoch": 2.37, - "learning_rate": 0.007121737894413849, - "loss": 8.051, - "step": 552600 - }, - { - "epoch": 2.38, - "learning_rate": 0.007117693507749444, - "loss": 8.0404, - "step": 552800 - }, - { - "epoch": 2.38, - "learning_rate": 0.0071136270956335375, - "loss": 8.0493, - "step": 553000 - }, - { - "epoch": 2.38, - "learning_rate": 0.007109558980622571, - "loss": 8.0652, - "step": 553200 - }, - { - "epoch": 2.38, - "learning_rate": 0.007105489165994151, - "loss": 8.0499, - "step": 553400 - }, - { - "epoch": 2.38, - "learning_rate": 0.007101417655027253, - "loss": 8.0657, - "step": 553600 - }, - { - "epoch": 2.38, - "learning_rate": 0.007097344451002222, - "loss": 8.0415, - "step": 553800 - }, - { - "epoch": 2.38, - "learning_rate": 0.0070932695572007605, - "loss": 8.0431, - "step": 554000 - }, - { - "epoch": 2.38, - "learning_rate": 0.007089192976905942, - "loss": 8.0563, - "step": 554200 - }, - { - "epoch": 2.38, - "learning_rate": 0.007085114713402188, - "loss": 8.0635, - "step": 554400 - }, - { - "epoch": 2.38, - "learning_rate": 0.007081034769975286, - "loss": 8.0628, - "step": 554600 - }, - { - "epoch": 2.38, - "learning_rate": 0.00707697356217788, - "loss": 8.0692, - "step": 554800 - }, - { - "epoch": 2.38, - "learning_rate": 0.007072890277125996, - "loss": 8.0654, - "step": 555000 - }, - { - "epoch": 2.39, - "learning_rate": 0.00706880532199997, - "loss": 8.0595, - "step": 555200 - }, - { - "epoch": 2.39, - "learning_rate": 0.007064718700090976, - "loss": 8.0514, - "step": 555400 - }, - { - "epoch": 2.39, - "learning_rate": 0.007060630414691535, - "loss": 8.0607, - "step": 555600 - }, - { - "epoch": 2.39, - "learning_rate": 0.0070565404690954995, - "loss": 8.0671, - "step": 555800 - }, - { - "epoch": 2.39, - "learning_rate": 0.007052448866598068, - "loss": 8.0563, - "step": 556000 - }, - { - "epoch": 2.39, - "learning_rate": 0.00704835561049577, - "loss": 8.0695, - "step": 556200 - }, - { - "epoch": 2.39, - "learning_rate": 0.007044260704086468, - "loss": 8.0473, - "step": 556400 - }, - { - "epoch": 2.39, - "learning_rate": 0.007040164150669354, - "loss": 8.0468, - "step": 556600 - }, - { - "epoch": 2.39, - "learning_rate": 0.007036086448613831, - "loss": 8.0611, - "step": 556800 - }, - { - "epoch": 2.39, - "learning_rate": 0.007031986619277786, - "loss": 8.0643, - "step": 557000 - }, - { - "epoch": 2.39, - "learning_rate": 0.0070278851528229385, - "loss": 8.0717, - "step": 557200 - }, - { - "epoch": 2.4, - "learning_rate": 0.0070237820525537635, - "loss": 8.0483, - "step": 557400 - }, - { - "epoch": 2.4, - "learning_rate": 0.007019677321776058, - "loss": 8.0599, - "step": 557600 - }, - { - "epoch": 2.4, - "learning_rate": 0.007015570963796927, - "loss": 8.0636, - "step": 557800 - }, - { - "epoch": 2.4, - "learning_rate": 0.00701146298192479, - "loss": 8.0634, - "step": 558000 - }, - { - "epoch": 2.4, - "learning_rate": 0.007007353379469375, - "loss": 8.0698, - "step": 558200 - }, - { - "epoch": 2.4, - "learning_rate": 0.007003242159741711, - "loss": 8.0581, - "step": 558400 - }, - { - "epoch": 2.4, - "learning_rate": 0.0069991293260541374, - "loss": 8.0534, - "step": 558600 - }, - { - "epoch": 2.4, - "learning_rate": 0.006995035457942955, - "loss": 8.0641, - "step": 558800 - }, - { - "epoch": 2.4, - "learning_rate": 0.006990919414306169, - "loss": 8.0571, - "step": 559000 - }, - { - "epoch": 2.4, - "learning_rate": 0.0069868017666376864, - "loss": 8.0577, - "step": 559200 - }, - { - "epoch": 2.4, - "learning_rate": 0.006982703118473221, - "loss": 8.0753, - "step": 559400 - }, - { - "epoch": 2.4, - "learning_rate": 0.006978582280673894, - "loss": 8.0559, - "step": 559600 - }, - { - "epoch": 2.41, - "learning_rate": 0.006974459848782675, - "loss": 8.0529, - "step": 559800 - }, - { - "epoch": 2.41, - "learning_rate": 0.006970335826120932, - "loss": 8.046, - "step": 560000 - }, - { - "epoch": 2.41, - "learning_rate": 0.006966210216011318, - "loss": 8.0551, - "step": 560200 - }, - { - "epoch": 2.41, - "learning_rate": 0.0069620830217777575, - "loss": 8.0548, - "step": 560400 - }, - { - "epoch": 2.41, - "learning_rate": 0.006957954246745461, - "loss": 8.0485, - "step": 560600 - }, - { - "epoch": 2.41, - "learning_rate": 0.006953823894240906, - "loss": 8.0603, - "step": 560800 - }, - { - "epoch": 2.41, - "learning_rate": 0.0069496919675918435, - "loss": 8.051, - "step": 561000 - }, - { - "epoch": 2.41, - "learning_rate": 0.006945558470127292, - "loss": 8.0569, - "step": 561200 - }, - { - "epoch": 2.41, - "learning_rate": 0.006941423405177537, - "loss": 8.0389, - "step": 561400 - }, - { - "epoch": 2.41, - "learning_rate": 0.0069372867760741225, - "loss": 8.0413, - "step": 561600 - }, - { - "epoch": 2.41, - "learning_rate": 0.006933148586149858, - "loss": 8.0455, - "step": 561800 - }, - { - "epoch": 2.42, - "learning_rate": 0.006929008838738809, - "loss": 8.0532, - "step": 562000 - }, - { - "epoch": 2.42, - "learning_rate": 0.006924867537176294, - "loss": 8.0524, - "step": 562200 - }, - { - "epoch": 2.42, - "learning_rate": 0.006920724684798886, - "loss": 8.0644, - "step": 562400 - }, - { - "epoch": 2.42, - "learning_rate": 0.006916580284944404, - "loss": 8.0491, - "step": 562600 - }, - { - "epoch": 2.42, - "learning_rate": 0.006912434340951918, - "loss": 8.0658, - "step": 562800 - }, - { - "epoch": 2.42, - "learning_rate": 0.006908286856161741, - "loss": 8.0541, - "step": 563000 - }, - { - "epoch": 2.42, - "learning_rate": 0.006904137833915425, - "loss": 8.0429, - "step": 563200 - }, - { - "epoch": 2.42, - "learning_rate": 0.006900008034148137, - "loss": 8.047, - "step": 563400 - }, - { - "epoch": 2.42, - "learning_rate": 0.006895855954664682, - "loss": 8.0523, - "step": 563600 - }, - { - "epoch": 2.42, - "learning_rate": 0.006891702347740443, - "loss": 8.0611, - "step": 563800 - }, - { - "epoch": 2.42, - "learning_rate": 0.0068875472167219025, - "loss": 8.0624, - "step": 564000 - }, - { - "epoch": 2.42, - "learning_rate": 0.006883390564956777, - "loss": 8.0618, - "step": 564200 - }, - { - "epoch": 2.43, - "learning_rate": 0.006879232395794005, - "loss": 8.0637, - "step": 564400 - }, - { - "epoch": 2.43, - "learning_rate": 0.006875072712583748, - "loss": 8.0495, - "step": 564600 - }, - { - "epoch": 2.43, - "learning_rate": 0.00687091151867739, - "loss": 8.0603, - "step": 564800 - }, - { - "epoch": 2.43, - "learning_rate": 0.006866748817427526, - "loss": 8.0579, - "step": 565000 - }, - { - "epoch": 2.43, - "learning_rate": 0.006862584612187971, - "loss": 8.0629, - "step": 565200 - }, - { - "epoch": 2.43, - "learning_rate": 0.006858439738570398, - "loss": 8.0601, - "step": 565400 - }, - { - "epoch": 2.43, - "learning_rate": 0.006854293382593129, - "loss": 8.0478, - "step": 565600 - }, - { - "epoch": 2.43, - "learning_rate": 0.00685012470044207, - "loss": 8.06, - "step": 565800 - }, - { - "epoch": 2.43, - "learning_rate": 0.006845954527695071, - "loss": 8.0508, - "step": 566000 - }, - { - "epoch": 2.43, - "learning_rate": 0.006841782867711967, - "loss": 8.0748, - "step": 566200 - }, - { - "epoch": 2.43, - "learning_rate": 0.006837609723853784, - "loss": 8.0635, - "step": 566400 - }, - { - "epoch": 2.43, - "learning_rate": 0.0068334350994827524, - "loss": 8.0627, - "step": 566600 - }, - { - "epoch": 2.44, - "learning_rate": 0.0068292589979622904, - "loss": 8.0511, - "step": 566800 - }, - { - "epoch": 2.44, - "learning_rate": 0.006825081422657008, - "loss": 8.0495, - "step": 567000 - }, - { - "epoch": 2.44, - "learning_rate": 0.0068209023769327005, - "loss": 8.0555, - "step": 567200 - }, - { - "epoch": 2.44, - "learning_rate": 0.006816721864156354, - "loss": 8.0548, - "step": 567400 - }, - { - "epoch": 2.44, - "learning_rate": 0.006812539887696127, - "loss": 8.0487, - "step": 567600 - }, - { - "epoch": 2.44, - "learning_rate": 0.006808356450921365, - "loss": 8.0457, - "step": 567800 - }, - { - "epoch": 2.44, - "learning_rate": 0.0068041715572025865, - "loss": 8.0417, - "step": 568000 - }, - { - "epoch": 2.44, - "learning_rate": 0.006799985209911487, - "loss": 8.0564, - "step": 568200 - }, - { - "epoch": 2.44, - "learning_rate": 0.0067957974124209265, - "loss": 8.0481, - "step": 568400 - }, - { - "epoch": 2.44, - "learning_rate": 0.0067916081681049425, - "loss": 8.0318, - "step": 568600 - }, - { - "epoch": 2.44, - "learning_rate": 0.00678741748033873, - "loss": 8.0717, - "step": 568800 - }, - { - "epoch": 2.45, - "learning_rate": 0.006783225352498653, - "loss": 8.0506, - "step": 569000 - }, - { - "epoch": 2.45, - "learning_rate": 0.0067790317879622315, - "loss": 8.0453, - "step": 569200 - }, - { - "epoch": 2.45, - "learning_rate": 0.006774836790108145, - "loss": 8.0478, - "step": 569400 - }, - { - "epoch": 2.45, - "learning_rate": 0.006770661348006565, - "loss": 8.0587, - "step": 569600 - }, - { - "epoch": 2.45, - "learning_rate": 0.006766463500782177, - "loss": 8.0524, - "step": 569800 - }, - { - "epoch": 2.45, - "learning_rate": 0.006762285230252838, - "loss": 8.0655, - "step": 570000 } ], - "max_steps": 1000000, - "num_train_epochs": 5, - "total_flos": 9.084816952573256e+17, + "max_steps": 500000, + "num_train_epochs": 3, + "total_flos": 1.593829982208e+16, "trial_name": null, "trial_params": null }