diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,13417 @@ +{ + "best_metric": 0.3554441818087852, + "best_model_checkpoint": "esm2_t12_35M_lora_binding_sites_2023-09-24_04-19-04/checkpoint-445320", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 445320, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 0.00037015670298819826, + "loss": 0.5221, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 0.0003701563931502581, + "loss": 0.3727, + "step": 400 + }, + { + "epoch": 0.0, + "learning_rate": 0.00037015587606324446, + "loss": 0.3152, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037015515172773665, + "loss": 0.2911, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003701542201445457, + "loss": 0.2675, + "step": 1000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003701530813147147, + "loss": 0.2512, + "step": 1200 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037015173523951894, + "loss": 0.2466, + "step": 1400 + }, + { + "epoch": 0.01, + "learning_rate": 0.00037015018192046586, + "loss": 0.2271, + "step": 1600 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003701484213592947, + "loss": 0.2265, + "step": 1800 + }, + { + "epoch": 0.01, + "learning_rate": 0.000370146453557977, + "loss": 0.2299, + "step": 2000 + }, + { + "epoch": 0.01, + "learning_rate": 0.0003701442899094131, + "loss": 0.22, + "step": 2200 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003701419086708161, + "loss": 0.2157, + "step": 2400 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037013932019936536, + "loss": 0.215, + "step": 2600 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003701365244979595, + "loss": 0.2036, + "step": 2800 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037013352156972917, + "loss": 0.1992, + "step": 3000 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037013031141803696, + "loss": 0.2077, + "step": 3200 + }, + { + "epoch": 0.02, + "learning_rate": 0.00037012689404647756, + "loss": 0.1882, + "step": 3400 + }, + { + "epoch": 0.02, + "learning_rate": 0.0003701232694588778, + "loss": 0.1935, + "step": 3600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00037011943765929636, + "loss": 0.1966, + "step": 3800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003701154193624823, + "loss": 0.1852, + "step": 4000 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003701111741880463, + "loss": 0.1871, + "step": 4200 + }, + { + "epoch": 0.03, + "learning_rate": 0.00037010672181517294, + "loss": 0.1653, + "step": 4400 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003701020622488479, + "loss": 0.1865, + "step": 4600 + }, + { + "epoch": 0.03, + "learning_rate": 0.00037009719549428895, + "loss": 0.1746, + "step": 4800 + }, + { + "epoch": 0.03, + "learning_rate": 0.0003700921215569458, + "loss": 0.1787, + "step": 5000 + }, + { + "epoch": 0.04, + "learning_rate": 0.00037008684044250026, + "loss": 0.1873, + "step": 5200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00037008135215686603, + "loss": 0.1742, + "step": 5400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003700756567061889, + "loss": 0.1777, + "step": 5600 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003700697540968466, + "loss": 0.1792, + "step": 5800 + }, + { + "epoch": 0.04, + "learning_rate": 0.00037006364433544883, + "loss": 0.1794, + "step": 6000 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003700573274288372, + "loss": 0.1708, + "step": 6200 + }, + { + "epoch": 0.04, + "learning_rate": 0.00037005080338408546, + "loss": 0.161, + "step": 6400 + }, + { + "epoch": 0.04, + "learning_rate": 0.0003700440722084991, + "loss": 0.1661, + "step": 6600 + }, + { + "epoch": 0.05, + "learning_rate": 0.00037003720431783943, + "loss": 0.1833, + "step": 6800 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003700300609745444, + "loss": 0.1802, + "step": 7000 + }, + { + "epoch": 0.05, + "learning_rate": 0.00037002271052364195, + "loss": 0.1557, + "step": 7200 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003700151529733631, + "loss": 0.1763, + "step": 7400 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003700073883321708, + "loss": 0.1661, + "step": 7600 + }, + { + "epoch": 0.05, + "learning_rate": 0.0003699994166087597, + "loss": 0.152, + "step": 7800 + }, + { + "epoch": 0.05, + "learning_rate": 0.00036999123781205663, + "loss": 0.1616, + "step": 8000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036998285195121995, + "loss": 0.1552, + "step": 8200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036997425903564034, + "loss": 0.1595, + "step": 8400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036996545907493994, + "loss": 0.1567, + "step": 8600 + }, + { + "epoch": 0.06, + "learning_rate": 0.0003699564520789729, + "loss": 0.1572, + "step": 8800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036994723805782516, + "loss": 0.1628, + "step": 9000 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036993781702181466, + "loss": 0.1562, + "step": 9200 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036992818898149083, + "loss": 0.1583, + "step": 9400 + }, + { + "epoch": 0.06, + "learning_rate": 0.00036991835394763526, + "loss": 0.1514, + "step": 9600 + }, + { + "epoch": 0.07, + "learning_rate": 0.000369908311931261, + "loss": 0.1651, + "step": 9800 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003698980629436131, + "loss": 0.1488, + "step": 10000 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003698876069961683, + "loss": 0.1471, + "step": 10200 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003698769441006351, + "loss": 0.1516, + "step": 10400 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003698660742689538, + "loss": 0.1411, + "step": 10600 + }, + { + "epoch": 0.07, + "learning_rate": 0.0003698549975132963, + "loss": 0.1573, + "step": 10800 + }, + { + "epoch": 0.07, + "learning_rate": 0.00036984377077907405, + "loss": 0.1472, + "step": 11000 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003698322812473697, + "loss": 0.1445, + "step": 11200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003698205848295303, + "loss": 0.1544, + "step": 11400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003698086815386536, + "loss": 0.1545, + "step": 11600 + }, + { + "epoch": 0.08, + "learning_rate": 0.00036979657138806857, + "loss": 0.1405, + "step": 11800 + }, + { + "epoch": 0.08, + "learning_rate": 0.00036978425439133624, + "loss": 0.1558, + "step": 12000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00036977173056224906, + "loss": 0.154, + "step": 12200 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003697589999148311, + "loss": 0.1572, + "step": 12400 + }, + { + "epoch": 0.08, + "learning_rate": 0.0003697460624633381, + "loss": 0.1462, + "step": 12600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003697329182222573, + "loss": 0.1413, + "step": 12800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003697195672063075, + "loss": 0.1513, + "step": 13000 + }, + { + "epoch": 0.09, + "learning_rate": 0.00036970600943043915, + "loss": 0.1496, + "step": 13200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00036969231424668924, + "loss": 0.1537, + "step": 13400 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003696783440303689, + "loss": 0.147, + "step": 13600 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003696641671002913, + "loss": 0.153, + "step": 13800 + }, + { + "epoch": 0.09, + "learning_rate": 0.0003696497834723318, + "loss": 0.1424, + "step": 14000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003696351931625971, + "loss": 0.1413, + "step": 14200 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036962039618742523, + "loss": 0.1382, + "step": 14400 + }, + { + "epoch": 0.1, + "learning_rate": 0.00036960539256338583, + "loss": 0.1432, + "step": 14600 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003695901823072799, + "loss": 0.1377, + "step": 14800 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003695747654361396, + "loss": 0.1535, + "step": 15000 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003695591419672288, + "loss": 0.1338, + "step": 15200 + }, + { + "epoch": 0.1, + "learning_rate": 0.0003695433915821276, + "loss": 0.1478, + "step": 15400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003695273560031605, + "loss": 0.1427, + "step": 15600 + }, + { + "epoch": 0.11, + "learning_rate": 0.00036951111387951155, + "loss": 0.149, + "step": 15800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00036949466522936866, + "loss": 0.1435, + "step": 16000 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003694780100711509, + "loss": 0.1281, + "step": 16200 + }, + { + "epoch": 0.11, + "learning_rate": 0.00036946123324535805, + "loss": 0.156, + "step": 16400 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003694441661594781, + "loss": 0.1463, + "step": 16600 + }, + { + "epoch": 0.11, + "learning_rate": 0.0003694268926220718, + "loss": 0.1403, + "step": 16800 + }, + { + "epoch": 0.11, + "learning_rate": 0.00036940941265248186, + "loss": 0.1346, + "step": 17000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003693917262702823, + "loss": 0.1271, + "step": 17200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003693738334952782, + "loss": 0.1397, + "step": 17400 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003693557343475058, + "loss": 0.1281, + "step": 17600 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003693374288472324, + "loss": 0.1448, + "step": 17800 + }, + { + "epoch": 0.12, + "learning_rate": 0.00036931891701495626, + "loss": 0.1354, + "step": 18000 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003693001988714069, + "loss": 0.1408, + "step": 18200 + }, + { + "epoch": 0.12, + "learning_rate": 0.0003692812744375448, + "loss": 0.131, + "step": 18400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003692621437345614, + "loss": 0.1347, + "step": 18600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00036924280678387906, + "loss": 0.1622, + "step": 18800 + }, + { + "epoch": 0.13, + "learning_rate": 0.00036922326360715115, + "loss": 0.1395, + "step": 19000 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003692035142262621, + "loss": 0.1348, + "step": 19200 + }, + { + "epoch": 0.13, + "learning_rate": 0.00036918355866332706, + "loss": 0.1491, + "step": 19400 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003691633969406922, + "loss": 0.1351, + "step": 19600 + }, + { + "epoch": 0.13, + "learning_rate": 0.00036914302908093444, + "loss": 0.1387, + "step": 19800 + }, + { + "epoch": 0.13, + "learning_rate": 0.0003691224551068615, + "loss": 0.1466, + "step": 20000 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003691016750415121, + "loss": 0.1238, + "step": 20200 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036908068890815544, + "loss": 0.1465, + "step": 20400 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036905949673029186, + "loss": 0.1405, + "step": 20600 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003690382060350824, + "loss": 0.1367, + "step": 20800 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003690166028695521, + "loss": 0.138, + "step": 21000 + }, + { + "epoch": 0.14, + "learning_rate": 0.00036899479373127795, + "loss": 0.1339, + "step": 21200 + }, + { + "epoch": 0.14, + "learning_rate": 0.0003689727786446815, + "loss": 0.1398, + "step": 21400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003689505576344153, + "loss": 0.1278, + "step": 21600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003689281307253622, + "loss": 0.1472, + "step": 21800 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036890549794263563, + "loss": 0.1371, + "step": 22000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00036888265931157973, + "loss": 0.1401, + "step": 22200 + }, + { + "epoch": 0.15, + "learning_rate": 0.000368859614857769, + "loss": 0.1349, + "step": 22400 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003688363646070085, + "loss": 0.1389, + "step": 22600 + }, + { + "epoch": 0.15, + "learning_rate": 0.0003688130263772538, + "loss": 0.1367, + "step": 22800 + }, + { + "epoch": 0.15, + "learning_rate": 0.000368789365639588, + "loss": 0.138, + "step": 23000 + }, + { + "epoch": 0.16, + "learning_rate": 0.000368765499183637, + "loss": 0.1284, + "step": 23200 + }, + { + "epoch": 0.16, + "learning_rate": 0.00036874142703612636, + "loss": 0.1224, + "step": 23400 + }, + { + "epoch": 0.16, + "learning_rate": 0.00036871714922401174, + "loss": 0.1329, + "step": 23600 + }, + { + "epoch": 0.16, + "learning_rate": 0.00036869266577447944, + "loss": 0.1408, + "step": 23800 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003686679767149457, + "loss": 0.1425, + "step": 24000 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003686430820730573, + "loss": 0.1288, + "step": 24200 + }, + { + "epoch": 0.16, + "learning_rate": 0.0003686179818766911, + "loss": 0.1299, + "step": 24400 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036859267615395395, + "loss": 0.1377, + "step": 24600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036856729300041623, + "loss": 0.1319, + "step": 24800 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036854157733745503, + "loss": 0.1284, + "step": 25000 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036851565623368017, + "loss": 0.1223, + "step": 25200 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003684895297181181, + "loss": 0.146, + "step": 25400 + }, + { + "epoch": 0.17, + "learning_rate": 0.0003684631978200251, + "loss": 0.1486, + "step": 25600 + }, + { + "epoch": 0.17, + "learning_rate": 0.00036843666056888733, + "loss": 0.1227, + "step": 25800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003684099179944211, + "loss": 0.1254, + "step": 26000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003683829701265725, + "loss": 0.1289, + "step": 26200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003683558169955177, + "loss": 0.129, + "step": 26400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00036832845863166245, + "loss": 0.1296, + "step": 26600 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003683008950656426, + "loss": 0.1338, + "step": 26800 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003682731263283235, + "loss": 0.1383, + "step": 27000 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003682452928304226, + "loss": 0.1334, + "step": 27200 + }, + { + "epoch": 0.18, + "learning_rate": 0.0003682171148694867, + "loss": 0.1387, + "step": 27400 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003681887318310681, + "loss": 0.1242, + "step": 27600 + }, + { + "epoch": 0.19, + "learning_rate": 0.00036816014374694997, + "loss": 0.1278, + "step": 27800 + }, + { + "epoch": 0.19, + "learning_rate": 0.000368131350649145, + "loss": 0.133, + "step": 28000 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003681023525698955, + "loss": 0.1261, + "step": 28200 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003680731495416734, + "loss": 0.1358, + "step": 28400 + }, + { + "epoch": 0.19, + "learning_rate": 0.00036804374159717996, + "loss": 0.1362, + "step": 28600 + }, + { + "epoch": 0.19, + "learning_rate": 0.0003680141287693461, + "loss": 0.1291, + "step": 28800 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036798431109133197, + "loss": 0.1252, + "step": 29000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003679542885965272, + "loss": 0.1286, + "step": 29200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003679240613185508, + "loss": 0.1281, + "step": 29400 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003678937819606453, + "loss": 0.1355, + "step": 29600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00036786314624159095, + "loss": 0.1357, + "step": 29800 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003678323058414256, + "loss": 0.1293, + "step": 30000 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003678012607946841, + "loss": 0.132, + "step": 30200 + }, + { + "epoch": 0.2, + "learning_rate": 0.0003677700111361304, + "loss": 0.1237, + "step": 30400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003677385569007577, + "loss": 0.1306, + "step": 30600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003677068981237883, + "loss": 0.1231, + "step": 30800 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036767519466573924, + "loss": 0.1405, + "step": 31000 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003676431279344224, + "loss": 0.127, + "step": 31200 + }, + { + "epoch": 0.21, + "learning_rate": 0.00036761085676836956, + "loss": 0.1336, + "step": 31400 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003675783812037177, + "loss": 0.1346, + "step": 31600 + }, + { + "epoch": 0.21, + "learning_rate": 0.0003675457012768328, + "loss": 0.117, + "step": 31800 + }, + { + "epoch": 0.22, + "learning_rate": 0.00036751281702430955, + "loss": 0.1277, + "step": 32000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003674797284829716, + "loss": 0.1181, + "step": 32200 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003674464356898712, + "loss": 0.124, + "step": 32400 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003674129386822893, + "loss": 0.137, + "step": 32600 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003673792374977358, + "loss": 0.1154, + "step": 32800 + }, + { + "epoch": 0.22, + "learning_rate": 0.000367345332173949, + "loss": 0.1548, + "step": 33000 + }, + { + "epoch": 0.22, + "learning_rate": 0.0003673112227488958, + "loss": 0.1304, + "step": 33200 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003672769092607718, + "loss": 0.113, + "step": 33400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003672423917480009, + "loss": 0.1383, + "step": 33600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003672076702492356, + "loss": 0.1286, + "step": 33800 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003671727448033568, + "loss": 0.1233, + "step": 34000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003671376154494739, + "loss": 0.1311, + "step": 34200 + }, + { + "epoch": 0.23, + "learning_rate": 0.00036710228222692435, + "loss": 0.1342, + "step": 34400 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003670669233674915, + "loss": 0.1317, + "step": 34600 + }, + { + "epoch": 0.23, + "learning_rate": 0.0003670311835453819, + "loss": 0.1343, + "step": 34800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036699523997378743, + "loss": 0.1177, + "step": 35000 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036695909269295746, + "loss": 0.1269, + "step": 35200 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003669227417433694, + "loss": 0.1374, + "step": 35400 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003668861871657289, + "loss": 0.1216, + "step": 35600 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036684942900096955, + "loss": 0.1234, + "step": 35800 + }, + { + "epoch": 0.24, + "learning_rate": 0.00036681246729025277, + "loss": 0.1312, + "step": 36000 + }, + { + "epoch": 0.24, + "learning_rate": 0.0003667753020749681, + "loss": 0.1219, + "step": 36200 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003667379333967329, + "loss": 0.1231, + "step": 36400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00036670036129739236, + "loss": 0.1236, + "step": 36600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003666625858190194, + "loss": 0.1299, + "step": 36800 + }, + { + "epoch": 0.25, + "learning_rate": 0.00036662460700391487, + "loss": 0.1284, + "step": 37000 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003665864248946071, + "loss": 0.1276, + "step": 37200 + }, + { + "epoch": 0.25, + "learning_rate": 0.00036654803953385206, + "loss": 0.1227, + "step": 37400 + }, + { + "epoch": 0.25, + "learning_rate": 0.00036650945096463363, + "loss": 0.1275, + "step": 37600 + }, + { + "epoch": 0.25, + "learning_rate": 0.0003664706592301629, + "loss": 0.1338, + "step": 37800 + }, + { + "epoch": 0.26, + "learning_rate": 0.00036643166437387857, + "loss": 0.1248, + "step": 38000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00036639246643944696, + "loss": 0.1234, + "step": 38200 + }, + { + "epoch": 0.26, + "learning_rate": 0.00036635306547076144, + "loss": 0.1274, + "step": 38400 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003663134615119431, + "loss": 0.1211, + "step": 38600 + }, + { + "epoch": 0.26, + "learning_rate": 0.00036627365460734014, + "loss": 0.1162, + "step": 38800 + }, + { + "epoch": 0.26, + "learning_rate": 0.0003662336448015281, + "loss": 0.1093, + "step": 39000 + }, + { + "epoch": 0.26, + "learning_rate": 0.00036619363370715154, + "loss": 0.1281, + "step": 39200 + }, + { + "epoch": 0.27, + "learning_rate": 0.000366153219247501, + "loss": 0.1143, + "step": 39400 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036611260202150397, + "loss": 0.1301, + "step": 39600 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003660717820746435, + "loss": 0.1312, + "step": 39800 + }, + { + "epoch": 0.27, + "learning_rate": 0.0003660307594526293, + "loss": 0.1249, + "step": 40000 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036598953420139834, + "loss": 0.13, + "step": 40200 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036594810636711434, + "loss": 0.1215, + "step": 40400 + }, + { + "epoch": 0.27, + "learning_rate": 0.000365906475996168, + "loss": 0.1347, + "step": 40600 + }, + { + "epoch": 0.27, + "learning_rate": 0.00036586464313517655, + "loss": 0.1219, + "step": 40800 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036582260783098424, + "loss": 0.1228, + "step": 41000 + }, + { + "epoch": 0.28, + "learning_rate": 0.000365780370130662, + "loss": 0.1263, + "step": 41200 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003657379300815072, + "loss": 0.1278, + "step": 41400 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036569528773104393, + "loss": 0.1191, + "step": 41600 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003656526578530694, + "loss": 0.1304, + "step": 41800 + }, + { + "epoch": 0.28, + "learning_rate": 0.00036560961205437574, + "loss": 0.1327, + "step": 42000 + }, + { + "epoch": 0.28, + "learning_rate": 0.0003655663640980633, + "loss": 0.1281, + "step": 42200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003655229140325608, + "loss": 0.1266, + "step": 42400 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003654792619065234, + "loss": 0.1142, + "step": 42600 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003654354077688324, + "loss": 0.1087, + "step": 42800 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003653913516685954, + "loss": 0.1197, + "step": 43000 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036534709365514605, + "loss": 0.1249, + "step": 43200 + }, + { + "epoch": 0.29, + "learning_rate": 0.0003653026337780443, + "loss": 0.1217, + "step": 43400 + }, + { + "epoch": 0.29, + "learning_rate": 0.00036525797208707584, + "loss": 0.1167, + "step": 43600 + }, + { + "epoch": 0.3, + "learning_rate": 0.0003652133334513314, + "loss": 0.1265, + "step": 43800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036516826929133405, + "loss": 0.1287, + "step": 44000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036512300346793045, + "loss": 0.1254, + "step": 44200 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036507753603180906, + "loss": 0.1234, + "step": 44400 + }, + { + "epoch": 0.3, + "learning_rate": 0.000365031867033884, + "loss": 0.1256, + "step": 44600 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036498599652529513, + "loss": 0.1295, + "step": 44800 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036493992455740784, + "loss": 0.129, + "step": 45000 + }, + { + "epoch": 0.3, + "learning_rate": 0.00036489365118181326, + "loss": 0.1252, + "step": 45200 + }, + { + "epoch": 0.31, + "learning_rate": 0.000364847176450328, + "loss": 0.1276, + "step": 45400 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036480050041499414, + "loss": 0.1102, + "step": 45600 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003647536231280792, + "loss": 0.125, + "step": 45800 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036470678053490165, + "loss": 0.1421, + "step": 46000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00036465950190812875, + "loss": 0.1281, + "step": 46200 + }, + { + "epoch": 0.31, + "learning_rate": 0.000364612022187664, + "loss": 0.1147, + "step": 46400 + }, + { + "epoch": 0.31, + "learning_rate": 0.0003645643414266747, + "loss": 0.1263, + "step": 46600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003645164596785535, + "loss": 0.1255, + "step": 46800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00036446837699691823, + "loss": 0.1227, + "step": 47000 + }, + { + "epoch": 0.32, + "learning_rate": 0.00036442009343561136, + "loss": 0.1171, + "step": 47200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00036437160904870047, + "loss": 0.1179, + "step": 47400 + }, + { + "epoch": 0.32, + "learning_rate": 0.00036432292389047827, + "loss": 0.1258, + "step": 47600 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003642740380154618, + "loss": 0.1297, + "step": 47800 + }, + { + "epoch": 0.32, + "learning_rate": 0.00036422495147839327, + "loss": 0.1272, + "step": 48000 + }, + { + "epoch": 0.32, + "learning_rate": 0.0003641756643342394, + "loss": 0.1152, + "step": 48200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003641264245754532, + "loss": 0.1162, + "step": 48400 + }, + { + "epoch": 0.33, + "learning_rate": 0.00036407673738527173, + "loss": 0.1195, + "step": 48600 + }, + { + "epoch": 0.33, + "learning_rate": 0.00036402684975397404, + "loss": 0.1252, + "step": 48800 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003639767617374241, + "loss": 0.1175, + "step": 49000 + }, + { + "epoch": 0.33, + "learning_rate": 0.00036392647339170993, + "loss": 0.1249, + "step": 49200 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003638759847731442, + "loss": 0.1199, + "step": 49400 + }, + { + "epoch": 0.33, + "learning_rate": 0.0003638252959382637, + "loss": 0.122, + "step": 49600 + }, + { + "epoch": 0.34, + "learning_rate": 0.00036377440694382947, + "loss": 0.1109, + "step": 49800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003637233178468266, + "loss": 0.1171, + "step": 50000 + }, + { + "epoch": 0.34, + "learning_rate": 0.00036367202870446437, + "loss": 0.1213, + "step": 50200 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003636207975172021, + "loss": 0.1156, + "step": 50400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003635691094561523, + "loss": 0.1257, + "step": 50600 + }, + { + "epoch": 0.34, + "learning_rate": 0.00036351722152242464, + "loss": 0.1179, + "step": 50800 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003634651337741228, + "loss": 0.1287, + "step": 51000 + }, + { + "epoch": 0.34, + "learning_rate": 0.0003634128462695743, + "loss": 0.1182, + "step": 51200 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003633606219999926, + "loss": 0.1371, + "step": 51400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036330793615687614, + "loss": 0.1055, + "step": 51600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036325505073354187, + "loss": 0.1216, + "step": 51800 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036320196578921055, + "loss": 0.1207, + "step": 52000 + }, + { + "epoch": 0.35, + "learning_rate": 0.0003631486813833263, + "loss": 0.131, + "step": 52200 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036309519757555674, + "loss": 0.12, + "step": 52400 + }, + { + "epoch": 0.35, + "learning_rate": 0.00036304151442579253, + "loss": 0.1199, + "step": 52600 + }, + { + "epoch": 0.36, + "learning_rate": 0.0003629876319941478, + "loss": 0.116, + "step": 52800 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036293355034095967, + "loss": 0.1134, + "step": 53000 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036287926952678845, + "loss": 0.1133, + "step": 53200 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036282478961241737, + "loss": 0.1254, + "step": 53400 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036277011065885274, + "loss": 0.1207, + "step": 53600 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036271523272732363, + "loss": 0.1222, + "step": 53800 + }, + { + "epoch": 0.36, + "learning_rate": 0.00036266015587928203, + "loss": 0.1161, + "step": 54000 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003626048801764025, + "loss": 0.1216, + "step": 54200 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036254940568058254, + "loss": 0.1253, + "step": 54400 + }, + { + "epoch": 0.37, + "learning_rate": 0.000362493732453942, + "loss": 0.1154, + "step": 54600 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036243786055882343, + "loss": 0.1163, + "step": 54800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036238179005779176, + "loss": 0.1154, + "step": 55000 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036232552101363446, + "loss": 0.1115, + "step": 55200 + }, + { + "epoch": 0.37, + "learning_rate": 0.0003622693363205972, + "loss": 0.1203, + "step": 55400 + }, + { + "epoch": 0.37, + "learning_rate": 0.00036221267137136654, + "loss": 0.1241, + "step": 55600 + }, + { + "epoch": 0.38, + "learning_rate": 0.00036215580806838815, + "loss": 0.1113, + "step": 55800 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003620987464753372, + "loss": 0.1275, + "step": 56000 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003620417734481885, + "loss": 0.1238, + "step": 56200 + }, + { + "epoch": 0.38, + "learning_rate": 0.0003619843164575562, + "loss": 0.1158, + "step": 56400 + }, + { + "epoch": 0.38, + "learning_rate": 0.00036192666136888646, + "loss": 0.113, + "step": 56600 + }, + { + "epoch": 0.38, + "learning_rate": 0.000361868808246741, + "loss": 0.114, + "step": 56800 + }, + { + "epoch": 0.38, + "learning_rate": 0.00036181075715590336, + "loss": 0.1304, + "step": 57000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036175250816137877, + "loss": 0.1105, + "step": 57200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003616940613283941, + "loss": 0.1187, + "step": 57400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036163541672239765, + "loss": 0.1166, + "step": 57600 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036157657440905933, + "loss": 0.1209, + "step": 57800 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036151753445427023, + "loss": 0.1363, + "step": 58000 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036145829692414304, + "loss": 0.1096, + "step": 58200 + }, + { + "epoch": 0.39, + "learning_rate": 0.0003613988618850114, + "loss": 0.1188, + "step": 58400 + }, + { + "epoch": 0.39, + "learning_rate": 0.00036133922940343035, + "loss": 0.1083, + "step": 58600 + }, + { + "epoch": 0.4, + "learning_rate": 0.0003612793995461759, + "loss": 0.1197, + "step": 58800 + }, + { + "epoch": 0.4, + "learning_rate": 0.00036121937238024515, + "loss": 0.1279, + "step": 59000 + }, + { + "epoch": 0.4, + "learning_rate": 0.00036115914797285614, + "loss": 0.1189, + "step": 59200 + }, + { + "epoch": 0.4, + "learning_rate": 0.00036109872639144766, + "loss": 0.1356, + "step": 59400 + }, + { + "epoch": 0.4, + "learning_rate": 0.00036103810770367947, + "loss": 0.1222, + "step": 59600 + }, + { + "epoch": 0.4, + "learning_rate": 0.00036097729197743193, + "loss": 0.1201, + "step": 59800 + }, + { + "epoch": 0.4, + "learning_rate": 0.000360916279280806, + "loss": 0.1156, + "step": 60000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003608550696821234, + "loss": 0.1168, + "step": 60200 + }, + { + "epoch": 0.41, + "learning_rate": 0.00036079366324992627, + "loss": 0.1174, + "step": 60400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00036073236855829993, + "loss": 0.1202, + "step": 60600 + }, + { + "epoch": 0.41, + "learning_rate": 0.00036067056964888823, + "loss": 0.1073, + "step": 60800 + }, + { + "epoch": 0.41, + "learning_rate": 0.00036060857411256375, + "loss": 0.1288, + "step": 61000 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003605463820187487, + "loss": 0.1174, + "step": 61200 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003604839934370854, + "loss": 0.1136, + "step": 61400 + }, + { + "epoch": 0.41, + "learning_rate": 0.0003604214084374361, + "loss": 0.112, + "step": 61600 + }, + { + "epoch": 0.42, + "learning_rate": 0.00036035894148492016, + "loss": 0.1091, + "step": 61800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003602959648409783, + "loss": 0.1329, + "step": 62000 + }, + { + "epoch": 0.42, + "learning_rate": 0.00036023279198960366, + "loss": 0.1262, + "step": 62200 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003601694230015367, + "loss": 0.1231, + "step": 62400 + }, + { + "epoch": 0.42, + "learning_rate": 0.00036010585794773764, + "loss": 0.1107, + "step": 62600 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003600420968993861, + "loss": 0.1299, + "step": 62800 + }, + { + "epoch": 0.42, + "learning_rate": 0.0003599781399278814, + "loss": 0.1161, + "step": 63000 + }, + { + "epoch": 0.43, + "learning_rate": 0.00035991398710484205, + "loss": 0.115, + "step": 63200 + }, + { + "epoch": 0.43, + "learning_rate": 0.00035984963850210587, + "loss": 0.1223, + "step": 63400 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003597850941917301, + "loss": 0.1154, + "step": 63600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00035972035424599094, + "loss": 0.1167, + "step": 63800 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003596554187373838, + "loss": 0.1129, + "step": 64000 + }, + { + "epoch": 0.43, + "learning_rate": 0.000359590287738623, + "loss": 0.1255, + "step": 64200 + }, + { + "epoch": 0.43, + "learning_rate": 0.0003595249613226417, + "loss": 0.1212, + "step": 64400 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003594594395625922, + "loss": 0.1202, + "step": 64600 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003593937225318451, + "loss": 0.1145, + "step": 64800 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035932781030399, + "loss": 0.1076, + "step": 65000 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003592617029528349, + "loss": 0.1167, + "step": 65200 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035919540055240643, + "loss": 0.1182, + "step": 65400 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035912890317694947, + "loss": 0.1249, + "step": 65600 + }, + { + "epoch": 0.44, + "learning_rate": 0.00035906221090092747, + "loss": 0.1267, + "step": 65800 + }, + { + "epoch": 0.44, + "learning_rate": 0.0003589953237990218, + "loss": 0.1111, + "step": 66000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003589282419461323, + "loss": 0.1155, + "step": 66200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003588613022841523, + "loss": 0.1156, + "step": 66400 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003587938321276814, + "loss": 0.1381, + "step": 66600 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003587261674458557, + "loss": 0.1141, + "step": 66800 + }, + { + "epoch": 0.45, + "learning_rate": 0.00035865830831444557, + "loss": 0.1039, + "step": 67000 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003585902548094393, + "loss": 0.1329, + "step": 67200 + }, + { + "epoch": 0.45, + "learning_rate": 0.0003585220070070426, + "loss": 0.1255, + "step": 67400 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035845356498367904, + "loss": 0.12, + "step": 67600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035838492881598946, + "loss": 0.1142, + "step": 67800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035831609858083215, + "loss": 0.1199, + "step": 68000 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003582470743552828, + "loss": 0.1206, + "step": 68200 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035817820278955816, + "loss": 0.1298, + "step": 68400 + }, + { + "epoch": 0.46, + "learning_rate": 0.0003581087917843051, + "loss": 0.1344, + "step": 68600 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035803918702080075, + "loss": 0.127, + "step": 68800 + }, + { + "epoch": 0.46, + "learning_rate": 0.00035796938857698805, + "loss": 0.1171, + "step": 69000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00035789939653102667, + "loss": 0.1142, + "step": 69200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003578292109612934, + "loss": 0.119, + "step": 69400 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003577588319463815, + "loss": 0.1232, + "step": 69600 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003576882595651009, + "loss": 0.1206, + "step": 69800 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003576174938964781, + "loss": 0.1093, + "step": 70000 + }, + { + "epoch": 0.47, + "learning_rate": 0.00035754653501975596, + "loss": 0.1352, + "step": 70200 + }, + { + "epoch": 0.47, + "learning_rate": 0.0003574753830143938, + "loss": 0.1207, + "step": 70400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00035740403796006713, + "loss": 0.1077, + "step": 70600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00035733249993666767, + "loss": 0.1159, + "step": 70800 + }, + { + "epoch": 0.48, + "learning_rate": 0.00035726112815854356, + "loss": 0.1196, + "step": 71000 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003571892054013809, + "loss": 0.1162, + "step": 71200 + }, + { + "epoch": 0.48, + "learning_rate": 0.0003571170899157135, + "loss": 0.1439, + "step": 71400 + }, + { + "epoch": 0.48, + "learning_rate": 0.00035704478178229574, + "loss": 0.1113, + "step": 71600 + }, + { + "epoch": 0.48, + "learning_rate": 0.00035697228108209786, + "loss": 0.1195, + "step": 71800 + }, + { + "epoch": 0.49, + "learning_rate": 0.00035689958789630546, + "loss": 0.1121, + "step": 72000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003568267023063201, + "loss": 0.1024, + "step": 72200 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003567536243937585, + "loss": 0.1103, + "step": 72400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00035668035424045273, + "loss": 0.1306, + "step": 72600 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003566068919284503, + "loss": 0.1145, + "step": 72800 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003565332375400137, + "loss": 0.1157, + "step": 73000 + }, + { + "epoch": 0.49, + "learning_rate": 0.0003564593911576208, + "loss": 0.119, + "step": 73200 + }, + { + "epoch": 0.49, + "learning_rate": 0.00035638535286396404, + "loss": 0.1117, + "step": 73400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003563114943695969, + "loss": 0.117, + "step": 73600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00035623707346086925, + "loss": 0.108, + "step": 73800 + }, + { + "epoch": 0.5, + "learning_rate": 0.00035616246088982775, + "loss": 0.106, + "step": 74000 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003560876567400229, + "loss": 0.1316, + "step": 74200 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003560126610952201, + "loss": 0.1121, + "step": 74400 + }, + { + "epoch": 0.5, + "learning_rate": 0.0003559374740393988, + "loss": 0.124, + "step": 74600 + }, + { + "epoch": 0.5, + "learning_rate": 0.000355862095656753, + "loss": 0.1163, + "step": 74800 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003557865260316909, + "loss": 0.1191, + "step": 75000 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003557107652488348, + "loss": 0.1189, + "step": 75200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00035563481339302114, + "loss": 0.1118, + "step": 75400 + }, + { + "epoch": 0.51, + "learning_rate": 0.00035555867054930034, + "loss": 0.1242, + "step": 75600 + }, + { + "epoch": 0.51, + "learning_rate": 0.0003554823368029366, + "loss": 0.1064, + "step": 75800 + }, + { + "epoch": 0.51, + "learning_rate": 0.00035540581223940794, + "loss": 0.1078, + "step": 76000 + }, + { + "epoch": 0.51, + "learning_rate": 0.000355329096944406, + "loss": 0.1145, + "step": 76200 + }, + { + "epoch": 0.51, + "learning_rate": 0.00035525219100383614, + "loss": 0.122, + "step": 76400 + }, + { + "epoch": 0.52, + "learning_rate": 0.00035517548046019095, + "loss": 0.1132, + "step": 76600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003550981944392052, + "loss": 0.1183, + "step": 76800 + }, + { + "epoch": 0.52, + "learning_rate": 0.00035502071803121456, + "loss": 0.1086, + "step": 77000 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003549430513229765, + "loss": 0.11, + "step": 77200 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003548651944014617, + "loss": 0.1144, + "step": 77400 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003547871473538539, + "loss": 0.1114, + "step": 77600 + }, + { + "epoch": 0.52, + "learning_rate": 0.0003547089102675495, + "loss": 0.1131, + "step": 77800 + }, + { + "epoch": 0.53, + "learning_rate": 0.000354630483230158, + "loss": 0.1178, + "step": 78000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003545518663295014, + "loss": 0.1184, + "step": 78200 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003544730596536143, + "loss": 0.1131, + "step": 78400 + }, + { + "epoch": 0.53, + "learning_rate": 0.000354394063290744, + "loss": 0.1103, + "step": 78600 + }, + { + "epoch": 0.53, + "learning_rate": 0.00035431487732935006, + "loss": 0.1099, + "step": 78800 + }, + { + "epoch": 0.53, + "learning_rate": 0.00035423589920671934, + "loss": 0.124, + "step": 79000 + }, + { + "epoch": 0.53, + "learning_rate": 0.0003541563352613894, + "loss": 0.1216, + "step": 79200 + }, + { + "epoch": 0.53, + "learning_rate": 0.00035407658198374195, + "loss": 0.1153, + "step": 79400 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035399663946308424, + "loss": 0.107, + "step": 79600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035391650778893543, + "loss": 0.1105, + "step": 79800 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035383618705102626, + "loss": 0.1361, + "step": 80000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035375567733929953, + "loss": 0.1221, + "step": 80200 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003536749787439093, + "loss": 0.1085, + "step": 80400 + }, + { + "epoch": 0.54, + "learning_rate": 0.0003535940913552215, + "loss": 0.1115, + "step": 80600 + }, + { + "epoch": 0.54, + "learning_rate": 0.00035351301526381307, + "loss": 0.1107, + "step": 80800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035343175056047263, + "loss": 0.1211, + "step": 81000 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003533502973361997, + "loss": 0.117, + "step": 81200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035326865568220515, + "loss": 0.1154, + "step": 81400 + }, + { + "epoch": 0.55, + "learning_rate": 0.0003531868256899108, + "loss": 0.1079, + "step": 81600 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035310480745094907, + "loss": 0.1115, + "step": 81800 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035302260105716366, + "loss": 0.1247, + "step": 82000 + }, + { + "epoch": 0.55, + "learning_rate": 0.00035294061904054505, + "loss": 0.119, + "step": 82200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035285803755310787, + "loss": 0.1219, + "step": 82400 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035277526818717824, + "loss": 0.1113, + "step": 82600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035269231103544056, + "loss": 0.1112, + "step": 82800 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035260958238174535, + "loss": 0.1099, + "step": 83000 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035252625087505373, + "loss": 0.1519, + "step": 83200 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035244273186140206, + "loss": 0.1178, + "step": 83400 + }, + { + "epoch": 0.56, + "learning_rate": 0.0003523590254343144, + "loss": 0.1095, + "step": 83600 + }, + { + "epoch": 0.56, + "learning_rate": 0.00035227513168752453, + "loss": 0.1161, + "step": 83800 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003521910507149763, + "loss": 0.1091, + "step": 84000 + }, + { + "epoch": 0.57, + "learning_rate": 0.00035210678261082275, + "loss": 0.1176, + "step": 84200 + }, + { + "epoch": 0.57, + "learning_rate": 0.00035202232746942697, + "loss": 0.1197, + "step": 84400 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003519376853853612, + "loss": 0.1163, + "step": 84600 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003518528564534069, + "loss": 0.1152, + "step": 84800 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003517678407685551, + "loss": 0.1086, + "step": 85000 + }, + { + "epoch": 0.57, + "learning_rate": 0.0003516826384260057, + "loss": 0.1023, + "step": 85200 + }, + { + "epoch": 0.58, + "learning_rate": 0.00035159724952116777, + "loss": 0.1111, + "step": 85400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003515116741496592, + "loss": 0.1195, + "step": 85600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003514259124073067, + "loss": 0.1031, + "step": 85800 + }, + { + "epoch": 0.58, + "learning_rate": 0.00035133996439014564, + "loss": 0.1232, + "step": 86000 + }, + { + "epoch": 0.58, + "learning_rate": 0.00035125383019442015, + "loss": 0.1214, + "step": 86200 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003511675099165826, + "loss": 0.1129, + "step": 86400 + }, + { + "epoch": 0.58, + "learning_rate": 0.0003510810036532938, + "loss": 0.1312, + "step": 86600 + }, + { + "epoch": 0.58, + "learning_rate": 0.00035099431150142305, + "loss": 0.1087, + "step": 86800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00035090743355804756, + "loss": 0.1117, + "step": 87000 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003508203699204526, + "loss": 0.1073, + "step": 87200 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003507331206861315, + "loss": 0.1101, + "step": 87400 + }, + { + "epoch": 0.59, + "learning_rate": 0.000350646123587719, + "loss": 0.1253, + "step": 87600 + }, + { + "epoch": 0.59, + "learning_rate": 0.0003505585043800183, + "loss": 0.1035, + "step": 87800 + }, + { + "epoch": 0.59, + "learning_rate": 0.00035047069986882655, + "loss": 0.1128, + "step": 88000 + }, + { + "epoch": 0.59, + "learning_rate": 0.00035038271015246694, + "loss": 0.1128, + "step": 88200 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003502945353294695, + "loss": 0.1097, + "step": 88400 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035020617549857193, + "loss": 0.1153, + "step": 88600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035011763075871896, + "loss": 0.1261, + "step": 88800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00035002890120906235, + "loss": 0.1111, + "step": 89000 + }, + { + "epoch": 0.6, + "learning_rate": 0.00034993998694896083, + "loss": 0.121, + "step": 89200 + }, + { + "epoch": 0.6, + "learning_rate": 0.00034985088807798, + "loss": 0.1327, + "step": 89400 + }, + { + "epoch": 0.6, + "learning_rate": 0.0003497616046958922, + "loss": 0.1166, + "step": 89600 + }, + { + "epoch": 0.6, + "learning_rate": 0.00034967213690267624, + "loss": 0.1173, + "step": 89800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003495824847985177, + "loss": 0.1138, + "step": 90000 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003494926484838084, + "loss": 0.1134, + "step": 90200 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034940262805914626, + "loss": 0.1032, + "step": 90400 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003493124236253357, + "loss": 0.1209, + "step": 90600 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003492224876824008, + "loss": 0.1307, + "step": 90800 + }, + { + "epoch": 0.61, + "learning_rate": 0.0003491319164523126, + "loss": 0.1144, + "step": 91000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00034904116151621703, + "loss": 0.1174, + "step": 91200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034895067812499057, + "loss": 0.1099, + "step": 91400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034885955699922493, + "loss": 0.1104, + "step": 91600 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034876825247243814, + "loss": 0.1115, + "step": 91800 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003486767646468723, + "loss": 0.1149, + "step": 92000 + }, + { + "epoch": 0.62, + "learning_rate": 0.0003485850936249749, + "loss": 0.1115, + "step": 92200 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034849323950939855, + "loss": 0.124, + "step": 92400 + }, + { + "epoch": 0.62, + "learning_rate": 0.00034840120240300075, + "loss": 0.1309, + "step": 92600 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003483089824088442, + "loss": 0.1079, + "step": 92800 + }, + { + "epoch": 0.63, + "learning_rate": 0.000348216579630196, + "loss": 0.1093, + "step": 93000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00034812399417052833, + "loss": 0.1159, + "step": 93200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00034803122613351773, + "loss": 0.1172, + "step": 93400 + }, + { + "epoch": 0.63, + "learning_rate": 0.00034793827562304517, + "loss": 0.1127, + "step": 93600 + }, + { + "epoch": 0.63, + "learning_rate": 0.000347845142743196, + "loss": 0.1152, + "step": 93800 + }, + { + "epoch": 0.63, + "learning_rate": 0.0003477518275982598, + "loss": 0.1123, + "step": 94000 + }, + { + "epoch": 0.63, + "learning_rate": 0.00034765833029273027, + "loss": 0.1321, + "step": 94200 + }, + { + "epoch": 0.64, + "learning_rate": 0.00034756465093130503, + "loss": 0.1116, + "step": 94400 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003474707896188856, + "loss": 0.1063, + "step": 94600 + }, + { + "epoch": 0.64, + "learning_rate": 0.00034737721712853624, + "loss": 0.1339, + "step": 94800 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003472829931380883, + "loss": 0.1165, + "step": 95000 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003471885875120447, + "loss": 0.1187, + "step": 95200 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003470940003561202, + "loss": 0.1201, + "step": 95400 + }, + { + "epoch": 0.64, + "learning_rate": 0.0003469992317762327, + "loss": 0.1115, + "step": 95600 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003469042818785036, + "loss": 0.1031, + "step": 95800 + }, + { + "epoch": 0.65, + "learning_rate": 0.000346809150769257, + "loss": 0.1212, + "step": 96000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034671383855502014, + "loss": 0.1148, + "step": 96200 + }, + { + "epoch": 0.65, + "learning_rate": 0.000346618345342523, + "loss": 0.1273, + "step": 96400 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003465226712386982, + "loss": 0.1204, + "step": 96600 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034642681635068095, + "loss": 0.1101, + "step": 96800 + }, + { + "epoch": 0.65, + "learning_rate": 0.0003463307807858089, + "loss": 0.1171, + "step": 97000 + }, + { + "epoch": 0.65, + "learning_rate": 0.00034623456465162207, + "loss": 0.1197, + "step": 97200 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034613816805586253, + "loss": 0.1153, + "step": 97400 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003460415911064746, + "loss": 0.1155, + "step": 97600 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034594483391160453, + "loss": 0.1134, + "step": 97800 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003458478965796003, + "loss": 0.1058, + "step": 98000 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034575077921901166, + "loss": 0.1172, + "step": 98200 + }, + { + "epoch": 0.66, + "learning_rate": 0.0003456534819385901, + "loss": 0.1182, + "step": 98400 + }, + { + "epoch": 0.66, + "learning_rate": 0.00034555600484728827, + "loss": 0.1244, + "step": 98600 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003454583480542605, + "loss": 0.1117, + "step": 98800 + }, + { + "epoch": 0.67, + "learning_rate": 0.000345360511668862, + "loss": 0.1088, + "step": 99000 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003452624958006496, + "loss": 0.1136, + "step": 99200 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003451643005593805, + "loss": 0.1148, + "step": 99400 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003450659260550133, + "loss": 0.105, + "step": 99600 + }, + { + "epoch": 0.67, + "learning_rate": 0.00034496786561145394, + "loss": 0.1195, + "step": 99800 + }, + { + "epoch": 0.67, + "learning_rate": 0.0003448691338065063, + "loss": 0.1182, + "step": 100000 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003447702230689861, + "loss": 0.1163, + "step": 100200 + }, + { + "epoch": 0.68, + "learning_rate": 0.000344671133509653, + "loss": 0.1245, + "step": 100400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003445718652394668, + "loss": 0.1046, + "step": 100600 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003444724183695874, + "loss": 0.1182, + "step": 100800 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003443727930113748, + "loss": 0.1207, + "step": 101000 + }, + { + "epoch": 0.68, + "learning_rate": 0.00034427298927638874, + "loss": 0.1079, + "step": 101200 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003441730072763888, + "loss": 0.104, + "step": 101400 + }, + { + "epoch": 0.68, + "learning_rate": 0.0003440728471233341, + "loss": 0.1102, + "step": 101600 + }, + { + "epoch": 0.69, + "learning_rate": 0.000343973011063044, + "loss": 0.1252, + "step": 101800 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003438724958299181, + "loss": 0.1109, + "step": 102000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00034377230668762864, + "loss": 0.1208, + "step": 102200 + }, + { + "epoch": 0.69, + "learning_rate": 0.00034367143682240804, + "loss": 0.1033, + "step": 102400 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003435703893657879, + "loss": 0.1112, + "step": 102600 + }, + { + "epoch": 0.69, + "learning_rate": 0.0003434691644309205, + "loss": 0.1153, + "step": 102800 + }, + { + "epoch": 0.69, + "learning_rate": 0.00034336776213115676, + "loss": 0.1053, + "step": 103000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003432661825800463, + "loss": 0.1188, + "step": 103200 + }, + { + "epoch": 0.7, + "learning_rate": 0.00034316442589133725, + "loss": 0.1069, + "step": 103400 + }, + { + "epoch": 0.7, + "learning_rate": 0.000343062492178976, + "loss": 0.1195, + "step": 103600 + }, + { + "epoch": 0.7, + "learning_rate": 0.00034296038155710726, + "loss": 0.1099, + "step": 103800 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003428580941400738, + "loss": 0.1185, + "step": 104000 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003427556300424164, + "loss": 0.1247, + "step": 104200 + }, + { + "epoch": 0.7, + "learning_rate": 0.0003426529893788736, + "loss": 0.111, + "step": 104400 + }, + { + "epoch": 0.7, + "learning_rate": 0.00034255017226438175, + "loss": 0.1043, + "step": 104600 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034244717881407483, + "loss": 0.1183, + "step": 104800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034234452542979544, + "loss": 0.1165, + "step": 105000 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034224118053428676, + "loss": 0.1178, + "step": 105200 + }, + { + "epoch": 0.71, + "learning_rate": 0.0003421376596489696, + "loss": 0.1283, + "step": 105400 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034203396288976624, + "loss": 0.1091, + "step": 105600 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034193009037279546, + "loss": 0.1101, + "step": 105800 + }, + { + "epoch": 0.71, + "learning_rate": 0.00034182604221437305, + "loss": 0.1147, + "step": 106000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003417218185310113, + "loss": 0.1052, + "step": 106200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00034161741943941935, + "loss": 0.1089, + "step": 106400 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003415128450565025, + "loss": 0.1223, + "step": 106600 + }, + { + "epoch": 0.72, + "learning_rate": 0.00034140809549936245, + "loss": 0.1067, + "step": 106800 + }, + { + "epoch": 0.72, + "learning_rate": 0.00034130317088529706, + "loss": 0.1182, + "step": 107000 + }, + { + "epoch": 0.72, + "learning_rate": 0.0003411980713318002, + "loss": 0.1026, + "step": 107200 + }, + { + "epoch": 0.72, + "learning_rate": 0.00034109279695656154, + "loss": 0.114, + "step": 107400 + }, + { + "epoch": 0.72, + "learning_rate": 0.00034098734787746675, + "loss": 0.1046, + "step": 107600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034088172421259687, + "loss": 0.114, + "step": 107800 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034077592608022865, + "loss": 0.1079, + "step": 108000 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034066995359883405, + "loss": 0.1134, + "step": 108200 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034056380688708037, + "loss": 0.1188, + "step": 108400 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034045748606383004, + "loss": 0.1123, + "step": 108600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00034035099124814027, + "loss": 0.109, + "step": 108800 + }, + { + "epoch": 0.73, + "learning_rate": 0.0003402443225592633, + "loss": 0.1105, + "step": 109000 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034013748011664594, + "loss": 0.117, + "step": 109200 + }, + { + "epoch": 0.74, + "learning_rate": 0.00034003099955202997, + "loss": 0.1196, + "step": 109400 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003399238108283236, + "loss": 0.112, + "step": 109600 + }, + { + "epoch": 0.74, + "learning_rate": 0.00033981644870978374, + "loss": 0.1267, + "step": 109800 + }, + { + "epoch": 0.74, + "learning_rate": 0.000339709451424421, + "loss": 0.1266, + "step": 110000 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003396017437425494, + "loss": 0.1283, + "step": 110200 + }, + { + "epoch": 0.74, + "learning_rate": 0.0003394938630264929, + "loss": 0.1104, + "step": 110400 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003393858093970554, + "loss": 0.1183, + "step": 110600 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003392775829752349, + "loss": 0.1186, + "step": 110800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00033916918388222233, + "loss": 0.1141, + "step": 111000 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003390606122394025, + "loss": 0.114, + "step": 111200 + }, + { + "epoch": 0.75, + "learning_rate": 0.000338951868168353, + "loss": 0.1064, + "step": 111400 + }, + { + "epoch": 0.75, + "learning_rate": 0.00033884295179084477, + "loss": 0.1095, + "step": 111600 + }, + { + "epoch": 0.75, + "learning_rate": 0.00033873386322884173, + "loss": 0.1064, + "step": 111800 + }, + { + "epoch": 0.75, + "learning_rate": 0.0003386246026045003, + "loss": 0.1207, + "step": 112000 + }, + { + "epoch": 0.76, + "learning_rate": 0.00033851517004016994, + "loss": 0.1147, + "step": 112200 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003384055656583925, + "loss": 0.1238, + "step": 112400 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003382957895819021, + "loss": 0.1208, + "step": 112600 + }, + { + "epoch": 0.76, + "learning_rate": 0.00033818584193362537, + "loss": 0.1013, + "step": 112800 + }, + { + "epoch": 0.76, + "learning_rate": 0.00033807572283668094, + "loss": 0.1144, + "step": 113000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0003379654324143793, + "loss": 0.1062, + "step": 113200 + }, + { + "epoch": 0.76, + "learning_rate": 0.00033785497079022296, + "loss": 0.1134, + "step": 113400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00033774433808790615, + "loss": 0.1087, + "step": 113600 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003376335344313145, + "loss": 0.1135, + "step": 113800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003375225599445254, + "loss": 0.1042, + "step": 114000 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003374114147518071, + "loss": 0.1081, + "step": 114200 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003373006559806056, + "loss": 0.1206, + "step": 114400 + }, + { + "epoch": 0.77, + "learning_rate": 0.00033718917060157275, + "loss": 0.1112, + "step": 114600 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003370775148899379, + "loss": 0.1075, + "step": 114800 + }, + { + "epoch": 0.77, + "learning_rate": 0.0003369656889707324, + "loss": 0.1091, + "step": 115000 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003368536929691781, + "loss": 0.1104, + "step": 115200 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003367415270106874, + "loss": 0.1055, + "step": 115400 + }, + { + "epoch": 0.78, + "learning_rate": 0.00033662919122086295, + "loss": 0.11, + "step": 115600 + }, + { + "epoch": 0.78, + "learning_rate": 0.00033651668572549757, + "loss": 0.1178, + "step": 115800 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003364040106505742, + "loss": 0.1063, + "step": 116000 + }, + { + "epoch": 0.78, + "learning_rate": 0.00033629116612226564, + "loss": 0.1232, + "step": 116200 + }, + { + "epoch": 0.78, + "learning_rate": 0.0003361781522669344, + "loss": 0.1143, + "step": 116400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003360655355470882, + "loss": 0.1233, + "step": 116600 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003359521842626105, + "loss": 0.1191, + "step": 116800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00033583866403069976, + "loss": 0.1114, + "step": 117000 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003357249749784751, + "loss": 0.1075, + "step": 117200 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003356111172332447, + "loss": 0.1084, + "step": 117400 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003354970909225057, + "loss": 0.1037, + "step": 117600 + }, + { + "epoch": 0.79, + "learning_rate": 0.0003353828961739441, + "loss": 0.1187, + "step": 117800 + }, + { + "epoch": 0.79, + "learning_rate": 0.00033526853311543423, + "loss": 0.1078, + "step": 118000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00033515400187503924, + "loss": 0.1083, + "step": 118200 + }, + { + "epoch": 0.8, + "learning_rate": 0.00033503930258101024, + "loss": 0.1163, + "step": 118400 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003349244353617869, + "loss": 0.101, + "step": 118600 + }, + { + "epoch": 0.8, + "learning_rate": 0.00033480997593825657, + "loss": 0.1162, + "step": 118800 + }, + { + "epoch": 0.8, + "learning_rate": 0.00033469477409273286, + "loss": 0.1088, + "step": 119000 + }, + { + "epoch": 0.8, + "learning_rate": 0.00033457940470781543, + "loss": 0.1168, + "step": 119200 + }, + { + "epoch": 0.8, + "learning_rate": 0.0003344638679126941, + "loss": 0.1154, + "step": 119400 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003343481638367461, + "loss": 0.1118, + "step": 119600 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003342322926095362, + "loss": 0.1446, + "step": 119800 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003341162543608161, + "loss": 0.1079, + "step": 120000 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003340000492205248, + "loss": 0.1049, + "step": 120200 + }, + { + "epoch": 0.81, + "learning_rate": 0.0003338836773187878, + "loss": 0.1123, + "step": 120400 + }, + { + "epoch": 0.81, + "learning_rate": 0.00033376713878591777, + "loss": 0.1151, + "step": 120600 + }, + { + "epoch": 0.81, + "learning_rate": 0.00033365043375241366, + "loss": 0.0998, + "step": 120800 + }, + { + "epoch": 0.82, + "learning_rate": 0.000333533562348961, + "loss": 0.1189, + "step": 121000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003334165247064315, + "loss": 0.118, + "step": 121200 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003332993209558833, + "loss": 0.1189, + "step": 121400 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003331819512285602, + "loss": 0.113, + "step": 121600 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003330650037460781, + "loss": 0.1162, + "step": 121800 + }, + { + "epoch": 0.82, + "learning_rate": 0.00033294730328792147, + "loss": 0.1105, + "step": 122000 + }, + { + "epoch": 0.82, + "learning_rate": 0.0003328294372471769, + "loss": 0.1041, + "step": 122200 + }, + { + "epoch": 0.82, + "learning_rate": 0.00033271140575583016, + "loss": 0.1064, + "step": 122400 + }, + { + "epoch": 0.83, + "learning_rate": 0.000332593208946052, + "loss": 0.1035, + "step": 122600 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003324748469501984, + "loss": 0.1061, + "step": 122800 + }, + { + "epoch": 0.83, + "learning_rate": 0.00033235631990081034, + "loss": 0.1199, + "step": 123000 + }, + { + "epoch": 0.83, + "learning_rate": 0.00033223762793061363, + "loss": 0.1074, + "step": 123200 + }, + { + "epoch": 0.83, + "learning_rate": 0.00033211877117251865, + "loss": 0.1195, + "step": 123400 + }, + { + "epoch": 0.83, + "learning_rate": 0.00033199974975962047, + "loss": 0.1132, + "step": 123600 + }, + { + "epoch": 0.83, + "learning_rate": 0.0003318811601638971, + "loss": 0.1206, + "step": 123800 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003317618106630228, + "loss": 0.1195, + "step": 124000 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003316428948842102, + "loss": 0.1057, + "step": 124200 + }, + { + "epoch": 0.84, + "learning_rate": 0.000331523217827277, + "loss": 0.1067, + "step": 124400 + }, + { + "epoch": 0.84, + "learning_rate": 0.00033140337678243725, + "loss": 0.1185, + "step": 124600 + }, + { + "epoch": 0.84, + "learning_rate": 0.0003312833718838879, + "loss": 0.1089, + "step": 124800 + }, + { + "epoch": 0.84, + "learning_rate": 0.00033116320326600986, + "loss": 0.104, + "step": 125000 + }, + { + "epoch": 0.84, + "learning_rate": 0.000331042871063367, + "loss": 0.1254, + "step": 125200 + }, + { + "epoch": 0.84, + "learning_rate": 0.00033092237541070647, + "loss": 0.1085, + "step": 125400 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003308017164429586, + "loss": 0.1132, + "step": 125600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00033068089429523636, + "loss": 0.0999, + "step": 125800 + }, + { + "epoch": 0.85, + "learning_rate": 0.00033055990910283567, + "loss": 0.106, + "step": 126000 + }, + { + "epoch": 0.85, + "learning_rate": 0.00033043876100123486, + "loss": 0.1066, + "step": 126200 + }, + { + "epoch": 0.85, + "learning_rate": 0.0003303174501260947, + "loss": 0.1109, + "step": 126400 + }, + { + "epoch": 0.85, + "learning_rate": 0.00033019597661325845, + "loss": 0.1097, + "step": 126600 + }, + { + "epoch": 0.85, + "learning_rate": 0.00033007434059875116, + "loss": 0.1142, + "step": 126800 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032995254221878013, + "loss": 0.1156, + "step": 127000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032983058160973426, + "loss": 0.1143, + "step": 127200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032970845890818425, + "loss": 0.1193, + "step": 127400 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032958617425088226, + "loss": 0.1052, + "step": 127600 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032946372777476184, + "loss": 0.1078, + "step": 127800 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003293411196169377, + "loss": 0.1017, + "step": 128000 + }, + { + "epoch": 0.86, + "learning_rate": 0.00032921834991470565, + "loss": 0.1029, + "step": 128200 + }, + { + "epoch": 0.86, + "learning_rate": 0.0003290960338623602, + "loss": 0.0962, + "step": 128400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00032897294228992666, + "loss": 0.1078, + "step": 128600 + }, + { + "epoch": 0.87, + "learning_rate": 0.00032884968958536763, + "loss": 0.1129, + "step": 128800 + }, + { + "epoch": 0.87, + "learning_rate": 0.00032872627588670074, + "loss": 0.1107, + "step": 129000 + }, + { + "epoch": 0.87, + "learning_rate": 0.00032860270133212367, + "loss": 0.1232, + "step": 129200 + }, + { + "epoch": 0.87, + "learning_rate": 0.0003284789660600145, + "loss": 0.1185, + "step": 129400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00032835507020893096, + "loss": 0.111, + "step": 129600 + }, + { + "epoch": 0.87, + "learning_rate": 0.000328231013917611, + "loss": 0.1082, + "step": 129800 + }, + { + "epoch": 0.88, + "learning_rate": 0.00032810741880645453, + "loss": 0.1128, + "step": 130000 + }, + { + "epoch": 0.88, + "learning_rate": 0.000327983042852058, + "loss": 0.1224, + "step": 130200 + }, + { + "epoch": 0.88, + "learning_rate": 0.0003278585068740186, + "loss": 0.1127, + "step": 130400 + }, + { + "epoch": 0.88, + "learning_rate": 0.00032773381101179096, + "loss": 0.1122, + "step": 130600 + }, + { + "epoch": 0.88, + "learning_rate": 0.00032760895540500854, + "loss": 0.103, + "step": 130800 + }, + { + "epoch": 0.88, + "learning_rate": 0.000327483940193484, + "loss": 0.1129, + "step": 131000 + }, + { + "epoch": 0.88, + "learning_rate": 0.00032735876551720825, + "loss": 0.1118, + "step": 131200 + }, + { + "epoch": 0.89, + "learning_rate": 0.00032723343151635126, + "loss": 0.1141, + "step": 131400 + }, + { + "epoch": 0.89, + "learning_rate": 0.00032710793833126114, + "loss": 0.1043, + "step": 131600 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003269822861024642, + "loss": 0.1223, + "step": 131800 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003268564749706651, + "loss": 0.1129, + "step": 132000 + }, + { + "epoch": 0.89, + "learning_rate": 0.0003267305050767462, + "loss": 0.1159, + "step": 132200 + }, + { + "epoch": 0.89, + "learning_rate": 0.00032660437656176775, + "loss": 0.1026, + "step": 132400 + }, + { + "epoch": 0.89, + "learning_rate": 0.00032647808956696767, + "loss": 0.1107, + "step": 132600 + }, + { + "epoch": 0.89, + "learning_rate": 0.00032635164423376107, + "loss": 0.1001, + "step": 132800 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003262250407037408, + "loss": 0.1046, + "step": 133000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003260982791186765, + "loss": 0.1198, + "step": 133200 + }, + { + "epoch": 0.9, + "learning_rate": 0.000325971359620515, + "loss": 0.1114, + "step": 133400 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003258442823513799, + "loss": 0.1076, + "step": 133600 + }, + { + "epoch": 0.9, + "learning_rate": 0.00032571704745357134, + "loss": 0.1073, + "step": 133800 + }, + { + "epoch": 0.9, + "learning_rate": 0.00032558965506956623, + "loss": 0.1153, + "step": 134000 + }, + { + "epoch": 0.9, + "learning_rate": 0.0003254621053420176, + "loss": 0.111, + "step": 134200 + }, + { + "epoch": 0.91, + "learning_rate": 0.00032533503733919646, + "loss": 0.1122, + "step": 134400 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003252071741381574, + "loss": 0.1042, + "step": 134600 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003250791540218744, + "loss": 0.1103, + "step": 134800 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003249509771337035, + "loss": 0.11, + "step": 135000 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003248226436171763, + "loss": 0.1291, + "step": 135200 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003246941536159999, + "loss": 0.1095, + "step": 135400 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003245655072740564, + "loss": 0.1084, + "step": 135600 + }, + { + "epoch": 0.91, + "learning_rate": 0.0003244367047354031, + "loss": 0.0981, + "step": 135800 + }, + { + "epoch": 0.92, + "learning_rate": 0.00032430839132516974, + "loss": 0.1288, + "step": 136000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00032417927760514917, + "loss": 0.1036, + "step": 136200 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003240500081209162, + "loss": 0.1092, + "step": 136400 + }, + { + "epoch": 0.92, + "learning_rate": 0.00032392058301722597, + "loss": 0.1103, + "step": 136600 + }, + { + "epoch": 0.92, + "learning_rate": 0.00032379100243900774, + "loss": 0.1238, + "step": 136800 + }, + { + "epoch": 0.92, + "learning_rate": 0.000323661266531365, + "loss": 0.1125, + "step": 137000 + }, + { + "epoch": 0.92, + "learning_rate": 0.0003235313754395751, + "loss": 0.1181, + "step": 137200 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003234013293090893, + "loss": 0.1107, + "step": 137400 + }, + { + "epoch": 0.93, + "learning_rate": 0.00032327112828553224, + "loss": 0.1189, + "step": 137600 + }, + { + "epoch": 0.93, + "learning_rate": 0.00032314077251470224, + "loss": 0.1177, + "step": 137800 + }, + { + "epoch": 0.93, + "learning_rate": 0.00032301026214257077, + "loss": 0.1147, + "step": 138000 + }, + { + "epoch": 0.93, + "learning_rate": 0.00032287959731528245, + "loss": 0.1098, + "step": 138200 + }, + { + "epoch": 0.93, + "learning_rate": 0.00032274877817915483, + "loss": 0.1114, + "step": 138400 + }, + { + "epoch": 0.93, + "learning_rate": 0.0003226178048806784, + "loss": 0.1136, + "step": 138600 + }, + { + "epoch": 0.94, + "learning_rate": 0.000322486677566516, + "loss": 0.1111, + "step": 138800 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003223553963835033, + "loss": 0.1106, + "step": 139000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003222239614786479, + "loss": 0.1227, + "step": 139200 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003220923729991298, + "loss": 0.1115, + "step": 139400 + }, + { + "epoch": 0.94, + "learning_rate": 0.00032196063109230086, + "loss": 0.119, + "step": 139600 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003218287359056848, + "loss": 0.111, + "step": 139800 + }, + { + "epoch": 0.94, + "learning_rate": 0.00032169668758697683, + "loss": 0.1146, + "step": 140000 + }, + { + "epoch": 0.94, + "learning_rate": 0.0003215651476708621, + "loss": 0.0971, + "step": 140200 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003214327942955547, + "loss": 0.1157, + "step": 140400 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003213002882315282, + "loss": 0.1192, + "step": 140600 + }, + { + "epoch": 0.95, + "learning_rate": 0.00032116762962716206, + "loss": 0.1189, + "step": 140800 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003210348186310064, + "loss": 0.113, + "step": 141000 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003209018553917821, + "loss": 0.1198, + "step": 141200 + }, + { + "epoch": 0.95, + "learning_rate": 0.00032076874005838044, + "loss": 0.1082, + "step": 141400 + }, + { + "epoch": 0.95, + "learning_rate": 0.0003206354727798631, + "loss": 0.1188, + "step": 141600 + }, + { + "epoch": 0.96, + "learning_rate": 0.00032050205370546186, + "loss": 0.112, + "step": 141800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003203684829845785, + "loss": 0.1139, + "step": 142000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003202347607667846, + "loss": 0.1087, + "step": 142200 + }, + { + "epoch": 0.96, + "learning_rate": 0.00032010222668614745, + "loss": 0.1351, + "step": 142400 + }, + { + "epoch": 0.96, + "learning_rate": 0.00031996820343515567, + "loss": 0.1126, + "step": 142600 + }, + { + "epoch": 0.96, + "learning_rate": 0.00031983402913548365, + "loss": 0.1261, + "step": 142800 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003196997039373788, + "loss": 0.1123, + "step": 143000 + }, + { + "epoch": 0.96, + "learning_rate": 0.0003195652279912577, + "loss": 0.1252, + "step": 143200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00031943060144770555, + "loss": 0.1031, + "step": 143400 + }, + { + "epoch": 0.97, + "learning_rate": 0.00031929582445747623, + "loss": 0.1078, + "step": 143600 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003191608971714921, + "loss": 0.1157, + "step": 143800 + }, + { + "epoch": 0.97, + "learning_rate": 0.00031902581974084374, + "loss": 0.107, + "step": 144000 + }, + { + "epoch": 0.97, + "learning_rate": 0.00031889059231679, + "loss": 0.1033, + "step": 144200 + }, + { + "epoch": 0.97, + "learning_rate": 0.00031875521505075774, + "loss": 0.1127, + "step": 144400 + }, + { + "epoch": 0.97, + "learning_rate": 0.0003186196880943413, + "loss": 0.119, + "step": 144600 + }, + { + "epoch": 0.98, + "learning_rate": 0.00031848401159930296, + "loss": 0.1187, + "step": 144800 + }, + { + "epoch": 0.98, + "learning_rate": 0.00031834818571757227, + "loss": 0.1131, + "step": 145000 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003182122106012462, + "loss": 0.1141, + "step": 145200 + }, + { + "epoch": 0.98, + "learning_rate": 0.00031807608640258873, + "loss": 0.1158, + "step": 145400 + }, + { + "epoch": 0.98, + "learning_rate": 0.00031793981327403085, + "loss": 0.112, + "step": 145600 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003178033913681702, + "loss": 0.1089, + "step": 145800 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003176668208377711, + "loss": 0.1117, + "step": 146000 + }, + { + "epoch": 0.98, + "learning_rate": 0.0003175301018357644, + "loss": 0.1141, + "step": 146200 + }, + { + "epoch": 0.99, + "learning_rate": 0.000317393234515247, + "loss": 0.1145, + "step": 146400 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003172562190294821, + "loss": 0.1055, + "step": 146600 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003171197417173119, + "loss": 0.1074, + "step": 146800 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003169824311004134, + "loss": 0.1166, + "step": 147000 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003168449727782822, + "loss": 0.1075, + "step": 147200 + }, + { + "epoch": 0.99, + "learning_rate": 0.00031670736690484315, + "loss": 0.0993, + "step": 147400 + }, + { + "epoch": 0.99, + "learning_rate": 0.0003165696136341865, + "loss": 0.1093, + "step": 147600 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003164317131205673, + "loss": 0.109, + "step": 147800 + }, + { + "epoch": 1.0, + "learning_rate": 0.00031629366551840567, + "loss": 0.1129, + "step": 148000 + }, + { + "epoch": 1.0, + "learning_rate": 0.00031615547098228636, + "loss": 0.1125, + "step": 148200 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003160171296669587, + "loss": 0.1057, + "step": 148400 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.9420512498988455, + "eval_auc": 0.8522129916967822, + "eval_f1": 0.35228549223252686, + "eval_loss": 0.4365999102592468, + "eval_mcc": 0.39725936154292724, + "eval_precision": 0.2294216317399737, + "eval_recall": 0.7584797572261781, + "eval_runtime": 9338.8173, + "eval_samples_per_second": 23.624, + "eval_steps_per_second": 3.937, + "step": 148440 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003158786417273362, + "loss": 0.1028, + "step": 148600 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003157407008546266, + "loss": 0.096, + "step": 148800 + }, + { + "epoch": 1.0, + "learning_rate": 0.0003156019208629957, + "loss": 0.1202, + "step": 149000 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003154629947120181, + "loss": 0.1025, + "step": 149200 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003153239225572624, + "loss": 0.1131, + "step": 149400 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003151847045544605, + "loss": 0.1144, + "step": 149600 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003150453408595079, + "loss": 0.1147, + "step": 149800 + }, + { + "epoch": 1.01, + "learning_rate": 0.000314905831628463, + "loss": 0.1024, + "step": 150000 + }, + { + "epoch": 1.01, + "learning_rate": 0.00031476617701754745, + "loss": 0.1044, + "step": 150200 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003146263771831454, + "loss": 0.097, + "step": 150400 + }, + { + "epoch": 1.01, + "learning_rate": 0.0003144864322818037, + "loss": 0.1143, + "step": 150600 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003143463424702319, + "loss": 0.1225, + "step": 150800 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003142061079053015, + "loss": 0.1045, + "step": 151000 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031406572874404616, + "loss": 0.1082, + "step": 151200 + }, + { + "epoch": 1.02, + "learning_rate": 0.0003139252051436616, + "loss": 0.1185, + "step": 151400 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031378453726150514, + "loss": 0.1067, + "step": 151600 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031364372525509573, + "loss": 0.1033, + "step": 151800 + }, + { + "epoch": 1.02, + "learning_rate": 0.00031350347441983444, + "loss": 0.11, + "step": 152000 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003133623753567721, + "loss": 0.0992, + "step": 152200 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003132211326421909, + "loss": 0.1083, + "step": 152400 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003130797464342533, + "loss": 0.1068, + "step": 152600 + }, + { + "epoch": 1.03, + "learning_rate": 0.00031293821689128273, + "loss": 0.1233, + "step": 152800 + }, + { + "epoch": 1.03, + "learning_rate": 0.000312796544171763, + "loss": 0.109, + "step": 153000 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003126547284343383, + "loss": 0.1109, + "step": 153200 + }, + { + "epoch": 1.03, + "learning_rate": 0.0003125127698378129, + "loss": 0.107, + "step": 153400 + }, + { + "epoch": 1.03, + "learning_rate": 0.000312370668541151, + "loss": 0.1118, + "step": 153600 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003122284247034767, + "loss": 0.1025, + "step": 153800 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031208603848407383, + "loss": 0.1085, + "step": 154000 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031194422303810766, + "loss": 0.1027, + "step": 154200 + }, + { + "epoch": 1.04, + "learning_rate": 0.000311801553243652, + "loss": 0.1262, + "step": 154400 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003116587415454755, + "loss": 0.1146, + "step": 154600 + }, + { + "epoch": 1.04, + "learning_rate": 0.00031151578810349756, + "loss": 0.1027, + "step": 154800 + }, + { + "epoch": 1.04, + "learning_rate": 0.0003113726930777965, + "loss": 0.1051, + "step": 155000 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003112294566286092, + "loss": 0.1045, + "step": 155200 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003110860789163309, + "loss": 0.1087, + "step": 155400 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003109425601015149, + "loss": 0.1286, + "step": 155600 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031079890034487265, + "loss": 0.112, + "step": 155800 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003106558191598876, + "loss": 0.1005, + "step": 156000 + }, + { + "epoch": 1.05, + "learning_rate": 0.0003105118787050569, + "loss": 0.1044, + "step": 156200 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031036779779067405, + "loss": 0.1101, + "step": 156400 + }, + { + "epoch": 1.05, + "learning_rate": 0.00031022357657807976, + "loss": 0.1105, + "step": 156600 + }, + { + "epoch": 1.06, + "learning_rate": 0.00031007921522877214, + "loss": 0.1006, + "step": 156800 + }, + { + "epoch": 1.06, + "learning_rate": 0.00030993471390440585, + "loss": 0.1214, + "step": 157000 + }, + { + "epoch": 1.06, + "learning_rate": 0.0003097900727667927, + "loss": 0.1065, + "step": 157200 + }, + { + "epoch": 1.06, + "learning_rate": 0.00030964529197790067, + "loss": 0.112, + "step": 157400 + }, + { + "epoch": 1.06, + "learning_rate": 0.00030950037169985437, + "loss": 0.1212, + "step": 157600 + }, + { + "epoch": 1.06, + "learning_rate": 0.00030935531209493457, + "loss": 0.1087, + "step": 157800 + }, + { + "epoch": 1.06, + "learning_rate": 0.00030921011332557806, + "loss": 0.1061, + "step": 158000 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003090647755543773, + "loss": 0.109, + "step": 158200 + }, + { + "epoch": 1.07, + "learning_rate": 0.00030891929894408065, + "loss": 0.1149, + "step": 158400 + }, + { + "epoch": 1.07, + "learning_rate": 0.00030877368365759167, + "loss": 0.1195, + "step": 158600 + }, + { + "epoch": 1.07, + "learning_rate": 0.00030862792985796966, + "loss": 0.117, + "step": 158800 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003084820377084285, + "loss": 0.1091, + "step": 159000 + }, + { + "epoch": 1.07, + "learning_rate": 0.00030833600737233726, + "loss": 0.108, + "step": 159200 + }, + { + "epoch": 1.07, + "learning_rate": 0.0003081898390132197, + "loss": 0.1114, + "step": 159400 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030804353279475426, + "loss": 0.1096, + "step": 159600 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003078970888807736, + "loss": 0.1054, + "step": 159800 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003077505074352645, + "loss": 0.1122, + "step": 160000 + }, + { + "epoch": 1.08, + "learning_rate": 0.000307603788622368, + "loss": 0.1065, + "step": 160200 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030745693260637885, + "loss": 0.1131, + "step": 160400 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003073106748576299, + "loss": 0.1154, + "step": 160600 + }, + { + "epoch": 1.08, + "learning_rate": 0.00030716354561291433, + "loss": 0.1215, + "step": 160800 + }, + { + "epoch": 1.08, + "learning_rate": 0.0003070162796580872, + "loss": 0.0988, + "step": 161000 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003068688771580559, + "loss": 0.113, + "step": 161200 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030672133827788073, + "loss": 0.121, + "step": 161400 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030657366318277474, + "loss": 0.0969, + "step": 161600 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030642659143197647, + "loss": 0.1109, + "step": 161800 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003062786450822661, + "loss": 0.1125, + "step": 162000 + }, + { + "epoch": 1.09, + "learning_rate": 0.0003061305630133498, + "loss": 0.1032, + "step": 162200 + }, + { + "epoch": 1.09, + "learning_rate": 0.00030598234539104877, + "loss": 0.1153, + "step": 162400 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030583399238133616, + "loss": 0.1126, + "step": 162600 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030568550415033653, + "loss": 0.1225, + "step": 162800 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030553688086432617, + "loss": 0.1078, + "step": 163000 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030538812268973226, + "loss": 0.1095, + "step": 163200 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030523922979313326, + "loss": 0.1162, + "step": 163400 + }, + { + "epoch": 1.1, + "learning_rate": 0.00030509020234125854, + "loss": 0.1042, + "step": 163600 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003049410405009878, + "loss": 0.116, + "step": 163800 + }, + { + "epoch": 1.1, + "learning_rate": 0.0003047917444393516, + "loss": 0.1032, + "step": 164000 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003046423143235306, + "loss": 0.1167, + "step": 164200 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030449275032085564, + "loss": 0.112, + "step": 164400 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030434305259880747, + "loss": 0.1068, + "step": 164600 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030419322132501666, + "loss": 0.1163, + "step": 164800 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030404325666726306, + "loss": 0.1113, + "step": 165000 + }, + { + "epoch": 1.11, + "learning_rate": 0.00030389315879347626, + "loss": 0.1142, + "step": 165200 + }, + { + "epoch": 1.11, + "learning_rate": 0.0003037429278717348, + "loss": 0.1199, + "step": 165400 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003035925640702662, + "loss": 0.1094, + "step": 165600 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030344206755744674, + "loss": 0.1238, + "step": 165800 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003032921919765011, + "loss": 0.1096, + "step": 166000 + }, + { + "epoch": 1.12, + "learning_rate": 0.0003031414312081544, + "loss": 0.1103, + "step": 166200 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030299053823363247, + "loss": 0.1067, + "step": 166400 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030283951322190416, + "loss": 0.1044, + "step": 166600 + }, + { + "epoch": 1.12, + "learning_rate": 0.00030268835634208633, + "loss": 0.1238, + "step": 166800 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030253706776344337, + "loss": 0.1184, + "step": 167000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003023864050828265, + "loss": 0.1143, + "step": 167200 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003022348542712936, + "loss": 0.1058, + "step": 167400 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003020831722687641, + "loss": 0.099, + "step": 167600 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030193135924509056, + "loss": 0.1164, + "step": 167800 + }, + { + "epoch": 1.13, + "learning_rate": 0.000301779415370272, + "loss": 0.1132, + "step": 168000 + }, + { + "epoch": 1.13, + "learning_rate": 0.0003016273408144543, + "loss": 0.1027, + "step": 168200 + }, + { + "epoch": 1.13, + "learning_rate": 0.00030147513574792944, + "loss": 0.122, + "step": 168400 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003013228003411356, + "loss": 0.1127, + "step": 168600 + }, + { + "epoch": 1.14, + "learning_rate": 0.000301170334764657, + "loss": 0.112, + "step": 168800 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003010177391892235, + "loss": 0.1098, + "step": 169000 + }, + { + "epoch": 1.14, + "learning_rate": 0.00030086501378571057, + "loss": 0.112, + "step": 169200 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003007121587251391, + "loss": 0.1037, + "step": 169400 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003005591741786752, + "loss": 0.1179, + "step": 169600 + }, + { + "epoch": 1.14, + "learning_rate": 0.0003004060603176298, + "loss": 0.1041, + "step": 169800 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003002528173134589, + "loss": 0.1142, + "step": 170000 + }, + { + "epoch": 1.15, + "learning_rate": 0.0003000994453377629, + "loss": 0.1118, + "step": 170200 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029994594456228665, + "loss": 0.109, + "step": 170400 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029979231515891937, + "loss": 0.1139, + "step": 170600 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002996385572996941, + "loss": 0.1119, + "step": 170800 + }, + { + "epoch": 1.15, + "learning_rate": 0.000299484671156788, + "loss": 0.1058, + "step": 171000 + }, + { + "epoch": 1.15, + "learning_rate": 0.0002993314272921847, + "loss": 0.1129, + "step": 171200 + }, + { + "epoch": 1.15, + "learning_rate": 0.00029917728573828744, + "loss": 0.1008, + "step": 171400 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002990237880813783, + "loss": 0.1019, + "step": 171600 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029886939180346847, + "loss": 0.111, + "step": 171800 + }, + { + "epoch": 1.16, + "learning_rate": 0.000298714868103184, + "loss": 0.1101, + "step": 172000 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029856021715355947, + "loss": 0.1147, + "step": 172200 + }, + { + "epoch": 1.16, + "learning_rate": 0.00029840543912777203, + "loss": 0.1142, + "step": 172400 + }, + { + "epoch": 1.16, + "learning_rate": 0.000298250534199141, + "loss": 0.1117, + "step": 172600 + }, + { + "epoch": 1.16, + "learning_rate": 0.0002980955025411278, + "loss": 0.1095, + "step": 172800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029794034432733597, + "loss": 0.1034, + "step": 173000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029778505973151055, + "loss": 0.1097, + "step": 173200 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029762964892753816, + "loss": 0.108, + "step": 173400 + }, + { + "epoch": 1.17, + "learning_rate": 0.0002974741120894467, + "loss": 0.1098, + "step": 173600 + }, + { + "epoch": 1.17, + "learning_rate": 0.000297319228017684, + "loss": 0.1179, + "step": 173800 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029716344026199713, + "loss": 0.1175, + "step": 174000 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029700752699424865, + "loss": 0.1041, + "step": 174200 + }, + { + "epoch": 1.17, + "learning_rate": 0.00029685148838902903, + "loss": 0.1099, + "step": 174400 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029669532462106926, + "loss": 0.1064, + "step": 174600 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029653903586524053, + "loss": 0.1046, + "step": 174800 + }, + { + "epoch": 1.18, + "learning_rate": 0.00029638262229655384, + "loss": 0.109, + "step": 175000 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002962260840901601, + "loss": 0.1278, + "step": 175200 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002960694214213497, + "loss": 0.1119, + "step": 175400 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002959126344655524, + "loss": 0.108, + "step": 175600 + }, + { + "epoch": 1.18, + "learning_rate": 0.0002957557233983373, + "loss": 0.101, + "step": 175800 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002955986883954122, + "loss": 0.1151, + "step": 176000 + }, + { + "epoch": 1.19, + "learning_rate": 0.00029544152963262386, + "loss": 0.1182, + "step": 176200 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002952842472859576, + "loss": 0.0995, + "step": 176400 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002951268415315372, + "loss": 0.1064, + "step": 176600 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002949693125456244, + "loss": 0.1202, + "step": 176800 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002948116605046191, + "loss": 0.1103, + "step": 177000 + }, + { + "epoch": 1.19, + "learning_rate": 0.0002946538855850592, + "loss": 0.1041, + "step": 177200 + }, + { + "epoch": 1.2, + "learning_rate": 0.00029449598796361975, + "loss": 0.1136, + "step": 177400 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002943379678171135, + "loss": 0.1018, + "step": 177600 + }, + { + "epoch": 1.2, + "learning_rate": 0.00029418061633901157, + "loss": 0.1199, + "step": 177800 + }, + { + "epoch": 1.2, + "learning_rate": 0.00029402235228377286, + "loss": 0.1152, + "step": 178000 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002938639662338413, + "loss": 0.121, + "step": 178200 + }, + { + "epoch": 1.2, + "learning_rate": 0.00029370545836657664, + "loss": 0.1021, + "step": 178400 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002935468288594748, + "loss": 0.1046, + "step": 178600 + }, + { + "epoch": 1.2, + "learning_rate": 0.0002933880778901681, + "loss": 0.1073, + "step": 178800 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002932292056364248, + "loss": 0.1162, + "step": 179000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002930702122761489, + "loss": 0.1048, + "step": 179200 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002929110979873801, + "loss": 0.107, + "step": 179400 + }, + { + "epoch": 1.21, + "learning_rate": 0.00029275186294829356, + "loss": 0.1152, + "step": 179600 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002925925073371995, + "loss": 0.1175, + "step": 179800 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002924338290117502, + "loss": 0.1251, + "step": 180000 + }, + { + "epoch": 1.21, + "learning_rate": 0.0002922742333927425, + "loss": 0.109, + "step": 180200 + }, + { + "epoch": 1.22, + "learning_rate": 0.00029211451773657356, + "loss": 0.1099, + "step": 180400 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002919546822220921, + "loss": 0.124, + "step": 180600 + }, + { + "epoch": 1.22, + "learning_rate": 0.00029179472702828063, + "loss": 0.1024, + "step": 180800 + }, + { + "epoch": 1.22, + "learning_rate": 0.000291634652334256, + "loss": 0.1051, + "step": 181000 + }, + { + "epoch": 1.22, + "learning_rate": 0.00029147445831926867, + "loss": 0.1062, + "step": 181200 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002913141451627029, + "loss": 0.1134, + "step": 181400 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002911537130440763, + "loss": 0.1158, + "step": 181600 + }, + { + "epoch": 1.22, + "learning_rate": 0.0002909931621430395, + "loss": 0.1146, + "step": 181800 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002908324926393765, + "loss": 0.1031, + "step": 182000 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002906717047130038, + "loss": 0.113, + "step": 182200 + }, + { + "epoch": 1.23, + "learning_rate": 0.00029051079854397065, + "loss": 0.1105, + "step": 182400 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002903497743124587, + "loss": 0.1074, + "step": 182600 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002901886321987817, + "loss": 0.1044, + "step": 182800 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002900273723833857, + "loss": 0.104, + "step": 183000 + }, + { + "epoch": 1.23, + "learning_rate": 0.0002898659950468481, + "loss": 0.1385, + "step": 183200 + }, + { + "epoch": 1.24, + "learning_rate": 0.00028970450036987824, + "loss": 0.1143, + "step": 183400 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002895428885333168, + "loss": 0.1072, + "step": 183600 + }, + { + "epoch": 1.24, + "learning_rate": 0.00028938196865289624, + "loss": 0.1125, + "step": 183800 + }, + { + "epoch": 1.24, + "learning_rate": 0.000289220123623735, + "loss": 0.1072, + "step": 184000 + }, + { + "epoch": 1.24, + "learning_rate": 0.00028905816197738384, + "loss": 0.1123, + "step": 184200 + }, + { + "epoch": 1.24, + "learning_rate": 0.00028889608389520645, + "loss": 0.1132, + "step": 184400 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002887338895586967, + "loss": 0.1261, + "step": 184600 + }, + { + "epoch": 1.24, + "learning_rate": 0.0002885715791494787, + "loss": 0.1122, + "step": 184800 + }, + { + "epoch": 1.25, + "learning_rate": 0.00028840915284930656, + "loss": 0.1035, + "step": 185000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00028824661084006416, + "loss": 0.1153, + "step": 185200 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002880839533037649, + "loss": 0.1025, + "step": 185400 + }, + { + "epoch": 1.25, + "learning_rate": 0.00028792118042255164, + "loss": 0.1182, + "step": 185600 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002877582923786962, + "loss": 0.1138, + "step": 185800 + }, + { + "epoch": 1.25, + "learning_rate": 0.0002875952893545997, + "loss": 0.0991, + "step": 186000 + }, + { + "epoch": 1.25, + "learning_rate": 0.00028743217153279167, + "loss": 0.1069, + "step": 186200 + }, + { + "epoch": 1.26, + "learning_rate": 0.00028726893909593044, + "loss": 0.1092, + "step": 186400 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002871055922268025, + "loss": 0.1178, + "step": 186600 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002869421311083226, + "loss": 0.1065, + "step": 186800 + }, + { + "epoch": 1.26, + "learning_rate": 0.00028677855592353335, + "loss": 0.0977, + "step": 187000 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002866148668556052, + "loss": 0.1074, + "step": 187200 + }, + { + "epoch": 1.26, + "learning_rate": 0.00028645106408783596, + "loss": 0.1101, + "step": 187400 + }, + { + "epoch": 1.26, + "learning_rate": 0.0002862871478036509, + "loss": 0.0997, + "step": 187600 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002861231181866024, + "loss": 0.1043, + "step": 187800 + }, + { + "epoch": 1.27, + "learning_rate": 0.00028595897542036964, + "loss": 0.1088, + "step": 188000 + }, + { + "epoch": 1.27, + "learning_rate": 0.00028579554124811386, + "loss": 0.109, + "step": 188200 + }, + { + "epoch": 1.27, + "learning_rate": 0.00028563117329850646, + "loss": 0.1188, + "step": 188400 + }, + { + "epoch": 1.27, + "learning_rate": 0.00028546669275059144, + "loss": 0.1289, + "step": 188600 + }, + { + "epoch": 1.27, + "learning_rate": 0.00028530209978855283, + "loss": 0.1117, + "step": 188800 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002851373945967009, + "loss": 0.1109, + "step": 189000 + }, + { + "epoch": 1.27, + "learning_rate": 0.0002849725773594712, + "loss": 0.1077, + "step": 189200 + }, + { + "epoch": 1.28, + "learning_rate": 0.00028480764826142516, + "loss": 0.1115, + "step": 189400 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002846426074872491, + "loss": 0.1147, + "step": 189600 + }, + { + "epoch": 1.28, + "learning_rate": 0.00028447745522175455, + "loss": 0.1163, + "step": 189800 + }, + { + "epoch": 1.28, + "learning_rate": 0.000284312191649878, + "loss": 0.1165, + "step": 190000 + }, + { + "epoch": 1.28, + "learning_rate": 0.00028414681695668046, + "loss": 0.1177, + "step": 190200 + }, + { + "epoch": 1.28, + "learning_rate": 0.00028398133132734733, + "loss": 0.0974, + "step": 190400 + }, + { + "epoch": 1.28, + "learning_rate": 0.0002838165632042751, + "loss": 0.1099, + "step": 190600 + }, + { + "epoch": 1.29, + "learning_rate": 0.00028365085681108935, + "loss": 0.1195, + "step": 190800 + }, + { + "epoch": 1.29, + "learning_rate": 0.00028348504003714085, + "loss": 0.1002, + "step": 191000 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002833191130681101, + "loss": 0.1065, + "step": 191200 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002831530760898009, + "loss": 0.1089, + "step": 191400 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002829869292881404, + "loss": 0.1148, + "step": 191600 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002828215044037883, + "loss": 0.1049, + "step": 191800 + }, + { + "epoch": 1.29, + "learning_rate": 0.00028265513906049043, + "loss": 0.1026, + "step": 192000 + }, + { + "epoch": 1.29, + "learning_rate": 0.0002824886644514278, + "loss": 0.1162, + "step": 192200 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002823220807630174, + "loss": 0.1066, + "step": 192400 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002821553881817987, + "loss": 0.1169, + "step": 192600 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028198858689443274, + "loss": 0.1052, + "step": 192800 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028182167708770255, + "loss": 0.1017, + "step": 193000 + }, + { + "epoch": 1.3, + "learning_rate": 0.00028165465894851256, + "loss": 0.1143, + "step": 193200 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002814875326638886, + "loss": 0.1133, + "step": 193400 + }, + { + "epoch": 1.3, + "learning_rate": 0.0002813202984209775, + "loss": 0.1013, + "step": 193600 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028115295640704705, + "loss": 0.1057, + "step": 193800 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028098550680948564, + "loss": 0.1137, + "step": 194000 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002808179498158023, + "loss": 0.1064, + "step": 194200 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028065112420100753, + "loss": 0.1063, + "step": 194400 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002804833535127243, + "loss": 0.1058, + "step": 194600 + }, + { + "epoch": 1.31, + "learning_rate": 0.0002803154759906268, + "loss": 0.1047, + "step": 194800 + }, + { + "epoch": 1.31, + "learning_rate": 0.00028014749182270334, + "loss": 0.1154, + "step": 195000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027997940119706133, + "loss": 0.1168, + "step": 195200 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027981120430192757, + "loss": 0.1151, + "step": 195400 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002796429013256477, + "loss": 0.1142, + "step": 195600 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002794744924566864, + "loss": 0.1051, + "step": 195800 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027930597788362665, + "loss": 0.1081, + "step": 196000 + }, + { + "epoch": 1.32, + "learning_rate": 0.00027913735779516995, + "loss": 0.1047, + "step": 196200 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002789686323801359, + "loss": 0.1242, + "step": 196400 + }, + { + "epoch": 1.32, + "learning_rate": 0.0002787998018274622, + "loss": 0.1033, + "step": 196600 + }, + { + "epoch": 1.33, + "learning_rate": 0.000278630866326204, + "loss": 0.1124, + "step": 196800 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002784618260655342, + "loss": 0.1067, + "step": 197000 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002782926812347429, + "loss": 0.1173, + "step": 197200 + }, + { + "epoch": 1.33, + "learning_rate": 0.00027812343202323735, + "loss": 0.1127, + "step": 197400 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002779540786205417, + "loss": 0.1032, + "step": 197600 + }, + { + "epoch": 1.33, + "learning_rate": 0.0002777846212162966, + "loss": 0.1048, + "step": 197800 + }, + { + "epoch": 1.33, + "learning_rate": 0.00027761506000025947, + "loss": 0.1208, + "step": 198000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00027744539516230373, + "loss": 0.1019, + "step": 198200 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002772756268924189, + "loss": 0.1051, + "step": 198400 + }, + { + "epoch": 1.34, + "learning_rate": 0.00027710660499476826, + "loss": 0.0973, + "step": 198600 + }, + { + "epoch": 1.34, + "learning_rate": 0.00027693663094624163, + "loss": 0.1141, + "step": 198800 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002767665540354966, + "loss": 0.1098, + "step": 199000 + }, + { + "epoch": 1.34, + "learning_rate": 0.00027659637445298413, + "loss": 0.1083, + "step": 199200 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002764269440541954, + "loss": 0.1212, + "step": 199400 + }, + { + "epoch": 1.34, + "learning_rate": 0.0002762565602109388, + "loss": 0.1119, + "step": 199600 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002760860742670024, + "loss": 0.101, + "step": 199800 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002759154864132951, + "loss": 0.1161, + "step": 200000 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002757447968408403, + "loss": 0.1007, + "step": 200200 + }, + { + "epoch": 1.35, + "learning_rate": 0.0002755740057407747, + "loss": 0.1083, + "step": 200400 + }, + { + "epoch": 1.35, + "learning_rate": 0.00027540311330434927, + "loss": 0.1048, + "step": 200600 + }, + { + "epoch": 1.35, + "learning_rate": 0.000275232119722928, + "loss": 0.1031, + "step": 200800 + }, + { + "epoch": 1.35, + "learning_rate": 0.00027506102518798847, + "loss": 0.1104, + "step": 201000 + }, + { + "epoch": 1.36, + "learning_rate": 0.00027488982989112105, + "loss": 0.1104, + "step": 201200 + }, + { + "epoch": 1.36, + "learning_rate": 0.00027471853402402923, + "loss": 0.1052, + "step": 201400 + }, + { + "epoch": 1.36, + "learning_rate": 0.00027454713777852875, + "loss": 0.1087, + "step": 201600 + }, + { + "epoch": 1.36, + "learning_rate": 0.00027437649907760433, + "loss": 0.115, + "step": 201800 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002742049031506784, + "loss": 0.1065, + "step": 202000 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002740332074205042, + "loss": 0.1195, + "step": 202200 + }, + { + "epoch": 1.36, + "learning_rate": 0.00027386141207934555, + "loss": 0.1128, + "step": 202400 + }, + { + "epoch": 1.36, + "learning_rate": 0.0002736895173195777, + "loss": 0.1072, + "step": 202600 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002735175233336872, + "loss": 0.1028, + "step": 202800 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002733454303142717, + "loss": 0.1085, + "step": 203000 + }, + { + "epoch": 1.37, + "learning_rate": 0.00027317323845403986, + "loss": 0.1047, + "step": 203200 + }, + { + "epoch": 1.37, + "learning_rate": 0.000273000947945811, + "loss": 0.1193, + "step": 203400 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002728285589825147, + "loss": 0.1041, + "step": 203600 + }, + { + "epoch": 1.37, + "learning_rate": 0.0002726560717571911, + "loss": 0.1032, + "step": 203800 + }, + { + "epoch": 1.37, + "learning_rate": 0.00027248348646299013, + "loss": 0.0963, + "step": 204000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00027231080329317167, + "loss": 0.107, + "step": 204200 + }, + { + "epoch": 1.38, + "learning_rate": 0.00027213802244110513, + "loss": 0.1048, + "step": 204400 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002719651441002693, + "loss": 0.1097, + "step": 204600 + }, + { + "epoch": 1.38, + "learning_rate": 0.00027179216846425223, + "loss": 0.1019, + "step": 204800 + }, + { + "epoch": 1.38, + "learning_rate": 0.00027161909572675087, + "loss": 0.1078, + "step": 205000 + }, + { + "epoch": 1.38, + "learning_rate": 0.00027144592608157085, + "loss": 0.1092, + "step": 205200 + }, + { + "epoch": 1.38, + "learning_rate": 0.0002712726597226264, + "loss": 0.1169, + "step": 205400 + }, + { + "epoch": 1.39, + "learning_rate": 0.00027110016389810513, + "loss": 0.1222, + "step": 205600 + }, + { + "epoch": 1.39, + "learning_rate": 0.00027092670517495234, + "loss": 0.1128, + "step": 205800 + }, + { + "epoch": 1.39, + "learning_rate": 0.00027075315031945495, + "loss": 0.1243, + "step": 206000 + }, + { + "epoch": 1.39, + "learning_rate": 0.00027057949952595886, + "loss": 0.108, + "step": 206200 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002704057529889168, + "loss": 0.1073, + "step": 206400 + }, + { + "epoch": 1.39, + "learning_rate": 0.00027023191090288904, + "loss": 0.1142, + "step": 206600 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002700579734625427, + "loss": 0.1108, + "step": 206800 + }, + { + "epoch": 1.39, + "learning_rate": 0.0002698839408626519, + "loss": 0.1077, + "step": 207000 + }, + { + "epoch": 1.4, + "learning_rate": 0.000269709813298097, + "loss": 0.1101, + "step": 207200 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002695355909638649, + "loss": 0.1073, + "step": 207400 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002693612740550486, + "loss": 0.106, + "step": 207600 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026918686276684704, + "loss": 0.1054, + "step": 207800 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026901235729456465, + "loss": 0.1073, + "step": 208000 + }, + { + "epoch": 1.4, + "learning_rate": 0.00026883775783361163, + "loss": 0.1018, + "step": 208200 + }, + { + "epoch": 1.4, + "learning_rate": 0.0002686630645795032, + "loss": 0.1091, + "step": 208400 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026848827772785964, + "loss": 0.1111, + "step": 208600 + }, + { + "epoch": 1.41, + "learning_rate": 0.0002683133974744061, + "loss": 0.1087, + "step": 208800 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026813842401497234, + "loss": 0.1138, + "step": 209000 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026796335754549237, + "loss": 0.1044, + "step": 209200 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026778819826200456, + "loss": 0.1088, + "step": 209400 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026761294636065095, + "loss": 0.1114, + "step": 209600 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026743760203767745, + "loss": 0.1001, + "step": 209800 + }, + { + "epoch": 1.41, + "learning_rate": 0.00026726216548943345, + "loss": 0.1039, + "step": 210000 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002670866369123717, + "loss": 0.1047, + "step": 210200 + }, + { + "epoch": 1.42, + "learning_rate": 0.00026691101650304773, + "loss": 0.1054, + "step": 210400 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002667353044581202, + "loss": 0.1058, + "step": 210600 + }, + { + "epoch": 1.42, + "learning_rate": 0.00026655950097435025, + "loss": 0.1053, + "step": 210800 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002663844859488688, + "loss": 0.1081, + "step": 211000 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002662085006328417, + "loss": 0.1077, + "step": 211200 + }, + { + "epoch": 1.42, + "learning_rate": 0.0002660324244678835, + "loss": 0.1008, + "step": 211400 + }, + { + "epoch": 1.43, + "learning_rate": 0.00026585625765116307, + "loss": 0.108, + "step": 211600 + }, + { + "epoch": 1.43, + "learning_rate": 0.00026568088189098604, + "loss": 0.1066, + "step": 211800 + }, + { + "epoch": 1.43, + "learning_rate": 0.00026550453481344815, + "loss": 0.1103, + "step": 212000 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002653280976752753, + "loss": 0.1218, + "step": 212200 + }, + { + "epoch": 1.43, + "learning_rate": 0.00026515157067404056, + "loss": 0.0986, + "step": 212400 + }, + { + "epoch": 1.43, + "learning_rate": 0.00026497495400741773, + "loss": 0.1041, + "step": 212600 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002647982478731809, + "loss": 0.1071, + "step": 212800 + }, + { + "epoch": 1.43, + "learning_rate": 0.0002646214524692044, + "loss": 0.1149, + "step": 213000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00026444456799346256, + "loss": 0.1116, + "step": 213200 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002642675946440293, + "loss": 0.1057, + "step": 213400 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002640905326190783, + "loss": 0.1118, + "step": 213600 + }, + { + "epoch": 1.44, + "learning_rate": 0.00026391338211688236, + "loss": 0.1243, + "step": 213800 + }, + { + "epoch": 1.44, + "learning_rate": 0.00026373614333581336, + "loss": 0.102, + "step": 214000 + }, + { + "epoch": 1.44, + "learning_rate": 0.00026355881647434213, + "loss": 0.1077, + "step": 214200 + }, + { + "epoch": 1.44, + "learning_rate": 0.0002633814017310381, + "loss": 0.1235, + "step": 214400 + }, + { + "epoch": 1.45, + "learning_rate": 0.000263203899304569, + "loss": 0.0982, + "step": 214600 + }, + { + "epoch": 1.45, + "learning_rate": 0.000263026309393701, + "loss": 0.1053, + "step": 214800 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026284863219729797, + "loss": 0.1, + "step": 215000 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002626708679143215, + "loss": 0.109, + "step": 215200 + }, + { + "epoch": 1.45, + "learning_rate": 0.00026249301674383113, + "loss": 0.1043, + "step": 215400 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002623150788849831, + "loss": 0.1044, + "step": 215600 + }, + { + "epoch": 1.45, + "learning_rate": 0.0002621379448735828, + "loss": 0.1312, + "step": 215800 + }, + { + "epoch": 1.46, + "learning_rate": 0.00026195983466683015, + "loss": 0.1166, + "step": 216000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00026178252956408584, + "loss": 0.1259, + "step": 216200 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002616042478032307, + "loss": 0.1008, + "step": 216400 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002614258803492553, + "loss": 0.1057, + "step": 216600 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002612474274018945, + "loss": 0.1101, + "step": 216800 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002610688891609786, + "loss": 0.1088, + "step": 217000 + }, + { + "epoch": 1.46, + "learning_rate": 0.00026089026582643357, + "loss": 0.1161, + "step": 217200 + }, + { + "epoch": 1.46, + "learning_rate": 0.0002607115575982805, + "loss": 0.107, + "step": 217400 + }, + { + "epoch": 1.47, + "learning_rate": 0.00026053276467663576, + "loss": 0.1081, + "step": 217600 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002603538872617103, + "loss": 0.1039, + "step": 217800 + }, + { + "epoch": 1.47, + "learning_rate": 0.00026017492555381, + "loss": 0.1083, + "step": 218000 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002599958797533348, + "loss": 0.1198, + "step": 218200 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002598167500607792, + "loss": 0.1207, + "step": 218400 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002596375366767311, + "loss": 0.1063, + "step": 218600 + }, + { + "epoch": 1.47, + "learning_rate": 0.0002594591364935983, + "loss": 0.1176, + "step": 218800 + }, + { + "epoch": 1.48, + "learning_rate": 0.00025927975674465545, + "loss": 0.1, + "step": 219000 + }, + { + "epoch": 1.48, + "learning_rate": 0.00025910029390554173, + "loss": 0.1113, + "step": 219200 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002589207481772184, + "loss": 0.1117, + "step": 219400 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002587411197607395, + "loss": 0.1175, + "step": 219600 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002585614088572518, + "loss": 0.1, + "step": 219800 + }, + { + "epoch": 1.48, + "learning_rate": 0.00025838161566799425, + "loss": 0.1118, + "step": 220000 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002582017403942981, + "loss": 0.1157, + "step": 220200 + }, + { + "epoch": 1.48, + "learning_rate": 0.0002580217832375865, + "loss": 0.1029, + "step": 220400 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002578417443993741, + "loss": 0.1049, + "step": 220600 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002576616240812673, + "loss": 0.0959, + "step": 220800 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002574814224849635, + "loss": 0.1038, + "step": 221000 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002573011398122514, + "loss": 0.1141, + "step": 221200 + }, + { + "epoch": 1.49, + "learning_rate": 0.00025712077626501007, + "loss": 0.1005, + "step": 221400 + }, + { + "epoch": 1.49, + "learning_rate": 0.0002569403320452095, + "loss": 0.1049, + "step": 221600 + }, + { + "epoch": 1.49, + "learning_rate": 0.00025675980735491, + "loss": 0.1105, + "step": 221800 + }, + { + "epoch": 1.5, + "learning_rate": 0.00025657920239626175, + "loss": 0.1046, + "step": 222000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002563985173715049, + "loss": 0.1049, + "step": 222200 + }, + { + "epoch": 1.5, + "learning_rate": 0.00025621775248296953, + "loss": 0.1063, + "step": 222400 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002560369079330747, + "loss": 0.1137, + "step": 222600 + }, + { + "epoch": 1.5, + "learning_rate": 0.00025585598392432904, + "loss": 0.1066, + "step": 222800 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002556758858724697, + "loss": 0.1117, + "step": 223000 + }, + { + "epoch": 1.5, + "learning_rate": 0.0002554957095545989, + "loss": 0.1273, + "step": 223200 + }, + { + "epoch": 1.5, + "learning_rate": 0.00025531454917274415, + "loss": 0.1036, + "step": 223400 + }, + { + "epoch": 1.51, + "learning_rate": 0.000255133310140931, + "loss": 0.116, + "step": 223600 + }, + { + "epoch": 1.51, + "learning_rate": 0.00025495199266210964, + "loss": 0.0995, + "step": 223800 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002547705969393181, + "loss": 0.1075, + "step": 224000 + }, + { + "epoch": 1.51, + "learning_rate": 0.00025458912317568216, + "loss": 0.108, + "step": 224200 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002544075715744149, + "loss": 0.1033, + "step": 224400 + }, + { + "epoch": 1.51, + "learning_rate": 0.0002542259423388165, + "loss": 0.1068, + "step": 224600 + }, + { + "epoch": 1.51, + "learning_rate": 0.00025404423567227416, + "loss": 0.117, + "step": 224800 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002538624517782618, + "loss": 0.1027, + "step": 225000 + }, + { + "epoch": 1.52, + "learning_rate": 0.00025368059086033976, + "loss": 0.1048, + "step": 225200 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002534995630015991, + "loss": 0.1265, + "step": 225400 + }, + { + "epoch": 1.52, + "learning_rate": 0.00025331754902945935, + "loss": 0.1161, + "step": 225600 + }, + { + "epoch": 1.52, + "learning_rate": 0.00025313545864358826, + "loss": 0.0981, + "step": 225800 + }, + { + "epoch": 1.52, + "learning_rate": 0.0002529532920478896, + "loss": 0.11, + "step": 226000 + }, + { + "epoch": 1.52, + "learning_rate": 0.000252771049446352, + "loss": 0.1136, + "step": 226200 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002525887310430497, + "loss": 0.1101, + "step": 226400 + }, + { + "epoch": 1.53, + "learning_rate": 0.00025240633704214153, + "loss": 0.1082, + "step": 226600 + }, + { + "epoch": 1.53, + "learning_rate": 0.00025222386764787107, + "loss": 0.1127, + "step": 226800 + }, + { + "epoch": 1.53, + "learning_rate": 0.00025204132306456625, + "loss": 0.1061, + "step": 227000 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002518587034966394, + "loss": 0.1125, + "step": 227200 + }, + { + "epoch": 1.53, + "learning_rate": 0.00025167600914858645, + "loss": 0.1111, + "step": 227400 + }, + { + "epoch": 1.53, + "learning_rate": 0.0002514932402249875, + "loss": 0.1103, + "step": 227600 + }, + { + "epoch": 1.53, + "learning_rate": 0.00025131039693050575, + "loss": 0.1084, + "step": 227800 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002511274794698881, + "loss": 0.1109, + "step": 228000 + }, + { + "epoch": 1.54, + "learning_rate": 0.000250944488047964, + "loss": 0.1005, + "step": 228200 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002507614228696462, + "loss": 0.105, + "step": 228400 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002505782841399298, + "loss": 0.1013, + "step": 228600 + }, + { + "epoch": 1.54, + "learning_rate": 0.00025039507206389233, + "loss": 0.1049, + "step": 228800 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002502117868466933, + "loss": 0.0967, + "step": 229000 + }, + { + "epoch": 1.54, + "learning_rate": 0.0002500284286935744, + "loss": 0.0953, + "step": 229200 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002498449978098587, + "loss": 0.0965, + "step": 229400 + }, + { + "epoch": 1.55, + "learning_rate": 0.00024966149440095104, + "loss": 0.1028, + "step": 229600 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002494779186723371, + "loss": 0.1118, + "step": 229800 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002492951892478414, + "loss": 0.1011, + "step": 230000 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002491114698556271, + "loss": 0.1075, + "step": 230200 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002489276787596204, + "loss": 0.0981, + "step": 230400 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002487438161656293, + "loss": 0.108, + "step": 230600 + }, + { + "epoch": 1.55, + "learning_rate": 0.0002485598822795418, + "loss": 0.1076, + "step": 230800 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002483758773073258, + "loss": 0.1204, + "step": 231000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002481918014550289, + "loss": 0.1068, + "step": 231200 + }, + { + "epoch": 1.56, + "learning_rate": 0.00024800765492877794, + "loss": 0.1066, + "step": 231400 + }, + { + "epoch": 1.56, + "learning_rate": 0.000247823437934779, + "loss": 0.1072, + "step": 231600 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002476391506793169, + "loss": 0.1118, + "step": 231800 + }, + { + "epoch": 1.56, + "learning_rate": 0.00024745479336875533, + "loss": 0.1003, + "step": 232000 + }, + { + "epoch": 1.56, + "learning_rate": 0.0002472703662095364, + "loss": 0.1214, + "step": 232200 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002470858694081803, + "loss": 0.1063, + "step": 232400 + }, + { + "epoch": 1.57, + "learning_rate": 0.00024690222617484883, + "loss": 0.1117, + "step": 232600 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002467185144021892, + "loss": 0.1019, + "step": 232800 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002465338106035194, + "loss": 0.1018, + "step": 233000 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002463490379875025, + "loss": 0.1261, + "step": 233200 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002461641967610455, + "loss": 0.103, + "step": 233400 + }, + { + "epoch": 1.57, + "learning_rate": 0.0002459792871311324, + "loss": 0.1017, + "step": 233600 + }, + { + "epoch": 1.58, + "learning_rate": 0.00024579430930482364, + "loss": 0.0997, + "step": 233800 + }, + { + "epoch": 1.58, + "learning_rate": 0.00024560926348925633, + "loss": 0.1041, + "step": 234000 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002454241498916433, + "loss": 0.1037, + "step": 234200 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002452389687192737, + "loss": 0.1114, + "step": 234400 + }, + { + "epoch": 1.58, + "learning_rate": 0.00024505372017951196, + "loss": 0.1064, + "step": 234600 + }, + { + "epoch": 1.58, + "learning_rate": 0.00024486840447979826, + "loss": 0.1128, + "step": 234800 + }, + { + "epoch": 1.58, + "learning_rate": 0.00024468302182764775, + "loss": 0.107, + "step": 235000 + }, + { + "epoch": 1.58, + "learning_rate": 0.0002444975724306508, + "loss": 0.1024, + "step": 235200 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002443120564964722, + "loss": 0.1176, + "step": 235400 + }, + { + "epoch": 1.59, + "learning_rate": 0.00024412647423285164, + "loss": 0.0995, + "step": 235600 + }, + { + "epoch": 1.59, + "learning_rate": 0.00024394082584760268, + "loss": 0.1047, + "step": 235800 + }, + { + "epoch": 1.59, + "learning_rate": 0.00024375511154861332, + "loss": 0.1159, + "step": 236000 + }, + { + "epoch": 1.59, + "learning_rate": 0.00024356933154384496, + "loss": 0.1087, + "step": 236200 + }, + { + "epoch": 1.59, + "learning_rate": 0.00024338348604133295, + "loss": 0.1051, + "step": 236400 + }, + { + "epoch": 1.59, + "learning_rate": 0.0002431975752491857, + "loss": 0.1116, + "step": 236600 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024301252941649857, + "loss": 0.1048, + "step": 236800 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024282648899354658, + "loss": 0.0991, + "step": 237000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024264038390468088, + "loss": 0.1106, + "step": 237200 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024245421435830072, + "loss": 0.0987, + "step": 237400 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024226798056287748, + "loss": 0.1062, + "step": 237600 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024208168272695445, + "loss": 0.106, + "step": 237800 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024189532105914677, + "loss": 0.1128, + "step": 238000 + }, + { + "epoch": 1.6, + "learning_rate": 0.00024170889576814088, + "loss": 0.1094, + "step": 238200 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024152240706269457, + "loss": 0.121, + "step": 238400 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024133585515163664, + "loss": 0.1045, + "step": 238600 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024114924024386668, + "loss": 0.1022, + "step": 238800 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024096349609267103, + "loss": 0.1023, + "step": 239000 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024077675613083096, + "loss": 0.1093, + "step": 239200 + }, + { + "epoch": 1.61, + "learning_rate": 0.000240589953798354, + "loss": 0.1054, + "step": 239400 + }, + { + "epoch": 1.61, + "learning_rate": 0.00024040308930442034, + "loss": 0.1011, + "step": 239600 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002402161628582794, + "loss": 0.0995, + "step": 239800 + }, + { + "epoch": 1.62, + "learning_rate": 0.00024002917466925026, + "loss": 0.1062, + "step": 240000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002398421249467209, + "loss": 0.1071, + "step": 240200 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023965594960757828, + "loss": 0.1081, + "step": 240400 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023946877775153958, + "loss": 0.1126, + "step": 240600 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023928154498952938, + "loss": 0.1077, + "step": 240800 + }, + { + "epoch": 1.62, + "learning_rate": 0.00023909425153120962, + "loss": 0.1136, + "step": 241000 + }, + { + "epoch": 1.62, + "learning_rate": 0.0002389068975863103, + "loss": 0.1101, + "step": 241200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023871948336462903, + "loss": 0.0935, + "step": 241400 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023853200907603106, + "loss": 0.1068, + "step": 241600 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002383444749304487, + "loss": 0.1098, + "step": 241800 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002381568811378815, + "loss": 0.1166, + "step": 242000 + }, + { + "epoch": 1.63, + "learning_rate": 0.0002379692279083957, + "loss": 0.1049, + "step": 242200 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023778151545212405, + "loss": 0.1021, + "step": 242400 + }, + { + "epoch": 1.63, + "learning_rate": 0.00023759374397926578, + "loss": 0.1157, + "step": 242600 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023740591370008605, + "loss": 0.0974, + "step": 242800 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023721802482491592, + "loss": 0.1002, + "step": 243000 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023703007756415207, + "loss": 0.0989, + "step": 243200 + }, + { + "epoch": 1.64, + "learning_rate": 0.0002368420721282566, + "loss": 0.1032, + "step": 243400 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023665400872775662, + "loss": 0.1085, + "step": 243600 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023646588757324441, + "loss": 0.1113, + "step": 243800 + }, + { + "epoch": 1.64, + "learning_rate": 0.00023627770887537663, + "loss": 0.1086, + "step": 244000 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023608947284487457, + "loss": 0.0993, + "step": 244200 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002359021213000272, + "loss": 0.1198, + "step": 244400 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023571377152070707, + "loss": 0.1126, + "step": 244600 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023552536504024596, + "loss": 0.1061, + "step": 244800 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023533690206961993, + "loss": 0.1046, + "step": 245000 + }, + { + "epoch": 1.65, + "learning_rate": 0.0002351483828198688, + "loss": 0.1033, + "step": 245200 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023495980750209484, + "loss": 0.1094, + "step": 245400 + }, + { + "epoch": 1.65, + "learning_rate": 0.00023477117632746366, + "loss": 0.0984, + "step": 245600 + }, + { + "epoch": 1.66, + "learning_rate": 0.00023458248950720297, + "loss": 0.1101, + "step": 245800 + }, + { + "epoch": 1.66, + "learning_rate": 0.00023439374725260314, + "loss": 0.108, + "step": 246000 + }, + { + "epoch": 1.66, + "learning_rate": 0.00023420494977501635, + "loss": 0.1068, + "step": 246200 + }, + { + "epoch": 1.66, + "learning_rate": 0.00023401704168479406, + "loss": 0.103, + "step": 246400 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002338281346690118, + "loss": 0.1124, + "step": 246600 + }, + { + "epoch": 1.66, + "learning_rate": 0.00023363917306361173, + "loss": 0.103, + "step": 246800 + }, + { + "epoch": 1.66, + "learning_rate": 0.0002334501570801918, + "loss": 0.1052, + "step": 247000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00023326108693041077, + "loss": 0.1122, + "step": 247200 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002330719628259882, + "loss": 0.1069, + "step": 247400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00023288278497870388, + "loss": 0.106, + "step": 247600 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002326935536003979, + "loss": 0.106, + "step": 247800 + }, + { + "epoch": 1.67, + "learning_rate": 0.0002325042689029703, + "loss": 0.1027, + "step": 248000 + }, + { + "epoch": 1.67, + "learning_rate": 0.00023231493109838075, + "loss": 0.1063, + "step": 248200 + }, + { + "epoch": 1.67, + "learning_rate": 0.00023212648748337304, + "loss": 0.1053, + "step": 248400 + }, + { + "epoch": 1.67, + "learning_rate": 0.00023193704436346414, + "loss": 0.1191, + "step": 248600 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023174849637970008, + "loss": 0.1165, + "step": 248800 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023155894878878252, + "loss": 0.0973, + "step": 249000 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023136934914926618, + "loss": 0.1023, + "step": 249200 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023117969767346322, + "loss": 0.1, + "step": 249400 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023098999457374426, + "loss": 0.1107, + "step": 249600 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023080024006253745, + "loss": 0.1013, + "step": 249800 + }, + { + "epoch": 1.68, + "learning_rate": 0.00023061043435232856, + "loss": 0.0996, + "step": 250000 + }, + { + "epoch": 1.69, + "learning_rate": 0.00023042057765566083, + "loss": 0.1205, + "step": 250200 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002302306701851344, + "loss": 0.115, + "step": 250400 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002300407121534064, + "loss": 0.0969, + "step": 250600 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002298507037731906, + "loss": 0.1042, + "step": 250800 + }, + { + "epoch": 1.69, + "learning_rate": 0.000229660645257257, + "loss": 0.112, + "step": 251000 + }, + { + "epoch": 1.69, + "learning_rate": 0.0002294705368184319, + "loss": 0.1101, + "step": 251200 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022928037866959736, + "loss": 0.1112, + "step": 251400 + }, + { + "epoch": 1.69, + "learning_rate": 0.00022909017102369127, + "loss": 0.0958, + "step": 251600 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002288999140937068, + "loss": 0.115, + "step": 251800 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022870960809269237, + "loss": 0.0982, + "step": 252000 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022852020512922758, + "loss": 0.101, + "step": 252200 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022832980186821158, + "loss": 0.0993, + "step": 252400 + }, + { + "epoch": 1.7, + "learning_rate": 0.0002281393501745734, + "loss": 0.1095, + "step": 252600 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022794885026157955, + "loss": 0.102, + "step": 252800 + }, + { + "epoch": 1.7, + "learning_rate": 0.00022775830234255057, + "loss": 0.0989, + "step": 253000 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022756770663086073, + "loss": 0.104, + "step": 253200 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002273770633399378, + "loss": 0.1069, + "step": 253400 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022718637268326274, + "loss": 0.1034, + "step": 253600 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022699563487436978, + "loss": 0.1017, + "step": 253800 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022680485012684578, + "loss": 0.1064, + "step": 254000 + }, + { + "epoch": 1.71, + "learning_rate": 0.0002266140186543302, + "loss": 0.1003, + "step": 254200 + }, + { + "epoch": 1.71, + "learning_rate": 0.00022642409517577717, + "loss": 0.1237, + "step": 254400 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022623317112536182, + "loss": 0.1315, + "step": 254600 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022604220099011713, + "loss": 0.1103, + "step": 254800 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022585118498389016, + "loss": 0.1142, + "step": 255000 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022566012332057937, + "loss": 0.1058, + "step": 255200 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022546901621413423, + "loss": 0.098, + "step": 255400 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022527786387855522, + "loss": 0.1109, + "step": 255600 + }, + { + "epoch": 1.72, + "learning_rate": 0.00022508666652789347, + "loss": 0.1164, + "step": 255800 + }, + { + "epoch": 1.72, + "learning_rate": 0.0002248954243762504, + "loss": 0.1031, + "step": 256000 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002247041376377777, + "loss": 0.123, + "step": 256200 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022451280652667694, + "loss": 0.1113, + "step": 256400 + }, + { + "epoch": 1.73, + "learning_rate": 0.0002243214312571994, + "loss": 0.1076, + "step": 256600 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022413001204364582, + "loss": 0.1129, + "step": 256800 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022393854910036614, + "loss": 0.0994, + "step": 257000 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022374800028194188, + "loss": 0.124, + "step": 257200 + }, + { + "epoch": 1.73, + "learning_rate": 0.00022355645073842626, + "loss": 0.0966, + "step": 257400 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022336485810745454, + "loss": 0.1031, + "step": 257600 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022317322260357087, + "loss": 0.1166, + "step": 257800 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022298154444136742, + "loss": 0.1128, + "step": 258000 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022278982383548408, + "loss": 0.1087, + "step": 258200 + }, + { + "epoch": 1.74, + "learning_rate": 0.0002225980610006082, + "loss": 0.1097, + "step": 258400 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022240625615147454, + "loss": 0.104, + "step": 258600 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022221440950286487, + "loss": 0.1104, + "step": 258800 + }, + { + "epoch": 1.74, + "learning_rate": 0.00022202252126960782, + "loss": 0.1102, + "step": 259000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022183059166657837, + "loss": 0.1033, + "step": 259200 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002216386209086982, + "loss": 0.1034, + "step": 259400 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002214466092109346, + "loss": 0.1014, + "step": 259600 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022125455678830122, + "loss": 0.1118, + "step": 259800 + }, + { + "epoch": 1.75, + "learning_rate": 0.0002210624638558569, + "loss": 0.1032, + "step": 260000 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022087033062870605, + "loss": 0.0966, + "step": 260200 + }, + { + "epoch": 1.75, + "learning_rate": 0.00022067815732199807, + "loss": 0.107, + "step": 260400 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022048690531558938, + "loss": 0.1213, + "step": 260600 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022029465269310516, + "loss": 0.1062, + "step": 260800 + }, + { + "epoch": 1.76, + "learning_rate": 0.00022010236063570412, + "loss": 0.0971, + "step": 261000 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021991002935871357, + "loss": 0.1026, + "step": 261200 + }, + { + "epoch": 1.76, + "learning_rate": 0.0002197176590775048, + "loss": 0.1056, + "step": 261400 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021952525000749266, + "loss": 0.1076, + "step": 261600 + }, + { + "epoch": 1.76, + "learning_rate": 0.00021933280236413567, + "loss": 0.1034, + "step": 261800 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021914031636293516, + "loss": 0.105, + "step": 262000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021894779221943593, + "loss": 0.1006, + "step": 262200 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021875523014922496, + "loss": 0.0986, + "step": 262400 + }, + { + "epoch": 1.77, + "learning_rate": 0.0002185626303679321, + "loss": 0.1038, + "step": 262600 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021837095637052567, + "loss": 0.1153, + "step": 262800 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021817828199998843, + "loss": 0.1022, + "step": 263000 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021798557056443188, + "loss": 0.1056, + "step": 263200 + }, + { + "epoch": 1.77, + "learning_rate": 0.00021779282227965291, + "loss": 0.1112, + "step": 263400 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021760003736148975, + "loss": 0.1254, + "step": 263600 + }, + { + "epoch": 1.78, + "learning_rate": 0.0002174072160258217, + "loss": 0.0986, + "step": 263800 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021721435848856872, + "loss": 0.1134, + "step": 264000 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021702146496569135, + "loss": 0.1026, + "step": 264200 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021682853567319053, + "loss": 0.1051, + "step": 264400 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021663557082710706, + "loss": 0.1032, + "step": 264600 + }, + { + "epoch": 1.78, + "learning_rate": 0.00021644257064352173, + "loss": 0.1088, + "step": 264800 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021624953533855483, + "loss": 0.1064, + "step": 265000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021605646512836595, + "loss": 0.1037, + "step": 265200 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021586336022915377, + "loss": 0.1051, + "step": 265400 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021567022085715594, + "loss": 0.1153, + "step": 265600 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021547704722864853, + "loss": 0.1095, + "step": 265800 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002152838395599461, + "loss": 0.1001, + "step": 266000 + }, + { + "epoch": 1.79, + "learning_rate": 0.00021509059806740124, + "loss": 0.1076, + "step": 266200 + }, + { + "epoch": 1.79, + "learning_rate": 0.0002148973229674045, + "loss": 0.1002, + "step": 266400 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021470401447638398, + "loss": 0.1111, + "step": 266600 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002145106728108052, + "loss": 0.1106, + "step": 266800 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021431729818717092, + "loss": 0.1043, + "step": 267000 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002141248579399327, + "loss": 0.1007, + "step": 267200 + }, + { + "epoch": 1.8, + "learning_rate": 0.00021393141821192865, + "loss": 0.0938, + "step": 267400 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002137379461745144, + "loss": 0.112, + "step": 267600 + }, + { + "epoch": 1.8, + "learning_rate": 0.0002135444420443386, + "loss": 0.097, + "step": 267800 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002133509060380859, + "loss": 0.0971, + "step": 268000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021315733837247666, + "loss": 0.1002, + "step": 268200 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021296373926426643, + "loss": 0.1044, + "step": 268400 + }, + { + "epoch": 1.81, + "learning_rate": 0.0002127701089302464, + "loss": 0.1069, + "step": 268600 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021257644758724233, + "loss": 0.1053, + "step": 268800 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021238275545211504, + "loss": 0.0994, + "step": 269000 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021218903274175944, + "loss": 0.1059, + "step": 269200 + }, + { + "epoch": 1.81, + "learning_rate": 0.00021199527967310516, + "loss": 0.1064, + "step": 269400 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021180246545378291, + "loss": 0.1124, + "step": 269600 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021160865246853668, + "loss": 0.1018, + "step": 269800 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021141480977489752, + "loss": 0.1083, + "step": 270000 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021122093758992914, + "loss": 0.1062, + "step": 270200 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021102703613072832, + "loss": 0.107, + "step": 270400 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021083310561442455, + "loss": 0.1007, + "step": 270600 + }, + { + "epoch": 1.82, + "learning_rate": 0.00021063914625817995, + "loss": 0.1065, + "step": 270800 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021044612828992364, + "loss": 0.1126, + "step": 271000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021025211204689966, + "loss": 0.1022, + "step": 271200 + }, + { + "epoch": 1.83, + "learning_rate": 0.00021005806761452758, + "loss": 0.0992, + "step": 271400 + }, + { + "epoch": 1.83, + "learning_rate": 0.00020986399521009683, + "loss": 0.0991, + "step": 271600 + }, + { + "epoch": 1.83, + "learning_rate": 0.00020966989505092856, + "loss": 0.1072, + "step": 271800 + }, + { + "epoch": 1.83, + "learning_rate": 0.00020947576735437463, + "loss": 0.0948, + "step": 272000 + }, + { + "epoch": 1.83, + "learning_rate": 0.00020928161233781808, + "loss": 0.1053, + "step": 272200 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020908743021867218, + "loss": 0.1007, + "step": 272400 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020889322121438094, + "loss": 0.0949, + "step": 272600 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020869898554241816, + "loss": 0.1083, + "step": 272800 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002085047234202877, + "loss": 0.1076, + "step": 273000 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002083114065721905, + "loss": 0.1129, + "step": 273200 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002081170923318883, + "loss": 0.0989, + "step": 273400 + }, + { + "epoch": 1.84, + "learning_rate": 0.00020792275229301839, + "loss": 0.1126, + "step": 273600 + }, + { + "epoch": 1.84, + "learning_rate": 0.0002077283866732015, + "loss": 0.1027, + "step": 273800 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020753399569008686, + "loss": 0.1086, + "step": 274000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020733957956135224, + "loss": 0.1178, + "step": 274200 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020714513850470342, + "loss": 0.1053, + "step": 274400 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020695067273787424, + "loss": 0.0955, + "step": 274600 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020675618247862612, + "loss": 0.1083, + "step": 274800 + }, + { + "epoch": 1.85, + "learning_rate": 0.0002065616679447479, + "loss": 0.1019, + "step": 275000 + }, + { + "epoch": 1.85, + "learning_rate": 0.00020636712935405562, + "loss": 0.1158, + "step": 275200 + }, + { + "epoch": 1.86, + "learning_rate": 0.00020617256692439238, + "loss": 0.1004, + "step": 275400 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002059789538622786, + "loss": 0.1307, + "step": 275600 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002057843445247827, + "loss": 0.1028, + "step": 275800 + }, + { + "epoch": 1.86, + "learning_rate": 0.00020558971200091428, + "loss": 0.1094, + "step": 276000 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002053950565086214, + "loss": 0.1136, + "step": 276200 + }, + { + "epoch": 1.86, + "learning_rate": 0.00020520037826587812, + "loss": 0.104, + "step": 276400 + }, + { + "epoch": 1.86, + "learning_rate": 0.0002050056774906837, + "loss": 0.108, + "step": 276600 + }, + { + "epoch": 1.86, + "learning_rate": 0.00020481095440106272, + "loss": 0.1083, + "step": 276800 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020461620921506486, + "loss": 0.1, + "step": 277000 + }, + { + "epoch": 1.87, + "learning_rate": 0.0002044214421507644, + "loss": 0.1045, + "step": 277200 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020422665342626015, + "loss": 0.1069, + "step": 277400 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020403184325967527, + "loss": 0.1211, + "step": 277600 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020383701186915681, + "loss": 0.0911, + "step": 277800 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020364215947287566, + "loss": 0.1028, + "step": 278000 + }, + { + "epoch": 1.87, + "learning_rate": 0.00020344728628902614, + "loss": 0.1103, + "step": 278200 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020325239253582604, + "loss": 0.0972, + "step": 278400 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002030574784315159, + "loss": 0.1101, + "step": 278600 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002028625441943593, + "loss": 0.1062, + "step": 278800 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020266759004264228, + "loss": 0.1075, + "step": 279000 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020247261619467312, + "loss": 0.0943, + "step": 279200 + }, + { + "epoch": 1.88, + "learning_rate": 0.00020227762286878224, + "loss": 0.0966, + "step": 279400 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002020835853937961, + "loss": 0.105, + "step": 279600 + }, + { + "epoch": 1.88, + "learning_rate": 0.0002018885538618027, + "loss": 0.0999, + "step": 279800 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020169350350591656, + "loss": 0.1132, + "step": 280000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020149940993528039, + "loss": 0.105, + "step": 280200 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020130432267826973, + "loss": 0.1023, + "step": 280400 + }, + { + "epoch": 1.89, + "learning_rate": 0.0002011092172515844, + "loss": 0.0991, + "step": 280600 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020091409387370222, + "loss": 0.1192, + "step": 280800 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020071895276312101, + "loss": 0.1115, + "step": 281000 + }, + { + "epoch": 1.89, + "learning_rate": 0.00020052379413835846, + "loss": 0.1081, + "step": 281200 + }, + { + "epoch": 1.9, + "learning_rate": 0.00020032861821795184, + "loss": 0.1058, + "step": 281400 + }, + { + "epoch": 1.9, + "learning_rate": 0.0002001334252204578, + "loss": 0.1074, + "step": 281600 + }, + { + "epoch": 1.9, + "learning_rate": 0.00019993821536445222, + "loss": 0.1086, + "step": 281800 + }, + { + "epoch": 1.9, + "learning_rate": 0.00019974298886852966, + "loss": 0.1091, + "step": 282000 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001995477459513035, + "loss": 0.1083, + "step": 282200 + }, + { + "epoch": 1.9, + "learning_rate": 0.0001993524868314054, + "loss": 0.1123, + "step": 282400 + }, + { + "epoch": 1.9, + "learning_rate": 0.00019915721172748514, + "loss": 0.0978, + "step": 282600 + }, + { + "epoch": 1.91, + "learning_rate": 0.0001989628973514115, + "loss": 0.1267, + "step": 282800 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019876759101265725, + "loss": 0.0962, + "step": 283000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019857226934484322, + "loss": 0.1077, + "step": 283200 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019837693256668934, + "loss": 0.1028, + "step": 283400 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019818158089693235, + "loss": 0.1053, + "step": 283600 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019798621455432573, + "loss": 0.104, + "step": 283800 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019779083375763941, + "loss": 0.0913, + "step": 284000 + }, + { + "epoch": 1.91, + "learning_rate": 0.00019759543872565946, + "loss": 0.1018, + "step": 284200 + }, + { + "epoch": 1.92, + "learning_rate": 0.0001974000296771879, + "loss": 0.1131, + "step": 284400 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019720460683104248, + "loss": 0.1039, + "step": 284600 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019700917040605631, + "loss": 0.1166, + "step": 284800 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019681469790287367, + "loss": 0.1165, + "step": 285000 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019661923504192736, + "loss": 0.1031, + "step": 285200 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019642375925763573, + "loss": 0.1168, + "step": 285400 + }, + { + "epoch": 1.92, + "learning_rate": 0.00019622827076889117, + "loss": 0.1137, + "step": 285600 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019603276979460036, + "loss": 0.102, + "step": 285800 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019583725655368395, + "loss": 0.1076, + "step": 286000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019564173126507638, + "loss": 0.1038, + "step": 286200 + }, + { + "epoch": 1.93, + "learning_rate": 0.0001954461941477256, + "loss": 0.1135, + "step": 286400 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019525064542059252, + "loss": 0.1059, + "step": 286600 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019505508530265155, + "loss": 0.1097, + "step": 286800 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019485951401288927, + "loss": 0.1102, + "step": 287000 + }, + { + "epoch": 1.93, + "learning_rate": 0.00019466393177030537, + "loss": 0.1019, + "step": 287200 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019446833879391132, + "loss": 0.095, + "step": 287400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019427273530273097, + "loss": 0.1002, + "step": 287600 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019407712151579958, + "loss": 0.1048, + "step": 287800 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001938814976521643, + "loss": 0.0957, + "step": 288000 + }, + { + "epoch": 1.94, + "learning_rate": 0.0001936858639308833, + "loss": 0.1086, + "step": 288200 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019349022057102595, + "loss": 0.1094, + "step": 288400 + }, + { + "epoch": 1.94, + "learning_rate": 0.00019329456779167232, + "loss": 0.1095, + "step": 288600 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019309988414433548, + "loss": 0.1254, + "step": 288800 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019290421322763307, + "loss": 0.1082, + "step": 289000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019270853354764138, + "loss": 0.1007, + "step": 289200 + }, + { + "epoch": 1.95, + "learning_rate": 0.0001925128453234813, + "loss": 0.109, + "step": 289400 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019231714877428302, + "loss": 0.1117, + "step": 289600 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019212144411918632, + "loss": 0.1033, + "step": 289800 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019192573157733988, + "loss": 0.1046, + "step": 290000 + }, + { + "epoch": 1.95, + "learning_rate": 0.00019173001136790129, + "loss": 0.1005, + "step": 290200 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001915342837100367, + "loss": 0.1151, + "step": 290400 + }, + { + "epoch": 1.96, + "learning_rate": 0.00019133854882292064, + "loss": 0.0974, + "step": 290600 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001911428069257357, + "loss": 0.1024, + "step": 290800 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001909470582376723, + "loss": 0.1075, + "step": 291000 + }, + { + "epoch": 1.96, + "learning_rate": 0.00019075130297792843, + "loss": 0.1117, + "step": 291200 + }, + { + "epoch": 1.96, + "learning_rate": 0.0001905555413657096, + "loss": 0.1029, + "step": 291400 + }, + { + "epoch": 1.96, + "learning_rate": 0.00019035977362022831, + "loss": 0.1056, + "step": 291600 + }, + { + "epoch": 1.97, + "learning_rate": 0.00019016399996070392, + "loss": 0.1045, + "step": 291800 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018996822060636244, + "loss": 0.1058, + "step": 292000 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018977243577643626, + "loss": 0.106, + "step": 292200 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018957664569016394, + "loss": 0.098, + "step": 292400 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018938085056678978, + "loss": 0.1008, + "step": 292600 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018918602963689183, + "loss": 0.1052, + "step": 292800 + }, + { + "epoch": 1.97, + "learning_rate": 0.00018899022511951726, + "loss": 0.1062, + "step": 293000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018879441622171074, + "loss": 0.1033, + "step": 293200 + }, + { + "epoch": 1.98, + "learning_rate": 0.0001885986031627376, + "loss": 0.0977, + "step": 293400 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018840278616186814, + "loss": 0.0991, + "step": 293600 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018820696543837674, + "loss": 0.1059, + "step": 293800 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018801114121154224, + "loss": 0.1171, + "step": 294000 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018781629284600801, + "loss": 0.1001, + "step": 294200 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018762046228511725, + "loss": 0.1031, + "step": 294400 + }, + { + "epoch": 1.98, + "learning_rate": 0.00018742462887764543, + "loss": 0.1137, + "step": 294600 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018722879284288572, + "loss": 0.1026, + "step": 294800 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001870329544001338, + "loss": 0.0991, + "step": 295000 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018683711376868837, + "loss": 0.1004, + "step": 295200 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001866412711678503, + "loss": 0.101, + "step": 295400 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018644542681692293, + "loss": 0.0959, + "step": 295600 + }, + { + "epoch": 1.99, + "learning_rate": 0.00018624958093521127, + "loss": 0.1299, + "step": 295800 + }, + { + "epoch": 1.99, + "learning_rate": 0.0001860537337420224, + "loss": 0.1124, + "step": 296000 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018585788545666447, + "loss": 0.0973, + "step": 296200 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018566203629844722, + "loss": 0.1098, + "step": 296400 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018546618648668112, + "loss": 0.0972, + "step": 296600 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001852713154926249, + "loss": 0.108, + "step": 296800 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.9392292210730847, + "eval_auc": 0.8528852594008389, + "eval_f1": 0.34279097031908445, + "eval_loss": 0.45720356702804565, + "eval_mcc": 0.3902260284864879, + "eval_precision": 0.22106795124992942, + "eval_recall": 0.7627978181868509, + "eval_runtime": 9308.0675, + "eval_samples_per_second": 23.702, + "eval_steps_per_second": 3.95, + "step": 296880 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001850754650322246, + "loss": 0.1136, + "step": 297000 + }, + { + "epoch": 2.0, + "learning_rate": 0.000184879614575114, + "loss": 0.1027, + "step": 297200 + }, + { + "epoch": 2.0, + "learning_rate": 0.00018468376434060496, + "loss": 0.1059, + "step": 297400 + }, + { + "epoch": 2.0, + "learning_rate": 0.0001844879145480095, + "loss": 0.0996, + "step": 297600 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018429206541663873, + "loss": 0.0973, + "step": 297800 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001840962171658032, + "loss": 0.107, + "step": 298000 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018390037001481238, + "loss": 0.116, + "step": 298200 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001837045241829747, + "loss": 0.1054, + "step": 298400 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001835086798895969, + "loss": 0.0951, + "step": 298600 + }, + { + "epoch": 2.01, + "learning_rate": 0.00018331283735398417, + "loss": 0.0998, + "step": 298800 + }, + { + "epoch": 2.01, + "learning_rate": 0.0001831169967954395, + "loss": 0.104, + "step": 299000 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018292115843326396, + "loss": 0.0909, + "step": 299200 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018272532248675595, + "loss": 0.1104, + "step": 299400 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018252948917521128, + "loss": 0.1006, + "step": 299600 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018233365871792265, + "loss": 0.1104, + "step": 299800 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018213783133417977, + "loss": 0.104, + "step": 300000 + }, + { + "epoch": 2.02, + "learning_rate": 0.00018194200724326877, + "loss": 0.1022, + "step": 300200 + }, + { + "epoch": 2.02, + "learning_rate": 0.0001817461866644722, + "loss": 0.0947, + "step": 300400 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018155036981706848, + "loss": 0.0978, + "step": 300600 + }, + { + "epoch": 2.03, + "learning_rate": 0.0001813545569203321, + "loss": 0.0989, + "step": 300800 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018115972722643154, + "loss": 0.1064, + "step": 301000 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018096392286634348, + "loss": 0.1014, + "step": 301200 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018076812311362204, + "loss": 0.1056, + "step": 301400 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018057232818752245, + "loss": 0.1015, + "step": 301600 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018037653830729451, + "loss": 0.1128, + "step": 301800 + }, + { + "epoch": 2.03, + "learning_rate": 0.00018018075369218234, + "loss": 0.1058, + "step": 302000 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017998497456142435, + "loss": 0.1074, + "step": 302200 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001797892011342526, + "loss": 0.0964, + "step": 302400 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017959343362989282, + "loss": 0.1082, + "step": 302600 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001793976722675641, + "loss": 0.0964, + "step": 302800 + }, + { + "epoch": 2.04, + "learning_rate": 0.00017920191726647884, + "loss": 0.0953, + "step": 303000 + }, + { + "epoch": 2.04, + "learning_rate": 0.000179006168845842, + "loss": 0.1006, + "step": 303200 + }, + { + "epoch": 2.04, + "learning_rate": 0.0001788104272248514, + "loss": 0.1027, + "step": 303400 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017861567127788598, + "loss": 0.1163, + "step": 303600 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017841994387701507, + "loss": 0.097, + "step": 303800 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017822422393224112, + "loss": 0.1049, + "step": 304000 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017802851166272992, + "loss": 0.116, + "step": 304200 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017783280728763884, + "loss": 0.1055, + "step": 304400 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017763711102611625, + "loss": 0.1174, + "step": 304600 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017744142309730155, + "loss": 0.0937, + "step": 304800 + }, + { + "epoch": 2.05, + "learning_rate": 0.00017724574372032464, + "loss": 0.1038, + "step": 305000 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017705007311430605, + "loss": 0.1094, + "step": 305200 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017685441149835636, + "loss": 0.1074, + "step": 305400 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017665875909157614, + "loss": 0.1146, + "step": 305600 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017646311611305553, + "loss": 0.0962, + "step": 305800 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017626943906671251, + "loss": 0.0977, + "step": 306000 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017607381550219104, + "loss": 0.1037, + "step": 306200 + }, + { + "epoch": 2.06, + "learning_rate": 0.00017587820202094516, + "loss": 0.0944, + "step": 306400 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017568259884202144, + "loss": 0.0989, + "step": 306600 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017548700618445504, + "loss": 0.0962, + "step": 306800 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017529142426726916, + "loss": 0.1016, + "step": 307000 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017509585330947522, + "loss": 0.1001, + "step": 307200 + }, + { + "epoch": 2.07, + "learning_rate": 0.0001749002935300722, + "loss": 0.1036, + "step": 307400 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017470474514804667, + "loss": 0.103, + "step": 307600 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017450920838237228, + "loss": 0.1031, + "step": 307800 + }, + { + "epoch": 2.07, + "learning_rate": 0.00017431368345200983, + "loss": 0.0962, + "step": 308000 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017411817057590677, + "loss": 0.1057, + "step": 308200 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017392266997299716, + "loss": 0.0948, + "step": 308400 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017372718186220115, + "loss": 0.0956, + "step": 308600 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017353170646242503, + "loss": 0.1022, + "step": 308800 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017333624399256084, + "loss": 0.1114, + "step": 309000 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017314079467148615, + "loss": 0.1072, + "step": 309200 + }, + { + "epoch": 2.08, + "learning_rate": 0.00017294535871806367, + "loss": 0.1057, + "step": 309400 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017274993635114138, + "loss": 0.1031, + "step": 309600 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017255452778955184, + "loss": 0.1013, + "step": 309800 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017236011018955237, + "loss": 0.1011, + "step": 310000 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017216472982330518, + "loss": 0.1136, + "step": 310200 + }, + { + "epoch": 2.09, + "learning_rate": 0.0001719693639177011, + "loss": 0.0908, + "step": 310400 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017177401269150938, + "loss": 0.105, + "step": 310600 + }, + { + "epoch": 2.09, + "learning_rate": 0.00017157867636348315, + "loss": 0.0948, + "step": 310800 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001713843317204492, + "loss": 0.1161, + "step": 311000 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017118902576772357, + "loss": 0.1001, + "step": 311200 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017099373536822785, + "loss": 0.1033, + "step": 311400 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017079846074064678, + "loss": 0.11, + "step": 311600 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017060320210364753, + "loss": 0.1015, + "step": 311800 + }, + { + "epoch": 2.1, + "learning_rate": 0.00017040795967587942, + "loss": 0.1028, + "step": 312000 + }, + { + "epoch": 2.1, + "learning_rate": 0.0001702127336759737, + "loss": 0.1042, + "step": 312200 + }, + { + "epoch": 2.1, + "learning_rate": 0.000170017524322543, + "loss": 0.097, + "step": 312400 + }, + { + "epoch": 2.11, + "learning_rate": 0.00016982233183418142, + "loss": 0.1065, + "step": 312600 + }, + { + "epoch": 2.11, + "learning_rate": 0.00016962715642946416, + "loss": 0.1038, + "step": 312800 + }, + { + "epoch": 2.11, + "learning_rate": 0.00016943199832694745, + "loss": 0.0971, + "step": 313000 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001692378334041325, + "loss": 0.1186, + "step": 313200 + }, + { + "epoch": 2.11, + "learning_rate": 0.00016904271047236735, + "loss": 0.1023, + "step": 313400 + }, + { + "epoch": 2.11, + "learning_rate": 0.0001688476054972613, + "loss": 0.1107, + "step": 313600 + }, + { + "epoch": 2.11, + "learning_rate": 0.00016865251869729154, + "loss": 0.1021, + "step": 313800 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016845745029091496, + "loss": 0.1033, + "step": 314000 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016826240049656774, + "loss": 0.103, + "step": 314200 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016806736953266542, + "loss": 0.1249, + "step": 314400 + }, + { + "epoch": 2.12, + "learning_rate": 0.0001678723576176023, + "loss": 0.1051, + "step": 314600 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016767736496975142, + "loss": 0.1015, + "step": 314800 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016748239180746412, + "loss": 0.1085, + "step": 315000 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016728743834907016, + "loss": 0.1132, + "step": 315200 + }, + { + "epoch": 2.12, + "learning_rate": 0.00016709250481287705, + "loss": 0.1047, + "step": 315400 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016689759141716998, + "loss": 0.1043, + "step": 315600 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016670269838021167, + "loss": 0.0951, + "step": 315800 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016650782592024204, + "loss": 0.0932, + "step": 316000 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016631297425547797, + "loss": 0.099, + "step": 316200 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016611814360411296, + "loss": 0.1031, + "step": 316400 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016592333418431702, + "loss": 0.0995, + "step": 316600 + }, + { + "epoch": 2.13, + "learning_rate": 0.00016572854621423654, + "loss": 0.104, + "step": 316800 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001655337799119937, + "loss": 0.103, + "step": 317000 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001653400091629659, + "loss": 0.1045, + "step": 317200 + }, + { + "epoch": 2.14, + "learning_rate": 0.0001651452867396053, + "loss": 0.0889, + "step": 317400 + }, + { + "epoch": 2.14, + "learning_rate": 0.00016495058663721225, + "loss": 0.1221, + "step": 317600 + }, + { + "epoch": 2.14, + "learning_rate": 0.00016475688240520132, + "loss": 0.1154, + "step": 317800 + }, + { + "epoch": 2.14, + "learning_rate": 0.00016456222748446265, + "loss": 0.103, + "step": 318000 + }, + { + "epoch": 2.14, + "learning_rate": 0.00016436759553759738, + "loss": 0.0989, + "step": 318200 + }, + { + "epoch": 2.14, + "learning_rate": 0.00016417298678255308, + "loss": 0.0938, + "step": 318400 + }, + { + "epoch": 2.15, + "learning_rate": 0.00016397840143725118, + "loss": 0.0976, + "step": 318600 + }, + { + "epoch": 2.15, + "learning_rate": 0.00016378383971958716, + "loss": 0.0925, + "step": 318800 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001635893018474299, + "loss": 0.1068, + "step": 319000 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001633947880386215, + "loss": 0.116, + "step": 319200 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001632002985109772, + "loss": 0.0956, + "step": 319400 + }, + { + "epoch": 2.15, + "learning_rate": 0.00016300583348228514, + "loss": 0.1011, + "step": 319600 + }, + { + "epoch": 2.15, + "learning_rate": 0.0001628113931703059, + "loss": 0.1048, + "step": 319800 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001626169777927724, + "loss": 0.1002, + "step": 320000 + }, + { + "epoch": 2.16, + "learning_rate": 0.00016242258756738958, + "loss": 0.1011, + "step": 320200 + }, + { + "epoch": 2.16, + "learning_rate": 0.00016222822271183445, + "loss": 0.0942, + "step": 320400 + }, + { + "epoch": 2.16, + "learning_rate": 0.00016203388344375536, + "loss": 0.0989, + "step": 320600 + }, + { + "epoch": 2.16, + "learning_rate": 0.00016183956998077213, + "loss": 0.0939, + "step": 320800 + }, + { + "epoch": 2.16, + "learning_rate": 0.00016164528254047558, + "loss": 0.1083, + "step": 321000 + }, + { + "epoch": 2.16, + "learning_rate": 0.0001614510213404275, + "loss": 0.1013, + "step": 321200 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016125678659816036, + "loss": 0.1113, + "step": 321400 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016106257853117678, + "loss": 0.099, + "step": 321600 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016086839735694964, + "loss": 0.0927, + "step": 321800 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016067424329292172, + "loss": 0.0958, + "step": 322000 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016048205768788416, + "loss": 0.1214, + "step": 322200 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016028795821993542, + "loss": 0.0996, + "step": 322400 + }, + { + "epoch": 2.17, + "learning_rate": 0.00016009388651215772, + "loss": 0.114, + "step": 322600 + }, + { + "epoch": 2.17, + "learning_rate": 0.00015989984278187117, + "loss": 0.1022, + "step": 322800 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015970582724636473, + "loss": 0.1061, + "step": 323000 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015951184012289558, + "loss": 0.1175, + "step": 323200 + }, + { + "epoch": 2.18, + "learning_rate": 0.0001593178816286892, + "loss": 0.1248, + "step": 323400 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015912395198093893, + "loss": 0.0932, + "step": 323600 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015893005139680603, + "loss": 0.1193, + "step": 323800 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015873618009341897, + "loss": 0.1056, + "step": 324000 + }, + { + "epoch": 2.18, + "learning_rate": 0.00015854233828787348, + "loss": 0.1027, + "step": 324200 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015834949518341062, + "loss": 0.1091, + "step": 324400 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001581557128745038, + "loss": 0.0996, + "step": 324600 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001579619607134418, + "loss": 0.0955, + "step": 324800 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015776823891718692, + "loss": 0.1066, + "step": 325000 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015757454770266762, + "loss": 0.0984, + "step": 325200 + }, + { + "epoch": 2.19, + "learning_rate": 0.000157380887286778, + "loss": 0.1073, + "step": 325400 + }, + { + "epoch": 2.19, + "learning_rate": 0.0001571872578863776, + "loss": 0.1041, + "step": 325600 + }, + { + "epoch": 2.19, + "learning_rate": 0.00015699365971829127, + "loss": 0.0897, + "step": 325800 + }, + { + "epoch": 2.2, + "learning_rate": 0.000156800092999309, + "loss": 0.1055, + "step": 326000 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015660655794618556, + "loss": 0.0948, + "step": 326200 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015641305477564005, + "loss": 0.1, + "step": 326400 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015621958370435604, + "loss": 0.1066, + "step": 326600 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015602614494898112, + "loss": 0.0981, + "step": 326800 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015583273872612672, + "loss": 0.1166, + "step": 327000 + }, + { + "epoch": 2.2, + "learning_rate": 0.00015564033203791492, + "loss": 0.1292, + "step": 327200 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015544699136442306, + "loss": 0.0936, + "step": 327400 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015525368387198386, + "loss": 0.1185, + "step": 327600 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015506040977706186, + "loss": 0.1084, + "step": 327800 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001548671692960839, + "loss": 0.1044, + "step": 328000 + }, + { + "epoch": 2.21, + "learning_rate": 0.0001546739626454396, + "loss": 0.1081, + "step": 328200 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015448079004148031, + "loss": 0.111, + "step": 328400 + }, + { + "epoch": 2.21, + "learning_rate": 0.00015428765170051945, + "loss": 0.0915, + "step": 328600 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015409454783883185, + "loss": 0.0924, + "step": 328800 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015390147867265418, + "loss": 0.0999, + "step": 329000 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015370844441818377, + "loss": 0.1082, + "step": 329200 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001535154452915792, + "loss": 0.1013, + "step": 329400 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015332248150895937, + "loss": 0.1045, + "step": 329600 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015312955328640408, + "loss": 0.1024, + "step": 329800 + }, + { + "epoch": 2.22, + "learning_rate": 0.00015293762521283475, + "loss": 0.1203, + "step": 330000 + }, + { + "epoch": 2.22, + "learning_rate": 0.0001527447685779895, + "loss": 0.1025, + "step": 330200 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015255194815012768, + "loss": 0.0996, + "step": 330400 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015235916414516835, + "loss": 0.0987, + "step": 330600 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001521664167789896, + "loss": 0.1076, + "step": 330800 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001519737062674286, + "loss": 0.1026, + "step": 331000 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015178103282628138, + "loss": 0.1008, + "step": 331200 + }, + { + "epoch": 2.23, + "learning_rate": 0.00015158839667130239, + "loss": 0.1033, + "step": 331400 + }, + { + "epoch": 2.23, + "learning_rate": 0.0001513957980182041, + "loss": 0.1095, + "step": 331600 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015120323708265725, + "loss": 0.0961, + "step": 331800 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015101071408029035, + "loss": 0.0913, + "step": 332000 + }, + { + "epoch": 2.24, + "learning_rate": 0.0001508182292266893, + "loss": 0.1027, + "step": 332200 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015062578273739727, + "loss": 0.0969, + "step": 332400 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015043337482791453, + "loss": 0.1114, + "step": 332600 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015024196746240976, + "loss": 0.1279, + "step": 332800 + }, + { + "epoch": 2.24, + "learning_rate": 0.00015004963716328432, + "loss": 0.1134, + "step": 333000 + }, + { + "epoch": 2.24, + "learning_rate": 0.00014985734608913203, + "loss": 0.1116, + "step": 333200 + }, + { + "epoch": 2.25, + "learning_rate": 0.00014966509445527926, + "loss": 0.1078, + "step": 333400 + }, + { + "epoch": 2.25, + "learning_rate": 0.000149472882477008, + "loss": 0.1076, + "step": 333600 + }, + { + "epoch": 2.25, + "learning_rate": 0.00014928071036955596, + "loss": 0.102, + "step": 333800 + }, + { + "epoch": 2.25, + "learning_rate": 0.000149088578348116, + "loss": 0.1087, + "step": 334000 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001488964866278365, + "loss": 0.1016, + "step": 334200 + }, + { + "epoch": 2.25, + "learning_rate": 0.0001487044354238203, + "loss": 0.1162, + "step": 334400 + }, + { + "epoch": 2.25, + "learning_rate": 0.00014851242495112513, + "loss": 0.1022, + "step": 334600 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001483204554247628, + "loss": 0.1051, + "step": 334800 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001481285270596997, + "loss": 0.0996, + "step": 335000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00014793664007085584, + "loss": 0.1101, + "step": 335200 + }, + { + "epoch": 2.26, + "learning_rate": 0.00014774575379628046, + "loss": 0.0993, + "step": 335400 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001475539499948858, + "loss": 0.1009, + "step": 335600 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001473621882131179, + "loss": 0.0936, + "step": 335800 + }, + { + "epoch": 2.26, + "learning_rate": 0.0001471704686657102, + "loss": 0.0951, + "step": 336000 + }, + { + "epoch": 2.26, + "learning_rate": 0.00014697879156734897, + "loss": 0.0928, + "step": 336200 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014678715713267295, + "loss": 0.0844, + "step": 336400 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014659556557627314, + "loss": 0.1171, + "step": 336600 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014640401711269236, + "loss": 0.1, + "step": 336800 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014621251195642523, + "loss": 0.0996, + "step": 337000 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014602105032191805, + "loss": 0.102, + "step": 337200 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014582963242356826, + "loss": 0.1033, + "step": 337400 + }, + { + "epoch": 2.27, + "learning_rate": 0.00014563825847572419, + "loss": 0.1083, + "step": 337600 + }, + { + "epoch": 2.28, + "learning_rate": 0.00014544692869268512, + "loss": 0.1036, + "step": 337800 + }, + { + "epoch": 2.28, + "learning_rate": 0.00014525564328870093, + "loss": 0.098, + "step": 338000 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001450644024779717, + "loss": 0.1062, + "step": 338200 + }, + { + "epoch": 2.28, + "learning_rate": 0.00014487320647464745, + "loss": 0.1026, + "step": 338400 + }, + { + "epoch": 2.28, + "learning_rate": 0.00014468205549282825, + "loss": 0.1009, + "step": 338600 + }, + { + "epoch": 2.28, + "learning_rate": 0.0001444909497465637, + "loss": 0.0908, + "step": 338800 + }, + { + "epoch": 2.28, + "learning_rate": 0.00014429988944985274, + "loss": 0.0984, + "step": 339000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014410982977586752, + "loss": 0.1012, + "step": 339200 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014391886079013753, + "loss": 0.1002, + "step": 339400 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014372793789458235, + "loss": 0.0982, + "step": 339600 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001435370613029963, + "loss": 0.0982, + "step": 339800 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014334623122912144, + "loss": 0.1017, + "step": 340000 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014315544788664816, + "loss": 0.1003, + "step": 340200 + }, + { + "epoch": 2.29, + "learning_rate": 0.00014296471148921431, + "loss": 0.1058, + "step": 340400 + }, + { + "epoch": 2.29, + "learning_rate": 0.0001427740222504052, + "loss": 0.1019, + "step": 340600 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014258338038375333, + "loss": 0.0966, + "step": 340800 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014239278610273816, + "loss": 0.1263, + "step": 341000 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014220223962078593, + "loss": 0.1079, + "step": 341200 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014201174115126926, + "loss": 0.0942, + "step": 341400 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014182129090750697, + "loss": 0.1076, + "step": 341600 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014163088910276403, + "loss": 0.1126, + "step": 341800 + }, + { + "epoch": 2.3, + "learning_rate": 0.00014144053595025106, + "loss": 0.0997, + "step": 342000 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014125023166312428, + "loss": 0.1142, + "step": 342200 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014105997645448497, + "loss": 0.0901, + "step": 342400 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014086977053737976, + "loss": 0.1036, + "step": 342600 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014067961412479992, + "loss": 0.1047, + "step": 342800 + }, + { + "epoch": 2.31, + "learning_rate": 0.0001404895074296813, + "loss": 0.1013, + "step": 343000 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014030040082417417, + "loss": 0.1205, + "step": 343200 + }, + { + "epoch": 2.31, + "learning_rate": 0.00014011039395131743, + "loss": 0.1045, + "step": 343400 + }, + { + "epoch": 2.31, + "learning_rate": 0.00013992043743333077, + "loss": 0.0957, + "step": 343600 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013973053148292624, + "loss": 0.1018, + "step": 343800 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013954067631275925, + "loss": 0.1008, + "step": 344000 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013935087213542822, + "loss": 0.1071, + "step": 344200 + }, + { + "epoch": 2.32, + "learning_rate": 0.0001391611191634746, + "loss": 0.0985, + "step": 344400 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013897141760938253, + "loss": 0.0948, + "step": 344600 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013878176768557856, + "loss": 0.1013, + "step": 344800 + }, + { + "epoch": 2.32, + "learning_rate": 0.00013859216960443127, + "loss": 0.1115, + "step": 345000 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001384026235782513, + "loss": 0.1203, + "step": 345200 + }, + { + "epoch": 2.33, + "learning_rate": 0.00013821312981929113, + "loss": 0.1, + "step": 345400 + }, + { + "epoch": 2.33, + "learning_rate": 0.00013802463561524874, + "loss": 0.1154, + "step": 345600 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001378352467632653, + "loss": 0.0989, + "step": 345800 + }, + { + "epoch": 2.33, + "learning_rate": 0.00013764591081384634, + "loss": 0.1086, + "step": 346000 + }, + { + "epoch": 2.33, + "learning_rate": 0.00013745662797900887, + "loss": 0.1027, + "step": 346200 + }, + { + "epoch": 2.33, + "learning_rate": 0.00013726739847071058, + "loss": 0.1127, + "step": 346400 + }, + { + "epoch": 2.33, + "learning_rate": 0.0001370782225008493, + "loss": 0.1105, + "step": 346600 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001368891002812631, + "loss": 0.1077, + "step": 346800 + }, + { + "epoch": 2.34, + "learning_rate": 0.00013670003202372969, + "loss": 0.0995, + "step": 347000 + }, + { + "epoch": 2.34, + "learning_rate": 0.00013651101793996643, + "loss": 0.0962, + "step": 347200 + }, + { + "epoch": 2.34, + "learning_rate": 0.00013632205824162992, + "loss": 0.103, + "step": 347400 + }, + { + "epoch": 2.34, + "learning_rate": 0.0001361331531403161, + "loss": 0.1115, + "step": 347600 + }, + { + "epoch": 2.34, + "learning_rate": 0.00013594430284755954, + "loss": 0.1018, + "step": 347800 + }, + { + "epoch": 2.34, + "learning_rate": 0.00013575550757483355, + "loss": 0.103, + "step": 348000 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013556676753354963, + "loss": 0.0979, + "step": 348200 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013537808293505782, + "loss": 0.093, + "step": 348400 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013518945399064575, + "loss": 0.0965, + "step": 348600 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001350008809115388, + "loss": 0.1026, + "step": 348800 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013481236390889978, + "loss": 0.0995, + "step": 349000 + }, + { + "epoch": 2.35, + "learning_rate": 0.0001346239031938289, + "loss": 0.0973, + "step": 349200 + }, + { + "epoch": 2.35, + "learning_rate": 0.00013443549897736306, + "loss": 0.1151, + "step": 349400 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013424715147047607, + "loss": 0.1064, + "step": 349600 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013405886088407808, + "loss": 0.1019, + "step": 349800 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013387156845382826, + "loss": 0.0969, + "step": 350000 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013368339205364926, + "loss": 0.0959, + "step": 350200 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013349527320525328, + "loss": 0.0958, + "step": 350400 + }, + { + "epoch": 2.36, + "learning_rate": 0.0001333072121192946, + "loss": 0.1066, + "step": 350600 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013311920900636274, + "loss": 0.1022, + "step": 350800 + }, + { + "epoch": 2.36, + "learning_rate": 0.00013293126407698218, + "loss": 0.1061, + "step": 351000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013274337754161247, + "loss": 0.0913, + "step": 351200 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013255554961064765, + "loss": 0.1066, + "step": 351400 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013236778049441624, + "loss": 0.1049, + "step": 351600 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013218100880646436, + "loss": 0.1188, + "step": 351800 + }, + { + "epoch": 2.37, + "learning_rate": 0.0001319933576537225, + "loss": 0.1021, + "step": 352000 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013180576594525267, + "loss": 0.097, + "step": 352200 + }, + { + "epoch": 2.37, + "learning_rate": 0.00013161823389111886, + "loss": 0.1314, + "step": 352400 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013143076170131802, + "loss": 0.099, + "step": 352600 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013124334958578048, + "loss": 0.108, + "step": 352800 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013105599775436883, + "loss": 0.1023, + "step": 353000 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013086870641687857, + "loss": 0.0957, + "step": 353200 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013068147578303702, + "loss": 0.1011, + "step": 353400 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013049430606250402, + "loss": 0.0946, + "step": 353600 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013030719746487084, + "loss": 0.0988, + "step": 353800 + }, + { + "epoch": 2.38, + "learning_rate": 0.00013012015019966042, + "loss": 0.0981, + "step": 354000 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001299331644763269, + "loss": 0.0932, + "step": 354200 + }, + { + "epoch": 2.39, + "learning_rate": 0.00012974624050425586, + "loss": 0.1002, + "step": 354400 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001295593784927633, + "loss": 0.1063, + "step": 354600 + }, + { + "epoch": 2.39, + "learning_rate": 0.00012937351249531093, + "loss": 0.1107, + "step": 354800 + }, + { + "epoch": 2.39, + "learning_rate": 0.00012918677472023102, + "loss": 0.0988, + "step": 355000 + }, + { + "epoch": 2.39, + "learning_rate": 0.00012900009953221569, + "loss": 0.1062, + "step": 355200 + }, + { + "epoch": 2.39, + "learning_rate": 0.0001288134871403024, + "loss": 0.0877, + "step": 355400 + }, + { + "epoch": 2.4, + "learning_rate": 0.00012862693775345853, + "loss": 0.095, + "step": 355600 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001284404515805808, + "loss": 0.1115, + "step": 355800 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001282540288304952, + "loss": 0.1071, + "step": 356000 + }, + { + "epoch": 2.4, + "learning_rate": 0.00012806766971195656, + "loss": 0.1109, + "step": 356200 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001278813744336486, + "loss": 0.1018, + "step": 356400 + }, + { + "epoch": 2.4, + "learning_rate": 0.0001276951432041835, + "loss": 0.097, + "step": 356600 + }, + { + "epoch": 2.4, + "learning_rate": 0.00012750897623210178, + "loss": 0.1116, + "step": 356800 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012732287372587187, + "loss": 0.1047, + "step": 357000 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012713683589389007, + "loss": 0.0908, + "step": 357200 + }, + { + "epoch": 2.41, + "learning_rate": 0.0001269517926474875, + "loss": 0.1082, + "step": 357400 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012676588446292912, + "loss": 0.0957, + "step": 357600 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012658004157633157, + "loss": 0.0995, + "step": 357800 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012639426419580065, + "loss": 0.0914, + "step": 358000 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012620855252936847, + "loss": 0.1007, + "step": 358200 + }, + { + "epoch": 2.41, + "learning_rate": 0.00012602290678499377, + "loss": 0.0993, + "step": 358400 + }, + { + "epoch": 2.42, + "learning_rate": 0.00012583732717056127, + "loss": 0.0978, + "step": 358600 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001256518138938819, + "loss": 0.0993, + "step": 358800 + }, + { + "epoch": 2.42, + "learning_rate": 0.0001254663671626921, + "loss": 0.1017, + "step": 359000 + }, + { + "epoch": 2.42, + "learning_rate": 0.00012528098718465392, + "loss": 0.1013, + "step": 359200 + }, + { + "epoch": 2.42, + "learning_rate": 0.00012509567416735444, + "loss": 0.1088, + "step": 359400 + }, + { + "epoch": 2.42, + "learning_rate": 0.00012491042831830606, + "loss": 0.1001, + "step": 359600 + }, + { + "epoch": 2.42, + "learning_rate": 0.00012472524984494574, + "loss": 0.1033, + "step": 359800 + }, + { + "epoch": 2.43, + "learning_rate": 0.0001245401389546352, + "loss": 0.1019, + "step": 360000 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012435509585466019, + "loss": 0.1072, + "step": 360200 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012417012075223078, + "loss": 0.0951, + "step": 360400 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012398521385448085, + "loss": 0.1083, + "step": 360600 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012380037536846792, + "loss": 0.0942, + "step": 360800 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012361560550117274, + "loss": 0.0898, + "step": 361000 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012343090445949949, + "loss": 0.1006, + "step": 361200 + }, + { + "epoch": 2.43, + "learning_rate": 0.00012324719543826092, + "loss": 0.1077, + "step": 361400 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001230626323215248, + "loss": 0.0861, + "step": 361600 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001228781386496262, + "loss": 0.1018, + "step": 361800 + }, + { + "epoch": 2.44, + "learning_rate": 0.00012269371462915983, + "loss": 0.1012, + "step": 362000 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001225093604666424, + "loss": 0.1009, + "step": 362200 + }, + { + "epoch": 2.44, + "learning_rate": 0.0001223250763685125, + "loss": 0.1008, + "step": 362400 + }, + { + "epoch": 2.44, + "learning_rate": 0.00012214086254113023, + "loss": 0.0896, + "step": 362600 + }, + { + "epoch": 2.44, + "learning_rate": 0.00012195671919077698, + "loss": 0.0979, + "step": 362800 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012177264652365515, + "loss": 0.1084, + "step": 363000 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012158864474588808, + "loss": 0.0963, + "step": 363200 + }, + { + "epoch": 2.45, + "learning_rate": 0.0001214056335397411, + "loss": 0.0992, + "step": 363400 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012122177380171675, + "loss": 0.1016, + "step": 363600 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012103890433285764, + "loss": 0.1261, + "step": 363800 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012085518745400288, + "loss": 0.1075, + "step": 364000 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012067154249186725, + "loss": 0.1148, + "step": 364200 + }, + { + "epoch": 2.45, + "learning_rate": 0.00012048796965209523, + "loss": 0.0964, + "step": 364400 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001203044691402504, + "loss": 0.0902, + "step": 364600 + }, + { + "epoch": 2.46, + "learning_rate": 0.00012012104116181545, + "loss": 0.0948, + "step": 364800 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001199376859221916, + "loss": 0.1204, + "step": 365000 + }, + { + "epoch": 2.46, + "learning_rate": 0.00011975440362669899, + "loss": 0.0992, + "step": 365200 + }, + { + "epoch": 2.46, + "learning_rate": 0.0001195711944805758, + "loss": 0.0975, + "step": 365400 + }, + { + "epoch": 2.46, + "learning_rate": 0.00011938805868897844, + "loss": 0.1056, + "step": 365600 + }, + { + "epoch": 2.46, + "learning_rate": 0.00011920499645698102, + "loss": 0.102, + "step": 365800 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011902292274808409, + "loss": 0.1135, + "step": 366000 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011884000787982256, + "loss": 0.0973, + "step": 366200 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011865716718486454, + "loss": 0.1332, + "step": 366400 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011847440086795387, + "loss": 0.1057, + "step": 366600 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011829170913375107, + "loss": 0.0963, + "step": 366800 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011810909218683297, + "loss": 0.0953, + "step": 367000 + }, + { + "epoch": 2.47, + "learning_rate": 0.00011792655023169285, + "loss": 0.0996, + "step": 367200 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011774408347274, + "loss": 0.0981, + "step": 367400 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011756169211429937, + "loss": 0.0889, + "step": 367600 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011737937636061158, + "loss": 0.098, + "step": 367800 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011719713641583258, + "loss": 0.0983, + "step": 368000 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011701497248403347, + "loss": 0.0945, + "step": 368200 + }, + { + "epoch": 2.48, + "learning_rate": 0.0001168328847692001, + "loss": 0.1066, + "step": 368400 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011665087347523309, + "loss": 0.1042, + "step": 368600 + }, + { + "epoch": 2.48, + "learning_rate": 0.00011646984828835294, + "loss": 0.1043, + "step": 368800 + }, + { + "epoch": 2.49, + "learning_rate": 0.00011628799006282917, + "loss": 0.1119, + "step": 369000 + }, + { + "epoch": 2.49, + "learning_rate": 0.00011610620886834118, + "loss": 0.1002, + "step": 369200 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001159245049084462, + "loss": 0.0941, + "step": 369400 + }, + { + "epoch": 2.49, + "learning_rate": 0.00011574287838661518, + "loss": 0.1004, + "step": 369600 + }, + { + "epoch": 2.49, + "learning_rate": 0.00011556132950623226, + "loss": 0.1128, + "step": 369800 + }, + { + "epoch": 2.49, + "learning_rate": 0.00011537985847059468, + "loss": 0.0966, + "step": 370000 + }, + { + "epoch": 2.49, + "learning_rate": 0.0001151984654829123, + "loss": 0.0856, + "step": 370200 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011501715074630793, + "loss": 0.0937, + "step": 370400 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011483591446381649, + "loss": 0.0977, + "step": 370600 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011465475683838518, + "loss": 0.0989, + "step": 370800 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011447367807287301, + "loss": 0.1096, + "step": 371000 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011429267837005082, + "loss": 0.0942, + "step": 371200 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011411175793260084, + "loss": 0.1119, + "step": 371400 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011393091696311648, + "loss": 0.1009, + "step": 371600 + }, + { + "epoch": 2.5, + "learning_rate": 0.00011375015566410225, + "loss": 0.0998, + "step": 371800 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011356947423797343, + "loss": 0.1087, + "step": 372000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011338887288705594, + "loss": 0.0988, + "step": 372200 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011320835181358581, + "loss": 0.1014, + "step": 372400 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011302791121970932, + "loss": 0.1031, + "step": 372600 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011284755130748267, + "loss": 0.1026, + "step": 372800 + }, + { + "epoch": 2.51, + "learning_rate": 0.0001126681734724828, + "loss": 0.1076, + "step": 373000 + }, + { + "epoch": 2.51, + "learning_rate": 0.00011248797512343315, + "loss": 0.1032, + "step": 373200 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011230785806065014, + "loss": 0.0958, + "step": 373400 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011212782248582758, + "loss": 0.1019, + "step": 373600 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011194786860056808, + "loss": 0.1019, + "step": 373800 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011176889576231639, + "loss": 0.1007, + "step": 374000 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011158910544966139, + "loss": 0.0976, + "step": 374200 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011140939742982113, + "loss": 0.0951, + "step": 374400 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011122977190403134, + "loss": 0.1018, + "step": 374600 + }, + { + "epoch": 2.52, + "learning_rate": 0.00011105022907343558, + "loss": 0.0896, + "step": 374800 + }, + { + "epoch": 2.53, + "learning_rate": 0.00011087076913908463, + "loss": 0.0906, + "step": 375000 + }, + { + "epoch": 2.53, + "learning_rate": 0.0001106913923019366, + "loss": 0.0944, + "step": 375200 + }, + { + "epoch": 2.53, + "learning_rate": 0.00011051209876285627, + "loss": 0.0973, + "step": 375400 + }, + { + "epoch": 2.53, + "learning_rate": 0.00011033288872261545, + "loss": 0.1068, + "step": 375600 + }, + { + "epoch": 2.53, + "learning_rate": 0.00011015376238189234, + "loss": 0.1043, + "step": 375800 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010997471994127134, + "loss": 0.0894, + "step": 376000 + }, + { + "epoch": 2.53, + "learning_rate": 0.00010979576160124292, + "loss": 0.0963, + "step": 376200 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010961688756220349, + "loss": 0.088, + "step": 376400 + }, + { + "epoch": 2.54, + "learning_rate": 0.000109438098024455, + "loss": 0.0978, + "step": 376600 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010925939318820466, + "loss": 0.0985, + "step": 376800 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010908077325356493, + "loss": 0.1019, + "step": 377000 + }, + { + "epoch": 2.54, + "learning_rate": 0.00010890313088269734, + "loss": 0.1189, + "step": 377200 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001087246809242309, + "loss": 0.0942, + "step": 377400 + }, + { + "epoch": 2.54, + "learning_rate": 0.0001085463164661423, + "loss": 0.1015, + "step": 377600 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010836803770816288, + "loss": 0.0942, + "step": 377800 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010818984484992784, + "loss": 0.0976, + "step": 378000 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010801173809097632, + "loss": 0.1048, + "step": 378200 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010783371763075129, + "loss": 0.1082, + "step": 378400 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010765578366859867, + "loss": 0.1012, + "step": 378600 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010747793640376775, + "loss": 0.1147, + "step": 378800 + }, + { + "epoch": 2.55, + "learning_rate": 0.0001073001760354105, + "loss": 0.099, + "step": 379000 + }, + { + "epoch": 2.55, + "learning_rate": 0.00010712250276258208, + "loss": 0.1104, + "step": 379200 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010694491678423967, + "loss": 0.1083, + "step": 379400 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010676830557369923, + "loss": 0.1113, + "step": 379600 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010659089434185469, + "loss": 0.1027, + "step": 379800 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010641357099978772, + "loss": 0.1103, + "step": 380000 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010623633574606374, + "loss": 0.0954, + "step": 380200 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010605918877914944, + "loss": 0.095, + "step": 380400 + }, + { + "epoch": 2.56, + "learning_rate": 0.00010588213029741291, + "loss": 0.1019, + "step": 380600 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010570516049912313, + "loss": 0.09, + "step": 380800 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010552827958244953, + "loss": 0.0975, + "step": 381000 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010535148774546213, + "loss": 0.0935, + "step": 381200 + }, + { + "epoch": 2.57, + "learning_rate": 0.0001051747851861313, + "loss": 0.1078, + "step": 381400 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010499817210232743, + "loss": 0.0932, + "step": 381600 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010482164869182056, + "loss": 0.095, + "step": 381800 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010464521515228036, + "loss": 0.111, + "step": 382000 + }, + { + "epoch": 2.57, + "learning_rate": 0.00010446887168127594, + "loss": 0.1032, + "step": 382200 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010429349951743742, + "loss": 0.1066, + "step": 382400 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010411733632300056, + "loss": 0.1097, + "step": 382600 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010394126378821466, + "loss": 0.0926, + "step": 382800 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010376528211024446, + "loss": 0.0991, + "step": 383000 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010358939148615315, + "loss": 0.1021, + "step": 383200 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010341359211290173, + "loss": 0.0961, + "step": 383400 + }, + { + "epoch": 2.58, + "learning_rate": 0.00010323788418734926, + "loss": 0.1129, + "step": 383600 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010306226790625215, + "loss": 0.0933, + "step": 383800 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010288674346626447, + "loss": 0.1021, + "step": 384000 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010271131106393713, + "loss": 0.093, + "step": 384200 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001025359708957183, + "loss": 0.0918, + "step": 384400 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010236159916639597, + "loss": 0.1259, + "step": 384600 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010218644359170317, + "loss": 0.1107, + "step": 384800 + }, + { + "epoch": 2.59, + "learning_rate": 0.00010201138083886187, + "loss": 0.0947, + "step": 385000 + }, + { + "epoch": 2.59, + "learning_rate": 0.0001018364111039061, + "loss": 0.1151, + "step": 385200 + }, + { + "epoch": 2.6, + "learning_rate": 0.00010166153458276572, + "loss": 0.0986, + "step": 385400 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001014867514712663, + "loss": 0.1059, + "step": 385600 + }, + { + "epoch": 2.6, + "learning_rate": 0.00010131206196512888, + "loss": 0.0925, + "step": 385800 + }, + { + "epoch": 2.6, + "learning_rate": 0.00010113746625996951, + "loss": 0.1139, + "step": 386000 + }, + { + "epoch": 2.6, + "learning_rate": 0.0001009629645512992, + "loss": 0.1175, + "step": 386200 + }, + { + "epoch": 2.6, + "learning_rate": 0.00010078855703452384, + "loss": 0.092, + "step": 386400 + }, + { + "epoch": 2.6, + "learning_rate": 0.00010061424390494383, + "loss": 0.0903, + "step": 386600 + }, + { + "epoch": 2.61, + "learning_rate": 0.00010044002535775379, + "loss": 0.0964, + "step": 386800 + }, + { + "epoch": 2.61, + "learning_rate": 0.00010026677197080932, + "loss": 0.1179, + "step": 387000 + }, + { + "epoch": 2.61, + "learning_rate": 0.00010009274269821214, + "loss": 0.1127, + "step": 387200 + }, + { + "epoch": 2.61, + "learning_rate": 9.991880859197841e-05, + "loss": 0.1011, + "step": 387400 + }, + { + "epoch": 2.61, + "learning_rate": 9.974496984687836e-05, + "loss": 0.1025, + "step": 387600 + }, + { + "epoch": 2.61, + "learning_rate": 9.95712266575753e-05, + "loss": 0.1009, + "step": 387800 + }, + { + "epoch": 2.61, + "learning_rate": 9.939757921862591e-05, + "loss": 0.1119, + "step": 388000 + }, + { + "epoch": 2.62, + "learning_rate": 9.922402772447935e-05, + "loss": 0.1035, + "step": 388200 + }, + { + "epoch": 2.62, + "learning_rate": 9.905057236947736e-05, + "loss": 0.0966, + "step": 388400 + }, + { + "epoch": 2.62, + "learning_rate": 9.887721334785389e-05, + "loss": 0.092, + "step": 388600 + }, + { + "epoch": 2.62, + "learning_rate": 9.87039508537355e-05, + "loss": 0.0909, + "step": 388800 + }, + { + "epoch": 2.62, + "learning_rate": 9.853078508114025e-05, + "loss": 0.0932, + "step": 389000 + }, + { + "epoch": 2.62, + "learning_rate": 9.835771622397795e-05, + "loss": 0.0974, + "step": 389200 + }, + { + "epoch": 2.62, + "learning_rate": 9.818474447604984e-05, + "loss": 0.1015, + "step": 389400 + }, + { + "epoch": 2.62, + "learning_rate": 9.801273416091236e-05, + "loss": 0.0883, + "step": 389600 + }, + { + "epoch": 2.63, + "learning_rate": 9.783995672445778e-05, + "loss": 0.0948, + "step": 389800 + }, + { + "epoch": 2.63, + "learning_rate": 9.766727697702099e-05, + "loss": 0.1062, + "step": 390000 + }, + { + "epoch": 2.63, + "learning_rate": 9.749469511196766e-05, + "loss": 0.0984, + "step": 390200 + }, + { + "epoch": 2.63, + "learning_rate": 9.732221132255383e-05, + "loss": 0.0957, + "step": 390400 + }, + { + "epoch": 2.63, + "learning_rate": 9.714982580192565e-05, + "loss": 0.0966, + "step": 390600 + }, + { + "epoch": 2.63, + "learning_rate": 9.697753874311913e-05, + "loss": 0.1056, + "step": 390800 + }, + { + "epoch": 2.63, + "learning_rate": 9.680535033906026e-05, + "loss": 0.1003, + "step": 391000 + }, + { + "epoch": 2.64, + "learning_rate": 9.663326078256447e-05, + "loss": 0.1083, + "step": 391200 + }, + { + "epoch": 2.64, + "learning_rate": 9.646127026633645e-05, + "loss": 0.1138, + "step": 391400 + }, + { + "epoch": 2.64, + "learning_rate": 9.628937898296992e-05, + "loss": 0.0993, + "step": 391600 + }, + { + "epoch": 2.64, + "learning_rate": 9.611758712494759e-05, + "loss": 0.0994, + "step": 391800 + }, + { + "epoch": 2.64, + "learning_rate": 9.594589488464096e-05, + "loss": 0.1005, + "step": 392000 + }, + { + "epoch": 2.64, + "learning_rate": 9.577430245430975e-05, + "loss": 0.1023, + "step": 392200 + }, + { + "epoch": 2.64, + "learning_rate": 9.560281002610186e-05, + "loss": 0.0911, + "step": 392400 + }, + { + "epoch": 2.64, + "learning_rate": 9.543227450367323e-05, + "loss": 0.1144, + "step": 392600 + }, + { + "epoch": 2.65, + "learning_rate": 9.526098215330053e-05, + "loss": 0.1209, + "step": 392800 + }, + { + "epoch": 2.65, + "learning_rate": 9.508979037986393e-05, + "loss": 0.1067, + "step": 393000 + }, + { + "epoch": 2.65, + "learning_rate": 9.491869937506272e-05, + "loss": 0.0948, + "step": 393200 + }, + { + "epoch": 2.65, + "learning_rate": 9.474770933048338e-05, + "loss": 0.0912, + "step": 393400 + }, + { + "epoch": 2.65, + "learning_rate": 9.457682043759951e-05, + "loss": 0.1039, + "step": 393600 + }, + { + "epoch": 2.65, + "learning_rate": 9.440603288777125e-05, + "loss": 0.1014, + "step": 393800 + }, + { + "epoch": 2.65, + "learning_rate": 9.42353468722454e-05, + "loss": 0.1031, + "step": 394000 + }, + { + "epoch": 2.66, + "learning_rate": 9.40647625821549e-05, + "loss": 0.0927, + "step": 394200 + }, + { + "epoch": 2.66, + "learning_rate": 9.389428020851902e-05, + "loss": 0.1037, + "step": 394400 + }, + { + "epoch": 2.66, + "learning_rate": 9.37238999422426e-05, + "loss": 0.095, + "step": 394600 + }, + { + "epoch": 2.66, + "learning_rate": 9.355362197411646e-05, + "loss": 0.1023, + "step": 394800 + }, + { + "epoch": 2.66, + "learning_rate": 9.338344649481655e-05, + "loss": 0.0944, + "step": 395000 + }, + { + "epoch": 2.66, + "learning_rate": 9.321337369490438e-05, + "loss": 0.0985, + "step": 395200 + }, + { + "epoch": 2.66, + "learning_rate": 9.304340376482618e-05, + "loss": 0.1044, + "step": 395400 + }, + { + "epoch": 2.67, + "learning_rate": 9.287353689491322e-05, + "loss": 0.0921, + "step": 395600 + }, + { + "epoch": 2.67, + "learning_rate": 9.270462183632897e-05, + "loss": 0.1007, + "step": 395800 + }, + { + "epoch": 2.67, + "learning_rate": 9.25349611396029e-05, + "loss": 0.1066, + "step": 396000 + }, + { + "epoch": 2.67, + "learning_rate": 9.236540407239268e-05, + "loss": 0.0962, + "step": 396200 + }, + { + "epoch": 2.67, + "learning_rate": 9.21959508245671e-05, + "loss": 0.0937, + "step": 396400 + }, + { + "epoch": 2.67, + "learning_rate": 9.202660158587865e-05, + "loss": 0.1002, + "step": 396600 + }, + { + "epoch": 2.67, + "learning_rate": 9.185735654596356e-05, + "loss": 0.0958, + "step": 396800 + }, + { + "epoch": 2.67, + "learning_rate": 9.168821589434131e-05, + "loss": 0.1003, + "step": 397000 + }, + { + "epoch": 2.68, + "learning_rate": 9.151917982041441e-05, + "loss": 0.0953, + "step": 397200 + }, + { + "epoch": 2.68, + "learning_rate": 9.135024851346822e-05, + "loss": 0.0986, + "step": 397400 + }, + { + "epoch": 2.68, + "learning_rate": 9.118142216267094e-05, + "loss": 0.0989, + "step": 397600 + }, + { + "epoch": 2.68, + "learning_rate": 9.101270095707323e-05, + "loss": 0.1054, + "step": 397800 + }, + { + "epoch": 2.68, + "learning_rate": 9.08440850856079e-05, + "loss": 0.0947, + "step": 398000 + }, + { + "epoch": 2.68, + "learning_rate": 9.067557473708978e-05, + "loss": 0.0926, + "step": 398200 + }, + { + "epoch": 2.68, + "learning_rate": 9.050801186013034e-05, + "loss": 0.1183, + "step": 398400 + }, + { + "epoch": 2.69, + "learning_rate": 9.033971259350866e-05, + "loss": 0.0857, + "step": 398600 + }, + { + "epoch": 2.69, + "learning_rate": 9.017151941462705e-05, + "loss": 0.098, + "step": 398800 + }, + { + "epoch": 2.69, + "learning_rate": 9.000343251182724e-05, + "loss": 0.0992, + "step": 399000 + }, + { + "epoch": 2.69, + "learning_rate": 8.983545207333166e-05, + "loss": 0.0951, + "step": 399200 + }, + { + "epoch": 2.69, + "learning_rate": 8.966757828724382e-05, + "loss": 0.0974, + "step": 399400 + }, + { + "epoch": 2.69, + "learning_rate": 8.949981134154747e-05, + "loss": 0.102, + "step": 399600 + }, + { + "epoch": 2.69, + "learning_rate": 8.933215142410702e-05, + "loss": 0.1115, + "step": 399800 + }, + { + "epoch": 2.69, + "learning_rate": 8.916459872266682e-05, + "loss": 0.0893, + "step": 400000 + }, + { + "epoch": 2.7, + "learning_rate": 8.899715342485134e-05, + "loss": 0.0985, + "step": 400200 + }, + { + "epoch": 2.7, + "learning_rate": 8.882981571816455e-05, + "loss": 0.0924, + "step": 400400 + }, + { + "epoch": 2.7, + "learning_rate": 8.866258578999022e-05, + "loss": 0.1092, + "step": 400600 + }, + { + "epoch": 2.7, + "learning_rate": 8.84962991685287e-05, + "loss": 0.107, + "step": 400800 + }, + { + "epoch": 2.7, + "learning_rate": 8.832928481781708e-05, + "loss": 0.0951, + "step": 401000 + }, + { + "epoch": 2.7, + "learning_rate": 8.816237880610904e-05, + "loss": 0.0973, + "step": 401200 + }, + { + "epoch": 2.7, + "learning_rate": 8.799558132030461e-05, + "loss": 0.1106, + "step": 401400 + }, + { + "epoch": 2.71, + "learning_rate": 8.782889254718265e-05, + "loss": 0.1053, + "step": 401600 + }, + { + "epoch": 2.71, + "learning_rate": 8.766231267339994e-05, + "loss": 0.0896, + "step": 401800 + }, + { + "epoch": 2.71, + "learning_rate": 8.749584188549167e-05, + "loss": 0.0881, + "step": 402000 + }, + { + "epoch": 2.71, + "learning_rate": 8.732948036987061e-05, + "loss": 0.0927, + "step": 402200 + }, + { + "epoch": 2.71, + "learning_rate": 8.716322831282717e-05, + "loss": 0.0916, + "step": 402400 + }, + { + "epoch": 2.71, + "learning_rate": 8.699708590052936e-05, + "loss": 0.0935, + "step": 402600 + }, + { + "epoch": 2.71, + "learning_rate": 8.683105331902236e-05, + "loss": 0.0925, + "step": 402800 + }, + { + "epoch": 2.71, + "learning_rate": 8.666513075422828e-05, + "loss": 0.0985, + "step": 403000 + }, + { + "epoch": 2.72, + "learning_rate": 8.649931839194601e-05, + "loss": 0.1016, + "step": 403200 + }, + { + "epoch": 2.72, + "learning_rate": 8.633444465282406e-05, + "loss": 0.1098, + "step": 403400 + }, + { + "epoch": 2.72, + "learning_rate": 8.61688526991386e-05, + "loss": 0.0946, + "step": 403600 + }, + { + "epoch": 2.72, + "learning_rate": 8.600337150369374e-05, + "loss": 0.097, + "step": 403800 + }, + { + "epoch": 2.72, + "learning_rate": 8.583800125179424e-05, + "loss": 0.1026, + "step": 404000 + }, + { + "epoch": 2.72, + "learning_rate": 8.567274212862068e-05, + "loss": 0.1129, + "step": 404200 + }, + { + "epoch": 2.72, + "learning_rate": 8.550759431922901e-05, + "loss": 0.1012, + "step": 404400 + }, + { + "epoch": 2.73, + "learning_rate": 8.534255800855056e-05, + "loss": 0.1051, + "step": 404600 + }, + { + "epoch": 2.73, + "learning_rate": 8.517763338139195e-05, + "loss": 0.0858, + "step": 404800 + }, + { + "epoch": 2.73, + "learning_rate": 8.501282062243469e-05, + "loss": 0.0948, + "step": 405000 + }, + { + "epoch": 2.73, + "learning_rate": 8.484811991623495e-05, + "loss": 0.1021, + "step": 405200 + }, + { + "epoch": 2.73, + "learning_rate": 8.468353144722339e-05, + "loss": 0.0959, + "step": 405400 + }, + { + "epoch": 2.73, + "learning_rate": 8.451905539970511e-05, + "loss": 0.0899, + "step": 405600 + }, + { + "epoch": 2.73, + "learning_rate": 8.435469195785931e-05, + "loss": 0.0923, + "step": 405800 + }, + { + "epoch": 2.74, + "learning_rate": 8.419044130573905e-05, + "loss": 0.0968, + "step": 406000 + }, + { + "epoch": 2.74, + "learning_rate": 8.402630362727103e-05, + "loss": 0.0987, + "step": 406200 + }, + { + "epoch": 2.74, + "learning_rate": 8.38622791062554e-05, + "loss": 0.0931, + "step": 406400 + }, + { + "epoch": 2.74, + "learning_rate": 8.369836792636598e-05, + "loss": 0.0954, + "step": 406600 + }, + { + "epoch": 2.74, + "learning_rate": 8.353457027114924e-05, + "loss": 0.0964, + "step": 406800 + }, + { + "epoch": 2.74, + "learning_rate": 8.337088632402468e-05, + "loss": 0.0952, + "step": 407000 + }, + { + "epoch": 2.74, + "learning_rate": 8.320731626828434e-05, + "loss": 0.0971, + "step": 407200 + }, + { + "epoch": 2.74, + "learning_rate": 8.304386028709313e-05, + "loss": 0.093, + "step": 407400 + }, + { + "epoch": 2.75, + "learning_rate": 8.288133498758748e-05, + "loss": 0.0928, + "step": 407600 + }, + { + "epoch": 2.75, + "learning_rate": 8.271892298612768e-05, + "loss": 0.1274, + "step": 407800 + }, + { + "epoch": 2.75, + "learning_rate": 8.255580917915565e-05, + "loss": 0.1012, + "step": 408000 + }, + { + "epoch": 2.75, + "learning_rate": 8.239281017628585e-05, + "loss": 0.0956, + "step": 408200 + }, + { + "epoch": 2.75, + "learning_rate": 8.222992616004338e-05, + "loss": 0.1064, + "step": 408400 + }, + { + "epoch": 2.75, + "learning_rate": 8.206715731282473e-05, + "loss": 0.0948, + "step": 408600 + }, + { + "epoch": 2.75, + "learning_rate": 8.190450381689727e-05, + "loss": 0.0978, + "step": 408800 + }, + { + "epoch": 2.76, + "learning_rate": 8.174196585439935e-05, + "loss": 0.1054, + "step": 409000 + }, + { + "epoch": 2.76, + "learning_rate": 8.157954360733977e-05, + "loss": 0.1066, + "step": 409200 + }, + { + "epoch": 2.76, + "learning_rate": 8.141723725759798e-05, + "loss": 0.0969, + "step": 409400 + }, + { + "epoch": 2.76, + "learning_rate": 8.12550469869234e-05, + "loss": 0.101, + "step": 409600 + }, + { + "epoch": 2.76, + "learning_rate": 8.109297297693573e-05, + "loss": 0.0964, + "step": 409800 + }, + { + "epoch": 2.76, + "learning_rate": 8.093101540912421e-05, + "loss": 0.1076, + "step": 410000 + }, + { + "epoch": 2.76, + "learning_rate": 8.076998337916846e-05, + "loss": 0.098, + "step": 410200 + }, + { + "epoch": 2.76, + "learning_rate": 8.060825865518122e-05, + "loss": 0.123, + "step": 410400 + }, + { + "epoch": 2.77, + "learning_rate": 8.044665091615013e-05, + "loss": 0.0951, + "step": 410600 + }, + { + "epoch": 2.77, + "learning_rate": 8.028516034304228e-05, + "loss": 0.0947, + "step": 410800 + }, + { + "epoch": 2.77, + "learning_rate": 8.012378711669371e-05, + "loss": 0.1047, + "step": 411000 + }, + { + "epoch": 2.77, + "learning_rate": 7.996253141780896e-05, + "loss": 0.0945, + "step": 411200 + }, + { + "epoch": 2.77, + "learning_rate": 7.980139342696131e-05, + "loss": 0.093, + "step": 411400 + }, + { + "epoch": 2.77, + "learning_rate": 7.964037332459185e-05, + "loss": 0.1196, + "step": 411600 + }, + { + "epoch": 2.77, + "learning_rate": 7.94794712910098e-05, + "loss": 0.0881, + "step": 411800 + }, + { + "epoch": 2.78, + "learning_rate": 7.931868750639203e-05, + "loss": 0.0966, + "step": 412000 + }, + { + "epoch": 2.78, + "learning_rate": 7.915802215078338e-05, + "loss": 0.0917, + "step": 412200 + }, + { + "epoch": 2.78, + "learning_rate": 7.899747540409569e-05, + "loss": 0.0974, + "step": 412400 + }, + { + "epoch": 2.78, + "learning_rate": 7.883704744610814e-05, + "loss": 0.1022, + "step": 412600 + }, + { + "epoch": 2.78, + "learning_rate": 7.86767384564667e-05, + "loss": 0.0984, + "step": 412800 + }, + { + "epoch": 2.78, + "learning_rate": 7.851654861468458e-05, + "loss": 0.1012, + "step": 413000 + }, + { + "epoch": 2.78, + "learning_rate": 7.835647810014111e-05, + "loss": 0.1074, + "step": 413200 + }, + { + "epoch": 2.78, + "learning_rate": 7.81973265495537e-05, + "loss": 0.1134, + "step": 413400 + }, + { + "epoch": 2.79, + "learning_rate": 7.8037494628218e-05, + "loss": 0.0978, + "step": 413600 + }, + { + "epoch": 2.79, + "learning_rate": 7.78777825705624e-05, + "loss": 0.1013, + "step": 413800 + }, + { + "epoch": 2.79, + "learning_rate": 7.771819055543133e-05, + "loss": 0.1, + "step": 414000 + }, + { + "epoch": 2.79, + "learning_rate": 7.75587187615348e-05, + "loss": 0.0994, + "step": 414200 + }, + { + "epoch": 2.79, + "learning_rate": 7.739936736744827e-05, + "loss": 0.1118, + "step": 414400 + }, + { + "epoch": 2.79, + "learning_rate": 7.724013655161241e-05, + "loss": 0.0969, + "step": 414600 + }, + { + "epoch": 2.79, + "learning_rate": 7.708102649233277e-05, + "loss": 0.1056, + "step": 414800 + }, + { + "epoch": 2.8, + "learning_rate": 7.692203736777963e-05, + "loss": 0.1068, + "step": 415000 + }, + { + "epoch": 2.8, + "learning_rate": 7.676316935598797e-05, + "loss": 0.0928, + "step": 415200 + }, + { + "epoch": 2.8, + "learning_rate": 7.66044226348572e-05, + "loss": 0.1025, + "step": 415400 + }, + { + "epoch": 2.8, + "learning_rate": 7.644579738215075e-05, + "loss": 0.0809, + "step": 415600 + }, + { + "epoch": 2.8, + "learning_rate": 7.628729377549603e-05, + "loss": 0.0982, + "step": 415800 + }, + { + "epoch": 2.8, + "learning_rate": 7.612891199238438e-05, + "loss": 0.1081, + "step": 416000 + }, + { + "epoch": 2.8, + "learning_rate": 7.597065221017067e-05, + "loss": 0.0947, + "step": 416200 + }, + { + "epoch": 2.81, + "learning_rate": 7.581251460607308e-05, + "loss": 0.1047, + "step": 416400 + }, + { + "epoch": 2.81, + "learning_rate": 7.565528912876618e-05, + "loss": 0.1005, + "step": 416600 + }, + { + "epoch": 2.81, + "learning_rate": 7.549739579890737e-05, + "loss": 0.0968, + "step": 416800 + }, + { + "epoch": 2.81, + "learning_rate": 7.533962517711405e-05, + "loss": 0.1006, + "step": 417000 + }, + { + "epoch": 2.81, + "learning_rate": 7.518197744005657e-05, + "loss": 0.0914, + "step": 417200 + }, + { + "epoch": 2.81, + "learning_rate": 7.50244527642679e-05, + "loss": 0.0916, + "step": 417400 + }, + { + "epoch": 2.81, + "learning_rate": 7.486705132614297e-05, + "loss": 0.0894, + "step": 417600 + }, + { + "epoch": 2.81, + "learning_rate": 7.470977330193901e-05, + "loss": 0.0854, + "step": 417800 + }, + { + "epoch": 2.82, + "learning_rate": 7.455261886777472e-05, + "loss": 0.1029, + "step": 418000 + }, + { + "epoch": 2.82, + "learning_rate": 7.43955881996307e-05, + "loss": 0.1002, + "step": 418200 + }, + { + "epoch": 2.82, + "learning_rate": 7.423868147334866e-05, + "loss": 0.0922, + "step": 418400 + }, + { + "epoch": 2.82, + "learning_rate": 7.408189886463187e-05, + "loss": 0.0949, + "step": 418600 + }, + { + "epoch": 2.82, + "learning_rate": 7.392524054904427e-05, + "loss": 0.1019, + "step": 418800 + }, + { + "epoch": 2.82, + "learning_rate": 7.37687067020109e-05, + "loss": 0.1083, + "step": 419000 + }, + { + "epoch": 2.82, + "learning_rate": 7.361307923449183e-05, + "loss": 0.1276, + "step": 419200 + }, + { + "epoch": 2.83, + "learning_rate": 7.345679422575341e-05, + "loss": 0.0988, + "step": 419400 + }, + { + "epoch": 2.83, + "learning_rate": 7.330063421013223e-05, + "loss": 0.0929, + "step": 419600 + }, + { + "epoch": 2.83, + "learning_rate": 7.314459936249514e-05, + "loss": 0.1096, + "step": 419800 + }, + { + "epoch": 2.83, + "learning_rate": 7.298868985756884e-05, + "loss": 0.093, + "step": 420000 + }, + { + "epoch": 2.83, + "learning_rate": 7.28329058699398e-05, + "loss": 0.0938, + "step": 420200 + }, + { + "epoch": 2.83, + "learning_rate": 7.267724757405392e-05, + "loss": 0.0928, + "step": 420400 + }, + { + "epoch": 2.83, + "learning_rate": 7.252171514421626e-05, + "loss": 0.0987, + "step": 420600 + }, + { + "epoch": 2.83, + "learning_rate": 7.23663087545909e-05, + "loss": 0.092, + "step": 420800 + }, + { + "epoch": 2.84, + "learning_rate": 7.221102857920091e-05, + "loss": 0.0979, + "step": 421000 + }, + { + "epoch": 2.84, + "learning_rate": 7.205587479192805e-05, + "loss": 0.1012, + "step": 421200 + }, + { + "epoch": 2.84, + "learning_rate": 7.190084756651242e-05, + "loss": 0.1055, + "step": 421400 + }, + { + "epoch": 2.84, + "learning_rate": 7.174594707655235e-05, + "loss": 0.1097, + "step": 421600 + }, + { + "epoch": 2.84, + "learning_rate": 7.159117349550445e-05, + "loss": 0.1035, + "step": 421800 + }, + { + "epoch": 2.84, + "learning_rate": 7.143652699668319e-05, + "loss": 0.106, + "step": 422000 + }, + { + "epoch": 2.84, + "learning_rate": 7.128200775326058e-05, + "loss": 0.0934, + "step": 422200 + }, + { + "epoch": 2.85, + "learning_rate": 7.112761593826617e-05, + "loss": 0.0993, + "step": 422400 + }, + { + "epoch": 2.85, + "learning_rate": 7.097335172458691e-05, + "loss": 0.0987, + "step": 422600 + }, + { + "epoch": 2.85, + "learning_rate": 7.08192152849669e-05, + "loss": 0.0988, + "step": 422800 + }, + { + "epoch": 2.85, + "learning_rate": 7.0665206792007e-05, + "loss": 0.0969, + "step": 423000 + }, + { + "epoch": 2.85, + "learning_rate": 7.051209550105261e-05, + "loss": 0.0941, + "step": 423200 + }, + { + "epoch": 2.85, + "learning_rate": 7.035834277675685e-05, + "loss": 0.1009, + "step": 423400 + }, + { + "epoch": 2.85, + "learning_rate": 7.020471851520315e-05, + "loss": 0.096, + "step": 423600 + }, + { + "epoch": 2.86, + "learning_rate": 7.005122288841885e-05, + "loss": 0.0945, + "step": 423800 + }, + { + "epoch": 2.86, + "learning_rate": 6.989862258169717e-05, + "loss": 0.0957, + "step": 424000 + }, + { + "epoch": 2.86, + "learning_rate": 6.974538409463861e-05, + "loss": 0.1032, + "step": 424200 + }, + { + "epoch": 2.86, + "learning_rate": 6.959227475670901e-05, + "loss": 0.0928, + "step": 424400 + }, + { + "epoch": 2.86, + "learning_rate": 6.943929473935912e-05, + "loss": 0.0958, + "step": 424600 + }, + { + "epoch": 2.86, + "learning_rate": 6.928644421389496e-05, + "loss": 0.1102, + "step": 424800 + }, + { + "epoch": 2.86, + "learning_rate": 6.913372335147758e-05, + "loss": 0.0907, + "step": 425000 + }, + { + "epoch": 2.86, + "learning_rate": 6.898113232312271e-05, + "loss": 0.0998, + "step": 425200 + }, + { + "epoch": 2.87, + "learning_rate": 6.882867129970068e-05, + "loss": 0.0921, + "step": 425400 + }, + { + "epoch": 2.87, + "learning_rate": 6.867634045193635e-05, + "loss": 0.1, + "step": 425600 + }, + { + "epoch": 2.87, + "learning_rate": 6.852413995040884e-05, + "loss": 0.1099, + "step": 425800 + }, + { + "epoch": 2.87, + "learning_rate": 6.837206996555116e-05, + "loss": 0.097, + "step": 426000 + }, + { + "epoch": 2.87, + "learning_rate": 6.822013066765024e-05, + "loss": 0.086, + "step": 426200 + }, + { + "epoch": 2.87, + "learning_rate": 6.80683222268467e-05, + "loss": 0.0963, + "step": 426400 + }, + { + "epoch": 2.87, + "learning_rate": 6.791664481313453e-05, + "loss": 0.1032, + "step": 426600 + }, + { + "epoch": 2.88, + "learning_rate": 6.776509859636122e-05, + "loss": 0.1028, + "step": 426800 + }, + { + "epoch": 2.88, + "learning_rate": 6.7613683746227e-05, + "loss": 0.0912, + "step": 427000 + }, + { + "epoch": 2.88, + "learning_rate": 6.746240043228534e-05, + "loss": 0.1039, + "step": 427200 + }, + { + "epoch": 2.88, + "learning_rate": 6.731124882394217e-05, + "loss": 0.1011, + "step": 427400 + }, + { + "epoch": 2.88, + "learning_rate": 6.716022909045609e-05, + "loss": 0.0888, + "step": 427600 + }, + { + "epoch": 2.88, + "learning_rate": 6.700934140093791e-05, + "loss": 0.0983, + "step": 427800 + }, + { + "epoch": 2.88, + "learning_rate": 6.685933937257466e-05, + "loss": 0.0931, + "step": 428000 + }, + { + "epoch": 2.88, + "learning_rate": 6.670871561540496e-05, + "loss": 0.1012, + "step": 428200 + }, + { + "epoch": 2.89, + "learning_rate": 6.655822440780486e-05, + "loss": 0.0977, + "step": 428400 + }, + { + "epoch": 2.89, + "learning_rate": 6.640861738032626e-05, + "loss": 0.1045, + "step": 428600 + }, + { + "epoch": 2.89, + "learning_rate": 6.625839111242305e-05, + "loss": 0.0931, + "step": 428800 + }, + { + "epoch": 2.89, + "learning_rate": 6.610829789835981e-05, + "loss": 0.1025, + "step": 429000 + }, + { + "epoch": 2.89, + "learning_rate": 6.595833790620997e-05, + "loss": 0.0963, + "step": 429200 + }, + { + "epoch": 2.89, + "learning_rate": 6.580851130389762e-05, + "loss": 0.0958, + "step": 429400 + }, + { + "epoch": 2.89, + "learning_rate": 6.56588182591975e-05, + "loss": 0.1033, + "step": 429600 + }, + { + "epoch": 2.9, + "learning_rate": 6.550925893973497e-05, + "loss": 0.1042, + "step": 429800 + }, + { + "epoch": 2.9, + "learning_rate": 6.535983351298554e-05, + "loss": 0.0875, + "step": 430000 + }, + { + "epoch": 2.9, + "learning_rate": 6.521054214627478e-05, + "loss": 0.0978, + "step": 430200 + }, + { + "epoch": 2.9, + "learning_rate": 6.506138500677804e-05, + "loss": 0.0912, + "step": 430400 + }, + { + "epoch": 2.9, + "learning_rate": 6.491236226152055e-05, + "loss": 0.0979, + "step": 430600 + }, + { + "epoch": 2.9, + "learning_rate": 6.476347407737704e-05, + "loss": 0.1066, + "step": 430800 + }, + { + "epoch": 2.9, + "learning_rate": 6.461472062107139e-05, + "loss": 0.1031, + "step": 431000 + }, + { + "epoch": 2.9, + "learning_rate": 6.446610205917668e-05, + "loss": 0.0903, + "step": 431200 + }, + { + "epoch": 2.91, + "learning_rate": 6.431761855811501e-05, + "loss": 0.0993, + "step": 431400 + }, + { + "epoch": 2.91, + "learning_rate": 6.416927028415727e-05, + "loss": 0.1095, + "step": 431600 + }, + { + "epoch": 2.91, + "learning_rate": 6.402179813076112e-05, + "loss": 0.0983, + "step": 431800 + }, + { + "epoch": 2.91, + "learning_rate": 6.387372013100907e-05, + "loss": 0.1057, + "step": 432000 + }, + { + "epoch": 2.91, + "learning_rate": 6.372651722892513e-05, + "loss": 0.1134, + "step": 432200 + }, + { + "epoch": 2.91, + "learning_rate": 6.357871016333328e-05, + "loss": 0.0924, + "step": 432400 + }, + { + "epoch": 2.91, + "learning_rate": 6.343103915226995e-05, + "loss": 0.1137, + "step": 432600 + }, + { + "epoch": 2.92, + "learning_rate": 6.3283504361096e-05, + "loss": 0.0977, + "step": 432800 + }, + { + "epoch": 2.92, + "learning_rate": 6.313610595502e-05, + "loss": 0.0962, + "step": 433000 + }, + { + "epoch": 2.92, + "learning_rate": 6.298884409909755e-05, + "loss": 0.0896, + "step": 433200 + }, + { + "epoch": 2.92, + "learning_rate": 6.284171895823159e-05, + "loss": 0.0928, + "step": 433400 + }, + { + "epoch": 2.92, + "learning_rate": 6.269473069717178e-05, + "loss": 0.1085, + "step": 433600 + }, + { + "epoch": 2.92, + "learning_rate": 6.254787948051462e-05, + "loss": 0.1039, + "step": 433800 + }, + { + "epoch": 2.92, + "learning_rate": 6.240116547270303e-05, + "loss": 0.1079, + "step": 434000 + }, + { + "epoch": 2.93, + "learning_rate": 6.225458883802649e-05, + "loss": 0.096, + "step": 434200 + }, + { + "epoch": 2.93, + "learning_rate": 6.210814974062042e-05, + "loss": 0.097, + "step": 434400 + }, + { + "epoch": 2.93, + "learning_rate": 6.196184834446627e-05, + "loss": 0.0947, + "step": 434600 + }, + { + "epoch": 2.93, + "learning_rate": 6.181568481339145e-05, + "loss": 0.0931, + "step": 434800 + }, + { + "epoch": 2.93, + "learning_rate": 6.16696593110689e-05, + "loss": 0.099, + "step": 435000 + }, + { + "epoch": 2.93, + "learning_rate": 6.152377200101692e-05, + "loss": 0.1054, + "step": 435200 + }, + { + "epoch": 2.93, + "learning_rate": 6.13780230465991e-05, + "loss": 0.1034, + "step": 435400 + }, + { + "epoch": 2.93, + "learning_rate": 6.123241261102416e-05, + "loss": 0.0923, + "step": 435600 + }, + { + "epoch": 2.94, + "learning_rate": 6.108694085734573e-05, + "loss": 0.1077, + "step": 435800 + }, + { + "epoch": 2.94, + "learning_rate": 6.0941607948462065e-05, + "loss": 0.1018, + "step": 436000 + }, + { + "epoch": 2.94, + "learning_rate": 6.079641404711589e-05, + "loss": 0.0982, + "step": 436200 + }, + { + "epoch": 2.94, + "learning_rate": 6.065135931589437e-05, + "loss": 0.0911, + "step": 436400 + }, + { + "epoch": 2.94, + "learning_rate": 6.0506443917228924e-05, + "loss": 0.0906, + "step": 436600 + }, + { + "epoch": 2.94, + "learning_rate": 6.036166801339474e-05, + "loss": 0.0959, + "step": 436800 + }, + { + "epoch": 2.94, + "learning_rate": 6.021775460008057e-05, + "loss": 0.0941, + "step": 437000 + }, + { + "epoch": 2.95, + "learning_rate": 6.007397961018465e-05, + "loss": 0.1101, + "step": 437200 + }, + { + "epoch": 2.95, + "learning_rate": 5.992962176232532e-05, + "loss": 0.1018, + "step": 437400 + }, + { + "epoch": 2.95, + "learning_rate": 5.978540405521875e-05, + "loss": 0.104, + "step": 437600 + }, + { + "epoch": 2.95, + "learning_rate": 5.9641326650358936e-05, + "loss": 0.0855, + "step": 437800 + }, + { + "epoch": 2.95, + "learning_rate": 5.949738970908289e-05, + "loss": 0.1033, + "step": 438000 + }, + { + "epoch": 2.95, + "learning_rate": 5.935359339257013e-05, + "loss": 0.0987, + "step": 438200 + }, + { + "epoch": 2.95, + "learning_rate": 5.920993786184275e-05, + "loss": 0.1051, + "step": 438400 + }, + { + "epoch": 2.95, + "learning_rate": 5.906642327776529e-05, + "loss": 0.1073, + "step": 438600 + }, + { + "epoch": 2.96, + "learning_rate": 5.8923049801044444e-05, + "loss": 0.0945, + "step": 438800 + }, + { + "epoch": 2.96, + "learning_rate": 5.8779817592228824e-05, + "loss": 0.0989, + "step": 439000 + }, + { + "epoch": 2.96, + "learning_rate": 5.863672681170879e-05, + "loss": 0.1021, + "step": 439200 + }, + { + "epoch": 2.96, + "learning_rate": 5.8493777619716514e-05, + "loss": 0.0914, + "step": 439400 + }, + { + "epoch": 2.96, + "learning_rate": 5.835097017632559e-05, + "loss": 0.0945, + "step": 439600 + }, + { + "epoch": 2.96, + "learning_rate": 5.820830464145072e-05, + "loss": 0.0915, + "step": 439800 + }, + { + "epoch": 2.96, + "learning_rate": 5.806578117484779e-05, + "loss": 0.1051, + "step": 440000 + }, + { + "epoch": 2.97, + "learning_rate": 5.792339993611364e-05, + "loss": 0.0911, + "step": 440200 + }, + { + "epoch": 2.97, + "learning_rate": 5.7781161084685854e-05, + "loss": 0.0983, + "step": 440400 + }, + { + "epoch": 2.97, + "learning_rate": 5.7639064779842496e-05, + "loss": 0.1068, + "step": 440600 + }, + { + "epoch": 2.97, + "learning_rate": 5.749711118070199e-05, + "loss": 0.1034, + "step": 440800 + }, + { + "epoch": 2.97, + "learning_rate": 5.735530044622303e-05, + "loss": 0.098, + "step": 441000 + }, + { + "epoch": 2.97, + "learning_rate": 5.7213632735204417e-05, + "loss": 0.1024, + "step": 441200 + }, + { + "epoch": 2.97, + "learning_rate": 5.70721082062846e-05, + "loss": 0.104, + "step": 441400 + }, + { + "epoch": 2.97, + "learning_rate": 5.693072701794174e-05, + "loss": 0.0931, + "step": 441600 + }, + { + "epoch": 2.98, + "learning_rate": 5.6789489328493565e-05, + "loss": 0.1019, + "step": 441800 + }, + { + "epoch": 2.98, + "learning_rate": 5.6648395296097156e-05, + "loss": 0.108, + "step": 442000 + }, + { + "epoch": 2.98, + "learning_rate": 5.650744507874858e-05, + "loss": 0.0971, + "step": 442200 + }, + { + "epoch": 2.98, + "learning_rate": 5.636663883428288e-05, + "loss": 0.0936, + "step": 442400 + }, + { + "epoch": 2.98, + "learning_rate": 5.622597672037397e-05, + "loss": 0.0941, + "step": 442600 + }, + { + "epoch": 2.98, + "learning_rate": 5.6086161124486716e-05, + "loss": 0.1003, + "step": 442800 + }, + { + "epoch": 2.98, + "learning_rate": 5.594578702144904e-05, + "loss": 0.091, + "step": 443000 + }, + { + "epoch": 2.99, + "learning_rate": 5.580555752023524e-05, + "loss": 0.0984, + "step": 443200 + }, + { + "epoch": 2.99, + "learning_rate": 5.566547277787321e-05, + "loss": 0.1192, + "step": 443400 + }, + { + "epoch": 2.99, + "learning_rate": 5.5525532951229e-05, + "loss": 0.0958, + "step": 443600 + }, + { + "epoch": 2.99, + "learning_rate": 5.53857381970062e-05, + "loss": 0.1088, + "step": 443800 + }, + { + "epoch": 2.99, + "learning_rate": 5.5246088671746126e-05, + "loss": 0.0956, + "step": 444000 + }, + { + "epoch": 2.99, + "learning_rate": 5.510728169062227e-05, + "loss": 0.1229, + "step": 444200 + }, + { + "epoch": 2.99, + "learning_rate": 5.496792236416436e-05, + "loss": 0.0951, + "step": 444400 + }, + { + "epoch": 3.0, + "learning_rate": 5.482870873453642e-05, + "loss": 0.0956, + "step": 444600 + }, + { + "epoch": 3.0, + "learning_rate": 5.4689640957628964e-05, + "loss": 0.1208, + "step": 444800 + }, + { + "epoch": 3.0, + "learning_rate": 5.455071918916899e-05, + "loss": 0.0985, + "step": 445000 + }, + { + "epoch": 3.0, + "learning_rate": 5.441194358472039e-05, + "loss": 0.0906, + "step": 445200 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.9445510110929896, + "eval_auc": 0.8424179078508613, + "eval_f1": 0.3554441818087852, + "eval_loss": 0.5060210824012756, + "eval_mcc": 0.39553114656159183, + "eval_precision": 0.2343126275786286, + "eval_recall": 0.7358567800530282, + "eval_runtime": 9304.6272, + "eval_samples_per_second": 23.711, + "eval_steps_per_second": 3.952, + "step": 445320 + } + ], + "logging_steps": 200, + "max_steps": 593760, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 5.353126841884437e+17, + "trial_name": null, + "trial_params": null +}