{ "best_metric": 0.35228549223252686, "best_model_checkpoint": "esm2_t12_35M_lora_binding_sites_2023-09-24_04-19-04/checkpoint-148440", "epoch": 1.0, "eval_steps": 500, "global_step": 148440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00037015670298819826, "loss": 0.5221, "step": 200 }, { "epoch": 0.0, "learning_rate": 0.0003701563931502581, "loss": 0.3727, "step": 400 }, { "epoch": 0.0, "learning_rate": 0.00037015587606324446, "loss": 0.3152, "step": 600 }, { "epoch": 0.01, "learning_rate": 0.00037015515172773665, "loss": 0.2911, "step": 800 }, { "epoch": 0.01, "learning_rate": 0.0003701542201445457, "loss": 0.2675, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.0003701530813147147, "loss": 0.2512, "step": 1200 }, { "epoch": 0.01, "learning_rate": 0.00037015173523951894, "loss": 0.2466, "step": 1400 }, { "epoch": 0.01, "learning_rate": 0.00037015018192046586, "loss": 0.2271, "step": 1600 }, { "epoch": 0.01, "learning_rate": 0.0003701484213592947, "loss": 0.2265, "step": 1800 }, { "epoch": 0.01, "learning_rate": 0.000370146453557977, "loss": 0.2299, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.0003701442899094131, "loss": 0.22, "step": 2200 }, { "epoch": 0.02, "learning_rate": 0.0003701419086708161, "loss": 0.2157, "step": 2400 }, { "epoch": 0.02, "learning_rate": 0.00037013932019936536, "loss": 0.215, "step": 2600 }, { "epoch": 0.02, "learning_rate": 0.0003701365244979595, "loss": 0.2036, "step": 2800 }, { "epoch": 0.02, "learning_rate": 0.00037013352156972917, "loss": 0.1992, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.00037013031141803696, "loss": 0.2077, "step": 3200 }, { "epoch": 0.02, "learning_rate": 0.00037012689404647756, "loss": 0.1882, "step": 3400 }, { "epoch": 0.02, "learning_rate": 0.0003701232694588778, "loss": 0.1935, "step": 3600 }, { "epoch": 0.03, "learning_rate": 0.00037011943765929636, "loss": 0.1966, "step": 3800 }, { "epoch": 0.03, "learning_rate": 0.0003701154193624823, "loss": 0.1852, "step": 4000 }, { "epoch": 0.03, "learning_rate": 0.0003701111741880463, "loss": 0.1871, "step": 4200 }, { "epoch": 0.03, "learning_rate": 0.00037010672181517294, "loss": 0.1653, "step": 4400 }, { "epoch": 0.03, "learning_rate": 0.0003701020622488479, "loss": 0.1865, "step": 4600 }, { "epoch": 0.03, "learning_rate": 0.00037009719549428895, "loss": 0.1746, "step": 4800 }, { "epoch": 0.03, "learning_rate": 0.0003700921215569458, "loss": 0.1787, "step": 5000 }, { "epoch": 0.04, "learning_rate": 0.00037008684044250026, "loss": 0.1873, "step": 5200 }, { "epoch": 0.04, "learning_rate": 0.00037008135215686603, "loss": 0.1742, "step": 5400 }, { "epoch": 0.04, "learning_rate": 0.0003700756567061889, "loss": 0.1777, "step": 5600 }, { "epoch": 0.04, "learning_rate": 0.0003700697540968466, "loss": 0.1792, "step": 5800 }, { "epoch": 0.04, "learning_rate": 0.00037006364433544883, "loss": 0.1794, "step": 6000 }, { "epoch": 0.04, "learning_rate": 0.0003700573274288372, "loss": 0.1708, "step": 6200 }, { "epoch": 0.04, "learning_rate": 0.00037005080338408546, "loss": 0.161, "step": 6400 }, { "epoch": 0.04, "learning_rate": 0.0003700440722084991, "loss": 0.1661, "step": 6600 }, { "epoch": 0.05, "learning_rate": 0.00037003720431783943, "loss": 0.1833, "step": 6800 }, { "epoch": 0.05, "learning_rate": 0.0003700300609745444, "loss": 0.1802, "step": 7000 }, { "epoch": 0.05, "learning_rate": 0.00037002271052364195, "loss": 0.1557, "step": 7200 }, { "epoch": 0.05, "learning_rate": 0.0003700151529733631, "loss": 0.1763, "step": 7400 }, { "epoch": 0.05, "learning_rate": 0.0003700073883321708, "loss": 0.1661, "step": 7600 }, { "epoch": 0.05, "learning_rate": 0.0003699994166087597, "loss": 0.152, "step": 7800 }, { "epoch": 0.05, "learning_rate": 0.00036999123781205663, "loss": 0.1616, "step": 8000 }, { "epoch": 0.06, "learning_rate": 0.00036998285195121995, "loss": 0.1552, "step": 8200 }, { "epoch": 0.06, "learning_rate": 0.00036997425903564034, "loss": 0.1595, "step": 8400 }, { "epoch": 0.06, "learning_rate": 0.00036996545907493994, "loss": 0.1567, "step": 8600 }, { "epoch": 0.06, "learning_rate": 0.0003699564520789729, "loss": 0.1572, "step": 8800 }, { "epoch": 0.06, "learning_rate": 0.00036994723805782516, "loss": 0.1628, "step": 9000 }, { "epoch": 0.06, "learning_rate": 0.00036993781702181466, "loss": 0.1562, "step": 9200 }, { "epoch": 0.06, "learning_rate": 0.00036992818898149083, "loss": 0.1583, "step": 9400 }, { "epoch": 0.06, "learning_rate": 0.00036991835394763526, "loss": 0.1514, "step": 9600 }, { "epoch": 0.07, "learning_rate": 0.000369908311931261, "loss": 0.1651, "step": 9800 }, { "epoch": 0.07, "learning_rate": 0.0003698980629436131, "loss": 0.1488, "step": 10000 }, { "epoch": 0.07, "learning_rate": 0.0003698876069961683, "loss": 0.1471, "step": 10200 }, { "epoch": 0.07, "learning_rate": 0.0003698769441006351, "loss": 0.1516, "step": 10400 }, { "epoch": 0.07, "learning_rate": 0.0003698660742689538, "loss": 0.1411, "step": 10600 }, { "epoch": 0.07, "learning_rate": 0.0003698549975132963, "loss": 0.1573, "step": 10800 }, { "epoch": 0.07, "learning_rate": 0.00036984377077907405, "loss": 0.1472, "step": 11000 }, { "epoch": 0.08, "learning_rate": 0.0003698322812473697, "loss": 0.1445, "step": 11200 }, { "epoch": 0.08, "learning_rate": 0.0003698205848295303, "loss": 0.1544, "step": 11400 }, { "epoch": 0.08, "learning_rate": 0.0003698086815386536, "loss": 0.1545, "step": 11600 }, { "epoch": 0.08, "learning_rate": 0.00036979657138806857, "loss": 0.1405, "step": 11800 }, { "epoch": 0.08, "learning_rate": 0.00036978425439133624, "loss": 0.1558, "step": 12000 }, { "epoch": 0.08, "learning_rate": 0.00036977173056224906, "loss": 0.154, "step": 12200 }, { "epoch": 0.08, "learning_rate": 0.0003697589999148311, "loss": 0.1572, "step": 12400 }, { "epoch": 0.08, "learning_rate": 0.0003697460624633381, "loss": 0.1462, "step": 12600 }, { "epoch": 0.09, "learning_rate": 0.0003697329182222573, "loss": 0.1413, "step": 12800 }, { "epoch": 0.09, "learning_rate": 0.0003697195672063075, "loss": 0.1513, "step": 13000 }, { "epoch": 0.09, "learning_rate": 0.00036970600943043915, "loss": 0.1496, "step": 13200 }, { "epoch": 0.09, "learning_rate": 0.00036969231424668924, "loss": 0.1537, "step": 13400 }, { "epoch": 0.09, "learning_rate": 0.0003696783440303689, "loss": 0.147, "step": 13600 }, { "epoch": 0.09, "learning_rate": 0.0003696641671002913, "loss": 0.153, "step": 13800 }, { "epoch": 0.09, "learning_rate": 0.0003696497834723318, "loss": 0.1424, "step": 14000 }, { "epoch": 0.1, "learning_rate": 0.0003696351931625971, "loss": 0.1413, "step": 14200 }, { "epoch": 0.1, "learning_rate": 0.00036962039618742523, "loss": 0.1382, "step": 14400 }, { "epoch": 0.1, "learning_rate": 0.00036960539256338583, "loss": 0.1432, "step": 14600 }, { "epoch": 0.1, "learning_rate": 0.0003695901823072799, "loss": 0.1377, "step": 14800 }, { "epoch": 0.1, "learning_rate": 0.0003695747654361396, "loss": 0.1535, "step": 15000 }, { "epoch": 0.1, "learning_rate": 0.0003695591419672288, "loss": 0.1338, "step": 15200 }, { "epoch": 0.1, "learning_rate": 0.0003695433915821276, "loss": 0.1478, "step": 15400 }, { "epoch": 0.11, "learning_rate": 0.0003695273560031605, "loss": 0.1427, "step": 15600 }, { "epoch": 0.11, "learning_rate": 0.00036951111387951155, "loss": 0.149, "step": 15800 }, { "epoch": 0.11, "learning_rate": 0.00036949466522936866, "loss": 0.1435, "step": 16000 }, { "epoch": 0.11, "learning_rate": 0.0003694780100711509, "loss": 0.1281, "step": 16200 }, { "epoch": 0.11, "learning_rate": 0.00036946123324535805, "loss": 0.156, "step": 16400 }, { "epoch": 0.11, "learning_rate": 0.0003694441661594781, "loss": 0.1463, "step": 16600 }, { "epoch": 0.11, "learning_rate": 0.0003694268926220718, "loss": 0.1403, "step": 16800 }, { "epoch": 0.11, "learning_rate": 0.00036940941265248186, "loss": 0.1346, "step": 17000 }, { "epoch": 0.12, "learning_rate": 0.0003693917262702823, "loss": 0.1271, "step": 17200 }, { "epoch": 0.12, "learning_rate": 0.0003693738334952782, "loss": 0.1397, "step": 17400 }, { "epoch": 0.12, "learning_rate": 0.0003693557343475058, "loss": 0.1281, "step": 17600 }, { "epoch": 0.12, "learning_rate": 0.0003693374288472324, "loss": 0.1448, "step": 17800 }, { "epoch": 0.12, "learning_rate": 0.00036931891701495626, "loss": 0.1354, "step": 18000 }, { "epoch": 0.12, "learning_rate": 0.0003693001988714069, "loss": 0.1408, "step": 18200 }, { "epoch": 0.12, "learning_rate": 0.0003692812744375448, "loss": 0.131, "step": 18400 }, { "epoch": 0.13, "learning_rate": 0.0003692621437345614, "loss": 0.1347, "step": 18600 }, { "epoch": 0.13, "learning_rate": 0.00036924280678387906, "loss": 0.1622, "step": 18800 }, { "epoch": 0.13, "learning_rate": 0.00036922326360715115, "loss": 0.1395, "step": 19000 }, { "epoch": 0.13, "learning_rate": 0.0003692035142262621, "loss": 0.1348, "step": 19200 }, { "epoch": 0.13, "learning_rate": 0.00036918355866332706, "loss": 0.1491, "step": 19400 }, { "epoch": 0.13, "learning_rate": 0.0003691633969406922, "loss": 0.1351, "step": 19600 }, { "epoch": 0.13, "learning_rate": 0.00036914302908093444, "loss": 0.1387, "step": 19800 }, { "epoch": 0.13, "learning_rate": 0.0003691224551068615, "loss": 0.1466, "step": 20000 }, { "epoch": 0.14, "learning_rate": 0.0003691016750415121, "loss": 0.1238, "step": 20200 }, { "epoch": 0.14, "learning_rate": 0.00036908068890815544, "loss": 0.1465, "step": 20400 }, { "epoch": 0.14, "learning_rate": 0.00036905949673029186, "loss": 0.1405, "step": 20600 }, { "epoch": 0.14, "learning_rate": 0.0003690382060350824, "loss": 0.1367, "step": 20800 }, { "epoch": 0.14, "learning_rate": 0.0003690166028695521, "loss": 0.138, "step": 21000 }, { "epoch": 0.14, "learning_rate": 0.00036899479373127795, "loss": 0.1339, "step": 21200 }, { "epoch": 0.14, "learning_rate": 0.0003689727786446815, "loss": 0.1398, "step": 21400 }, { "epoch": 0.15, "learning_rate": 0.0003689505576344153, "loss": 0.1278, "step": 21600 }, { "epoch": 0.15, "learning_rate": 0.0003689281307253622, "loss": 0.1472, "step": 21800 }, { "epoch": 0.15, "learning_rate": 0.00036890549794263563, "loss": 0.1371, "step": 22000 }, { "epoch": 0.15, "learning_rate": 0.00036888265931157973, "loss": 0.1401, "step": 22200 }, { "epoch": 0.15, "learning_rate": 0.000368859614857769, "loss": 0.1349, "step": 22400 }, { "epoch": 0.15, "learning_rate": 0.0003688363646070085, "loss": 0.1389, "step": 22600 }, { "epoch": 0.15, "learning_rate": 0.0003688130263772538, "loss": 0.1367, "step": 22800 }, { "epoch": 0.15, "learning_rate": 0.000368789365639588, "loss": 0.138, "step": 23000 }, { "epoch": 0.16, "learning_rate": 0.000368765499183637, "loss": 0.1284, "step": 23200 }, { "epoch": 0.16, "learning_rate": 0.00036874142703612636, "loss": 0.1224, "step": 23400 }, { "epoch": 0.16, "learning_rate": 0.00036871714922401174, "loss": 0.1329, "step": 23600 }, { "epoch": 0.16, "learning_rate": 0.00036869266577447944, "loss": 0.1408, "step": 23800 }, { "epoch": 0.16, "learning_rate": 0.0003686679767149457, "loss": 0.1425, "step": 24000 }, { "epoch": 0.16, "learning_rate": 0.0003686430820730573, "loss": 0.1288, "step": 24200 }, { "epoch": 0.16, "learning_rate": 0.0003686179818766911, "loss": 0.1299, "step": 24400 }, { "epoch": 0.17, "learning_rate": 0.00036859267615395395, "loss": 0.1377, "step": 24600 }, { "epoch": 0.17, "learning_rate": 0.00036856729300041623, "loss": 0.1319, "step": 24800 }, { "epoch": 0.17, "learning_rate": 0.00036854157733745503, "loss": 0.1284, "step": 25000 }, { "epoch": 0.17, "learning_rate": 0.00036851565623368017, "loss": 0.1223, "step": 25200 }, { "epoch": 0.17, "learning_rate": 0.0003684895297181181, "loss": 0.146, "step": 25400 }, { "epoch": 0.17, "learning_rate": 0.0003684631978200251, "loss": 0.1486, "step": 25600 }, { "epoch": 0.17, "learning_rate": 0.00036843666056888733, "loss": 0.1227, "step": 25800 }, { "epoch": 0.18, "learning_rate": 0.0003684099179944211, "loss": 0.1254, "step": 26000 }, { "epoch": 0.18, "learning_rate": 0.0003683829701265725, "loss": 0.1289, "step": 26200 }, { "epoch": 0.18, "learning_rate": 0.0003683558169955177, "loss": 0.129, "step": 26400 }, { "epoch": 0.18, "learning_rate": 0.00036832845863166245, "loss": 0.1296, "step": 26600 }, { "epoch": 0.18, "learning_rate": 0.0003683008950656426, "loss": 0.1338, "step": 26800 }, { "epoch": 0.18, "learning_rate": 0.0003682731263283235, "loss": 0.1383, "step": 27000 }, { "epoch": 0.18, "learning_rate": 0.0003682452928304226, "loss": 0.1334, "step": 27200 }, { "epoch": 0.18, "learning_rate": 0.0003682171148694867, "loss": 0.1387, "step": 27400 }, { "epoch": 0.19, "learning_rate": 0.0003681887318310681, "loss": 0.1242, "step": 27600 }, { "epoch": 0.19, "learning_rate": 0.00036816014374694997, "loss": 0.1278, "step": 27800 }, { "epoch": 0.19, "learning_rate": 0.000368131350649145, "loss": 0.133, "step": 28000 }, { "epoch": 0.19, "learning_rate": 0.0003681023525698955, "loss": 0.1261, "step": 28200 }, { "epoch": 0.19, "learning_rate": 0.0003680731495416734, "loss": 0.1358, "step": 28400 }, { "epoch": 0.19, "learning_rate": 0.00036804374159717996, "loss": 0.1362, "step": 28600 }, { "epoch": 0.19, "learning_rate": 0.0003680141287693461, "loss": 0.1291, "step": 28800 }, { "epoch": 0.2, "learning_rate": 0.00036798431109133197, "loss": 0.1252, "step": 29000 }, { "epoch": 0.2, "learning_rate": 0.0003679542885965272, "loss": 0.1286, "step": 29200 }, { "epoch": 0.2, "learning_rate": 0.0003679240613185508, "loss": 0.1281, "step": 29400 }, { "epoch": 0.2, "learning_rate": 0.0003678937819606453, "loss": 0.1355, "step": 29600 }, { "epoch": 0.2, "learning_rate": 0.00036786314624159095, "loss": 0.1357, "step": 29800 }, { "epoch": 0.2, "learning_rate": 0.0003678323058414256, "loss": 0.1293, "step": 30000 }, { "epoch": 0.2, "learning_rate": 0.0003678012607946841, "loss": 0.132, "step": 30200 }, { "epoch": 0.2, "learning_rate": 0.0003677700111361304, "loss": 0.1237, "step": 30400 }, { "epoch": 0.21, "learning_rate": 0.0003677385569007577, "loss": 0.1306, "step": 30600 }, { "epoch": 0.21, "learning_rate": 0.0003677068981237883, "loss": 0.1231, "step": 30800 }, { "epoch": 0.21, "learning_rate": 0.00036767519466573924, "loss": 0.1405, "step": 31000 }, { "epoch": 0.21, "learning_rate": 0.0003676431279344224, "loss": 0.127, "step": 31200 }, { "epoch": 0.21, "learning_rate": 0.00036761085676836956, "loss": 0.1336, "step": 31400 }, { "epoch": 0.21, "learning_rate": 0.0003675783812037177, "loss": 0.1346, "step": 31600 }, { "epoch": 0.21, "learning_rate": 0.0003675457012768328, "loss": 0.117, "step": 31800 }, { "epoch": 0.22, "learning_rate": 0.00036751281702430955, "loss": 0.1277, "step": 32000 }, { "epoch": 0.22, "learning_rate": 0.0003674797284829716, "loss": 0.1181, "step": 32200 }, { "epoch": 0.22, "learning_rate": 0.0003674464356898712, "loss": 0.124, "step": 32400 }, { "epoch": 0.22, "learning_rate": 0.0003674129386822893, "loss": 0.137, "step": 32600 }, { "epoch": 0.22, "learning_rate": 0.0003673792374977358, "loss": 0.1154, "step": 32800 }, { "epoch": 0.22, "learning_rate": 0.000367345332173949, "loss": 0.1548, "step": 33000 }, { "epoch": 0.22, "learning_rate": 0.0003673112227488958, "loss": 0.1304, "step": 33200 }, { "epoch": 0.23, "learning_rate": 0.0003672769092607718, "loss": 0.113, "step": 33400 }, { "epoch": 0.23, "learning_rate": 0.0003672423917480009, "loss": 0.1383, "step": 33600 }, { "epoch": 0.23, "learning_rate": 0.0003672076702492356, "loss": 0.1286, "step": 33800 }, { "epoch": 0.23, "learning_rate": 0.0003671727448033568, "loss": 0.1233, "step": 34000 }, { "epoch": 0.23, "learning_rate": 0.0003671376154494739, "loss": 0.1311, "step": 34200 }, { "epoch": 0.23, "learning_rate": 0.00036710228222692435, "loss": 0.1342, "step": 34400 }, { "epoch": 0.23, "learning_rate": 0.0003670669233674915, "loss": 0.1317, "step": 34600 }, { "epoch": 0.23, "learning_rate": 0.0003670311835453819, "loss": 0.1343, "step": 34800 }, { "epoch": 0.24, "learning_rate": 0.00036699523997378743, "loss": 0.1177, "step": 35000 }, { "epoch": 0.24, "learning_rate": 0.00036695909269295746, "loss": 0.1269, "step": 35200 }, { "epoch": 0.24, "learning_rate": 0.0003669227417433694, "loss": 0.1374, "step": 35400 }, { "epoch": 0.24, "learning_rate": 0.0003668861871657289, "loss": 0.1216, "step": 35600 }, { "epoch": 0.24, "learning_rate": 0.00036684942900096955, "loss": 0.1234, "step": 35800 }, { "epoch": 0.24, "learning_rate": 0.00036681246729025277, "loss": 0.1312, "step": 36000 }, { "epoch": 0.24, "learning_rate": 0.0003667753020749681, "loss": 0.1219, "step": 36200 }, { "epoch": 0.25, "learning_rate": 0.0003667379333967329, "loss": 0.1231, "step": 36400 }, { "epoch": 0.25, "learning_rate": 0.00036670036129739236, "loss": 0.1236, "step": 36600 }, { "epoch": 0.25, "learning_rate": 0.0003666625858190194, "loss": 0.1299, "step": 36800 }, { "epoch": 0.25, "learning_rate": 0.00036662460700391487, "loss": 0.1284, "step": 37000 }, { "epoch": 0.25, "learning_rate": 0.0003665864248946071, "loss": 0.1276, "step": 37200 }, { "epoch": 0.25, "learning_rate": 0.00036654803953385206, "loss": 0.1227, "step": 37400 }, { "epoch": 0.25, "learning_rate": 0.00036650945096463363, "loss": 0.1275, "step": 37600 }, { "epoch": 0.25, "learning_rate": 0.0003664706592301629, "loss": 0.1338, "step": 37800 }, { "epoch": 0.26, "learning_rate": 0.00036643166437387857, "loss": 0.1248, "step": 38000 }, { "epoch": 0.26, "learning_rate": 0.00036639246643944696, "loss": 0.1234, "step": 38200 }, { "epoch": 0.26, "learning_rate": 0.00036635306547076144, "loss": 0.1274, "step": 38400 }, { "epoch": 0.26, "learning_rate": 0.0003663134615119431, "loss": 0.1211, "step": 38600 }, { "epoch": 0.26, "learning_rate": 0.00036627365460734014, "loss": 0.1162, "step": 38800 }, { "epoch": 0.26, "learning_rate": 0.0003662336448015281, "loss": 0.1093, "step": 39000 }, { "epoch": 0.26, "learning_rate": 0.00036619363370715154, "loss": 0.1281, "step": 39200 }, { "epoch": 0.27, "learning_rate": 0.000366153219247501, "loss": 0.1143, "step": 39400 }, { "epoch": 0.27, "learning_rate": 0.00036611260202150397, "loss": 0.1301, "step": 39600 }, { "epoch": 0.27, "learning_rate": 0.0003660717820746435, "loss": 0.1312, "step": 39800 }, { "epoch": 0.27, "learning_rate": 0.0003660307594526293, "loss": 0.1249, "step": 40000 }, { "epoch": 0.27, "learning_rate": 0.00036598953420139834, "loss": 0.13, "step": 40200 }, { "epoch": 0.27, "learning_rate": 0.00036594810636711434, "loss": 0.1215, "step": 40400 }, { "epoch": 0.27, "learning_rate": 0.000365906475996168, "loss": 0.1347, "step": 40600 }, { "epoch": 0.27, "learning_rate": 0.00036586464313517655, "loss": 0.1219, "step": 40800 }, { "epoch": 0.28, "learning_rate": 0.00036582260783098424, "loss": 0.1228, "step": 41000 }, { "epoch": 0.28, "learning_rate": 0.000365780370130662, "loss": 0.1263, "step": 41200 }, { "epoch": 0.28, "learning_rate": 0.0003657379300815072, "loss": 0.1278, "step": 41400 }, { "epoch": 0.28, "learning_rate": 0.00036569528773104393, "loss": 0.1191, "step": 41600 }, { "epoch": 0.28, "learning_rate": 0.0003656526578530694, "loss": 0.1304, "step": 41800 }, { "epoch": 0.28, "learning_rate": 0.00036560961205437574, "loss": 0.1327, "step": 42000 }, { "epoch": 0.28, "learning_rate": 0.0003655663640980633, "loss": 0.1281, "step": 42200 }, { "epoch": 0.29, "learning_rate": 0.0003655229140325608, "loss": 0.1266, "step": 42400 }, { "epoch": 0.29, "learning_rate": 0.0003654792619065234, "loss": 0.1142, "step": 42600 }, { "epoch": 0.29, "learning_rate": 0.0003654354077688324, "loss": 0.1087, "step": 42800 }, { "epoch": 0.29, "learning_rate": 0.0003653913516685954, "loss": 0.1197, "step": 43000 }, { "epoch": 0.29, "learning_rate": 0.00036534709365514605, "loss": 0.1249, "step": 43200 }, { "epoch": 0.29, "learning_rate": 0.0003653026337780443, "loss": 0.1217, "step": 43400 }, { "epoch": 0.29, "learning_rate": 0.00036525797208707584, "loss": 0.1167, "step": 43600 }, { "epoch": 0.3, "learning_rate": 0.0003652133334513314, "loss": 0.1265, "step": 43800 }, { "epoch": 0.3, "learning_rate": 0.00036516826929133405, "loss": 0.1287, "step": 44000 }, { "epoch": 0.3, "learning_rate": 0.00036512300346793045, "loss": 0.1254, "step": 44200 }, { "epoch": 0.3, "learning_rate": 0.00036507753603180906, "loss": 0.1234, "step": 44400 }, { "epoch": 0.3, "learning_rate": 0.000365031867033884, "loss": 0.1256, "step": 44600 }, { "epoch": 0.3, "learning_rate": 0.00036498599652529513, "loss": 0.1295, "step": 44800 }, { "epoch": 0.3, "learning_rate": 0.00036493992455740784, "loss": 0.129, "step": 45000 }, { "epoch": 0.3, "learning_rate": 0.00036489365118181326, "loss": 0.1252, "step": 45200 }, { "epoch": 0.31, "learning_rate": 0.000364847176450328, "loss": 0.1276, "step": 45400 }, { "epoch": 0.31, "learning_rate": 0.00036480050041499414, "loss": 0.1102, "step": 45600 }, { "epoch": 0.31, "learning_rate": 0.0003647536231280792, "loss": 0.125, "step": 45800 }, { "epoch": 0.31, "learning_rate": 0.00036470678053490165, "loss": 0.1421, "step": 46000 }, { "epoch": 0.31, "learning_rate": 0.00036465950190812875, "loss": 0.1281, "step": 46200 }, { "epoch": 0.31, "learning_rate": 0.000364612022187664, "loss": 0.1147, "step": 46400 }, { "epoch": 0.31, "learning_rate": 0.0003645643414266747, "loss": 0.1263, "step": 46600 }, { "epoch": 0.32, "learning_rate": 0.0003645164596785535, "loss": 0.1255, "step": 46800 }, { "epoch": 0.32, "learning_rate": 0.00036446837699691823, "loss": 0.1227, "step": 47000 }, { "epoch": 0.32, "learning_rate": 0.00036442009343561136, "loss": 0.1171, "step": 47200 }, { "epoch": 0.32, "learning_rate": 0.00036437160904870047, "loss": 0.1179, "step": 47400 }, { "epoch": 0.32, "learning_rate": 0.00036432292389047827, "loss": 0.1258, "step": 47600 }, { "epoch": 0.32, "learning_rate": 0.0003642740380154618, "loss": 0.1297, "step": 47800 }, { "epoch": 0.32, "learning_rate": 0.00036422495147839327, "loss": 0.1272, "step": 48000 }, { "epoch": 0.32, "learning_rate": 0.0003641756643342394, "loss": 0.1152, "step": 48200 }, { "epoch": 0.33, "learning_rate": 0.0003641264245754532, "loss": 0.1162, "step": 48400 }, { "epoch": 0.33, "learning_rate": 0.00036407673738527173, "loss": 0.1195, "step": 48600 }, { "epoch": 0.33, "learning_rate": 0.00036402684975397404, "loss": 0.1252, "step": 48800 }, { "epoch": 0.33, "learning_rate": 0.0003639767617374241, "loss": 0.1175, "step": 49000 }, { "epoch": 0.33, "learning_rate": 0.00036392647339170993, "loss": 0.1249, "step": 49200 }, { "epoch": 0.33, "learning_rate": 0.0003638759847731442, "loss": 0.1199, "step": 49400 }, { "epoch": 0.33, "learning_rate": 0.0003638252959382637, "loss": 0.122, "step": 49600 }, { "epoch": 0.34, "learning_rate": 0.00036377440694382947, "loss": 0.1109, "step": 49800 }, { "epoch": 0.34, "learning_rate": 0.0003637233178468266, "loss": 0.1171, "step": 50000 }, { "epoch": 0.34, "learning_rate": 0.00036367202870446437, "loss": 0.1213, "step": 50200 }, { "epoch": 0.34, "learning_rate": 0.0003636207975172021, "loss": 0.1156, "step": 50400 }, { "epoch": 0.34, "learning_rate": 0.0003635691094561523, "loss": 0.1257, "step": 50600 }, { "epoch": 0.34, "learning_rate": 0.00036351722152242464, "loss": 0.1179, "step": 50800 }, { "epoch": 0.34, "learning_rate": 0.0003634651337741228, "loss": 0.1287, "step": 51000 }, { "epoch": 0.34, "learning_rate": 0.0003634128462695743, "loss": 0.1182, "step": 51200 }, { "epoch": 0.35, "learning_rate": 0.0003633606219999926, "loss": 0.1371, "step": 51400 }, { "epoch": 0.35, "learning_rate": 0.00036330793615687614, "loss": 0.1055, "step": 51600 }, { "epoch": 0.35, "learning_rate": 0.00036325505073354187, "loss": 0.1216, "step": 51800 }, { "epoch": 0.35, "learning_rate": 0.00036320196578921055, "loss": 0.1207, "step": 52000 }, { "epoch": 0.35, "learning_rate": 0.0003631486813833263, "loss": 0.131, "step": 52200 }, { "epoch": 0.35, "learning_rate": 0.00036309519757555674, "loss": 0.12, "step": 52400 }, { "epoch": 0.35, "learning_rate": 0.00036304151442579253, "loss": 0.1199, "step": 52600 }, { "epoch": 0.36, "learning_rate": 0.0003629876319941478, "loss": 0.116, "step": 52800 }, { "epoch": 0.36, "learning_rate": 0.00036293355034095967, "loss": 0.1134, "step": 53000 }, { "epoch": 0.36, "learning_rate": 0.00036287926952678845, "loss": 0.1133, "step": 53200 }, { "epoch": 0.36, "learning_rate": 0.00036282478961241737, "loss": 0.1254, "step": 53400 }, { "epoch": 0.36, "learning_rate": 0.00036277011065885274, "loss": 0.1207, "step": 53600 }, { "epoch": 0.36, "learning_rate": 0.00036271523272732363, "loss": 0.1222, "step": 53800 }, { "epoch": 0.36, "learning_rate": 0.00036266015587928203, "loss": 0.1161, "step": 54000 }, { "epoch": 0.37, "learning_rate": 0.0003626048801764025, "loss": 0.1216, "step": 54200 }, { "epoch": 0.37, "learning_rate": 0.00036254940568058254, "loss": 0.1253, "step": 54400 }, { "epoch": 0.37, "learning_rate": 0.000362493732453942, "loss": 0.1154, "step": 54600 }, { "epoch": 0.37, "learning_rate": 0.00036243786055882343, "loss": 0.1163, "step": 54800 }, { "epoch": 0.37, "learning_rate": 0.00036238179005779176, "loss": 0.1154, "step": 55000 }, { "epoch": 0.37, "learning_rate": 0.00036232552101363446, "loss": 0.1115, "step": 55200 }, { "epoch": 0.37, "learning_rate": 0.0003622693363205972, "loss": 0.1203, "step": 55400 }, { "epoch": 0.37, "learning_rate": 0.00036221267137136654, "loss": 0.1241, "step": 55600 }, { "epoch": 0.38, "learning_rate": 0.00036215580806838815, "loss": 0.1113, "step": 55800 }, { "epoch": 0.38, "learning_rate": 0.0003620987464753372, "loss": 0.1275, "step": 56000 }, { "epoch": 0.38, "learning_rate": 0.0003620417734481885, "loss": 0.1238, "step": 56200 }, { "epoch": 0.38, "learning_rate": 0.0003619843164575562, "loss": 0.1158, "step": 56400 }, { "epoch": 0.38, "learning_rate": 0.00036192666136888646, "loss": 0.113, "step": 56600 }, { "epoch": 0.38, "learning_rate": 0.000361868808246741, "loss": 0.114, "step": 56800 }, { "epoch": 0.38, "learning_rate": 0.00036181075715590336, "loss": 0.1304, "step": 57000 }, { "epoch": 0.39, "learning_rate": 0.00036175250816137877, "loss": 0.1105, "step": 57200 }, { "epoch": 0.39, "learning_rate": 0.0003616940613283941, "loss": 0.1187, "step": 57400 }, { "epoch": 0.39, "learning_rate": 0.00036163541672239765, "loss": 0.1166, "step": 57600 }, { "epoch": 0.39, "learning_rate": 0.00036157657440905933, "loss": 0.1209, "step": 57800 }, { "epoch": 0.39, "learning_rate": 0.00036151753445427023, "loss": 0.1363, "step": 58000 }, { "epoch": 0.39, "learning_rate": 0.00036145829692414304, "loss": 0.1096, "step": 58200 }, { "epoch": 0.39, "learning_rate": 0.0003613988618850114, "loss": 0.1188, "step": 58400 }, { "epoch": 0.39, "learning_rate": 0.00036133922940343035, "loss": 0.1083, "step": 58600 }, { "epoch": 0.4, "learning_rate": 0.0003612793995461759, "loss": 0.1197, "step": 58800 }, { "epoch": 0.4, "learning_rate": 0.00036121937238024515, "loss": 0.1279, "step": 59000 }, { "epoch": 0.4, "learning_rate": 0.00036115914797285614, "loss": 0.1189, "step": 59200 }, { "epoch": 0.4, "learning_rate": 0.00036109872639144766, "loss": 0.1356, "step": 59400 }, { "epoch": 0.4, "learning_rate": 0.00036103810770367947, "loss": 0.1222, "step": 59600 }, { "epoch": 0.4, "learning_rate": 0.00036097729197743193, "loss": 0.1201, "step": 59800 }, { "epoch": 0.4, "learning_rate": 0.000360916279280806, "loss": 0.1156, "step": 60000 }, { "epoch": 0.41, "learning_rate": 0.0003608550696821234, "loss": 0.1168, "step": 60200 }, { "epoch": 0.41, "learning_rate": 0.00036079366324992627, "loss": 0.1174, "step": 60400 }, { "epoch": 0.41, "learning_rate": 0.00036073236855829993, "loss": 0.1202, "step": 60600 }, { "epoch": 0.41, "learning_rate": 0.00036067056964888823, "loss": 0.1073, "step": 60800 }, { "epoch": 0.41, "learning_rate": 0.00036060857411256375, "loss": 0.1288, "step": 61000 }, { "epoch": 0.41, "learning_rate": 0.0003605463820187487, "loss": 0.1174, "step": 61200 }, { "epoch": 0.41, "learning_rate": 0.0003604839934370854, "loss": 0.1136, "step": 61400 }, { "epoch": 0.41, "learning_rate": 0.0003604214084374361, "loss": 0.112, "step": 61600 }, { "epoch": 0.42, "learning_rate": 0.00036035894148492016, "loss": 0.1091, "step": 61800 }, { "epoch": 0.42, "learning_rate": 0.0003602959648409783, "loss": 0.1329, "step": 62000 }, { "epoch": 0.42, "learning_rate": 0.00036023279198960366, "loss": 0.1262, "step": 62200 }, { "epoch": 0.42, "learning_rate": 0.0003601694230015367, "loss": 0.1231, "step": 62400 }, { "epoch": 0.42, "learning_rate": 0.00036010585794773764, "loss": 0.1107, "step": 62600 }, { "epoch": 0.42, "learning_rate": 0.0003600420968993861, "loss": 0.1299, "step": 62800 }, { "epoch": 0.42, "learning_rate": 0.0003599781399278814, "loss": 0.1161, "step": 63000 }, { "epoch": 0.43, "learning_rate": 0.00035991398710484205, "loss": 0.115, "step": 63200 }, { "epoch": 0.43, "learning_rate": 0.00035984963850210587, "loss": 0.1223, "step": 63400 }, { "epoch": 0.43, "learning_rate": 0.0003597850941917301, "loss": 0.1154, "step": 63600 }, { "epoch": 0.43, "learning_rate": 0.00035972035424599094, "loss": 0.1167, "step": 63800 }, { "epoch": 0.43, "learning_rate": 0.0003596554187373838, "loss": 0.1129, "step": 64000 }, { "epoch": 0.43, "learning_rate": 0.000359590287738623, "loss": 0.1255, "step": 64200 }, { "epoch": 0.43, "learning_rate": 0.0003595249613226417, "loss": 0.1212, "step": 64400 }, { "epoch": 0.44, "learning_rate": 0.0003594594395625922, "loss": 0.1202, "step": 64600 }, { "epoch": 0.44, "learning_rate": 0.0003593937225318451, "loss": 0.1145, "step": 64800 }, { "epoch": 0.44, "learning_rate": 0.00035932781030399, "loss": 0.1076, "step": 65000 }, { "epoch": 0.44, "learning_rate": 0.0003592617029528349, "loss": 0.1167, "step": 65200 }, { "epoch": 0.44, "learning_rate": 0.00035919540055240643, "loss": 0.1182, "step": 65400 }, { "epoch": 0.44, "learning_rate": 0.00035912890317694947, "loss": 0.1249, "step": 65600 }, { "epoch": 0.44, "learning_rate": 0.00035906221090092747, "loss": 0.1267, "step": 65800 }, { "epoch": 0.44, "learning_rate": 0.0003589953237990218, "loss": 0.1111, "step": 66000 }, { "epoch": 0.45, "learning_rate": 0.0003589282419461323, "loss": 0.1155, "step": 66200 }, { "epoch": 0.45, "learning_rate": 0.0003588613022841523, "loss": 0.1156, "step": 66400 }, { "epoch": 0.45, "learning_rate": 0.0003587938321276814, "loss": 0.1381, "step": 66600 }, { "epoch": 0.45, "learning_rate": 0.0003587261674458557, "loss": 0.1141, "step": 66800 }, { "epoch": 0.45, "learning_rate": 0.00035865830831444557, "loss": 0.1039, "step": 67000 }, { "epoch": 0.45, "learning_rate": 0.0003585902548094393, "loss": 0.1329, "step": 67200 }, { "epoch": 0.45, "learning_rate": 0.0003585220070070426, "loss": 0.1255, "step": 67400 }, { "epoch": 0.46, "learning_rate": 0.00035845356498367904, "loss": 0.12, "step": 67600 }, { "epoch": 0.46, "learning_rate": 0.00035838492881598946, "loss": 0.1142, "step": 67800 }, { "epoch": 0.46, "learning_rate": 0.00035831609858083215, "loss": 0.1199, "step": 68000 }, { "epoch": 0.46, "learning_rate": 0.0003582470743552828, "loss": 0.1206, "step": 68200 }, { "epoch": 0.46, "learning_rate": 0.00035817820278955816, "loss": 0.1298, "step": 68400 }, { "epoch": 0.46, "learning_rate": 0.0003581087917843051, "loss": 0.1344, "step": 68600 }, { "epoch": 0.46, "learning_rate": 0.00035803918702080075, "loss": 0.127, "step": 68800 }, { "epoch": 0.46, "learning_rate": 0.00035796938857698805, "loss": 0.1171, "step": 69000 }, { "epoch": 0.47, "learning_rate": 0.00035789939653102667, "loss": 0.1142, "step": 69200 }, { "epoch": 0.47, "learning_rate": 0.0003578292109612934, "loss": 0.119, "step": 69400 }, { "epoch": 0.47, "learning_rate": 0.0003577588319463815, "loss": 0.1232, "step": 69600 }, { "epoch": 0.47, "learning_rate": 0.0003576882595651009, "loss": 0.1206, "step": 69800 }, { "epoch": 0.47, "learning_rate": 0.0003576174938964781, "loss": 0.1093, "step": 70000 }, { "epoch": 0.47, "learning_rate": 0.00035754653501975596, "loss": 0.1352, "step": 70200 }, { "epoch": 0.47, "learning_rate": 0.0003574753830143938, "loss": 0.1207, "step": 70400 }, { "epoch": 0.48, "learning_rate": 0.00035740403796006713, "loss": 0.1077, "step": 70600 }, { "epoch": 0.48, "learning_rate": 0.00035733249993666767, "loss": 0.1159, "step": 70800 }, { "epoch": 0.48, "learning_rate": 0.00035726112815854356, "loss": 0.1196, "step": 71000 }, { "epoch": 0.48, "learning_rate": 0.0003571892054013809, "loss": 0.1162, "step": 71200 }, { "epoch": 0.48, "learning_rate": 0.0003571170899157135, "loss": 0.1439, "step": 71400 }, { "epoch": 0.48, "learning_rate": 0.00035704478178229574, "loss": 0.1113, "step": 71600 }, { "epoch": 0.48, "learning_rate": 0.00035697228108209786, "loss": 0.1195, "step": 71800 }, { "epoch": 0.49, "learning_rate": 0.00035689958789630546, "loss": 0.1121, "step": 72000 }, { "epoch": 0.49, "learning_rate": 0.0003568267023063201, "loss": 0.1024, "step": 72200 }, { "epoch": 0.49, "learning_rate": 0.0003567536243937585, "loss": 0.1103, "step": 72400 }, { "epoch": 0.49, "learning_rate": 0.00035668035424045273, "loss": 0.1306, "step": 72600 }, { "epoch": 0.49, "learning_rate": 0.0003566068919284503, "loss": 0.1145, "step": 72800 }, { "epoch": 0.49, "learning_rate": 0.0003565332375400137, "loss": 0.1157, "step": 73000 }, { "epoch": 0.49, "learning_rate": 0.0003564593911576208, "loss": 0.119, "step": 73200 }, { "epoch": 0.49, "learning_rate": 0.00035638535286396404, "loss": 0.1117, "step": 73400 }, { "epoch": 0.5, "learning_rate": 0.0003563114943695969, "loss": 0.117, "step": 73600 }, { "epoch": 0.5, "learning_rate": 0.00035623707346086925, "loss": 0.108, "step": 73800 }, { "epoch": 0.5, "learning_rate": 0.00035616246088982775, "loss": 0.106, "step": 74000 }, { "epoch": 0.5, "learning_rate": 0.0003560876567400229, "loss": 0.1316, "step": 74200 }, { "epoch": 0.5, "learning_rate": 0.0003560126610952201, "loss": 0.1121, "step": 74400 }, { "epoch": 0.5, "learning_rate": 0.0003559374740393988, "loss": 0.124, "step": 74600 }, { "epoch": 0.5, "learning_rate": 0.000355862095656753, "loss": 0.1163, "step": 74800 }, { "epoch": 0.51, "learning_rate": 0.0003557865260316909, "loss": 0.1191, "step": 75000 }, { "epoch": 0.51, "learning_rate": 0.0003557107652488348, "loss": 0.1189, "step": 75200 }, { "epoch": 0.51, "learning_rate": 0.00035563481339302114, "loss": 0.1118, "step": 75400 }, { "epoch": 0.51, "learning_rate": 0.00035555867054930034, "loss": 0.1242, "step": 75600 }, { "epoch": 0.51, "learning_rate": 0.0003554823368029366, "loss": 0.1064, "step": 75800 }, { "epoch": 0.51, "learning_rate": 0.00035540581223940794, "loss": 0.1078, "step": 76000 }, { "epoch": 0.51, "learning_rate": 0.000355329096944406, "loss": 0.1145, "step": 76200 }, { "epoch": 0.51, "learning_rate": 0.00035525219100383614, "loss": 0.122, "step": 76400 }, { "epoch": 0.52, "learning_rate": 0.00035517548046019095, "loss": 0.1132, "step": 76600 }, { "epoch": 0.52, "learning_rate": 0.0003550981944392052, "loss": 0.1183, "step": 76800 }, { "epoch": 0.52, "learning_rate": 0.00035502071803121456, "loss": 0.1086, "step": 77000 }, { "epoch": 0.52, "learning_rate": 0.0003549430513229765, "loss": 0.11, "step": 77200 }, { "epoch": 0.52, "learning_rate": 0.0003548651944014617, "loss": 0.1144, "step": 77400 }, { "epoch": 0.52, "learning_rate": 0.0003547871473538539, "loss": 0.1114, "step": 77600 }, { "epoch": 0.52, "learning_rate": 0.0003547089102675495, "loss": 0.1131, "step": 77800 }, { "epoch": 0.53, "learning_rate": 0.000354630483230158, "loss": 0.1178, "step": 78000 }, { "epoch": 0.53, "learning_rate": 0.0003545518663295014, "loss": 0.1184, "step": 78200 }, { "epoch": 0.53, "learning_rate": 0.0003544730596536143, "loss": 0.1131, "step": 78400 }, { "epoch": 0.53, "learning_rate": 0.000354394063290744, "loss": 0.1103, "step": 78600 }, { "epoch": 0.53, "learning_rate": 0.00035431487732935006, "loss": 0.1099, "step": 78800 }, { "epoch": 0.53, "learning_rate": 0.00035423589920671934, "loss": 0.124, "step": 79000 }, { "epoch": 0.53, "learning_rate": 0.0003541563352613894, "loss": 0.1216, "step": 79200 }, { "epoch": 0.53, "learning_rate": 0.00035407658198374195, "loss": 0.1153, "step": 79400 }, { "epoch": 0.54, "learning_rate": 0.00035399663946308424, "loss": 0.107, "step": 79600 }, { "epoch": 0.54, "learning_rate": 0.00035391650778893543, "loss": 0.1105, "step": 79800 }, { "epoch": 0.54, "learning_rate": 0.00035383618705102626, "loss": 0.1361, "step": 80000 }, { "epoch": 0.54, "learning_rate": 0.00035375567733929953, "loss": 0.1221, "step": 80200 }, { "epoch": 0.54, "learning_rate": 0.0003536749787439093, "loss": 0.1085, "step": 80400 }, { "epoch": 0.54, "learning_rate": 0.0003535940913552215, "loss": 0.1115, "step": 80600 }, { "epoch": 0.54, "learning_rate": 0.00035351301526381307, "loss": 0.1107, "step": 80800 }, { "epoch": 0.55, "learning_rate": 0.00035343175056047263, "loss": 0.1211, "step": 81000 }, { "epoch": 0.55, "learning_rate": 0.0003533502973361997, "loss": 0.117, "step": 81200 }, { "epoch": 0.55, "learning_rate": 0.00035326865568220515, "loss": 0.1154, "step": 81400 }, { "epoch": 0.55, "learning_rate": 0.0003531868256899108, "loss": 0.1079, "step": 81600 }, { "epoch": 0.55, "learning_rate": 0.00035310480745094907, "loss": 0.1115, "step": 81800 }, { "epoch": 0.55, "learning_rate": 0.00035302260105716366, "loss": 0.1247, "step": 82000 }, { "epoch": 0.55, "learning_rate": 0.00035294061904054505, "loss": 0.119, "step": 82200 }, { "epoch": 0.56, "learning_rate": 0.00035285803755310787, "loss": 0.1219, "step": 82400 }, { "epoch": 0.56, "learning_rate": 0.00035277526818717824, "loss": 0.1113, "step": 82600 }, { "epoch": 0.56, "learning_rate": 0.00035269231103544056, "loss": 0.1112, "step": 82800 }, { "epoch": 0.56, "learning_rate": 0.00035260958238174535, "loss": 0.1099, "step": 83000 }, { "epoch": 0.56, "learning_rate": 0.00035252625087505373, "loss": 0.1519, "step": 83200 }, { "epoch": 0.56, "learning_rate": 0.00035244273186140206, "loss": 0.1178, "step": 83400 }, { "epoch": 0.56, "learning_rate": 0.0003523590254343144, "loss": 0.1095, "step": 83600 }, { "epoch": 0.56, "learning_rate": 0.00035227513168752453, "loss": 0.1161, "step": 83800 }, { "epoch": 0.57, "learning_rate": 0.0003521910507149763, "loss": 0.1091, "step": 84000 }, { "epoch": 0.57, "learning_rate": 0.00035210678261082275, "loss": 0.1176, "step": 84200 }, { "epoch": 0.57, "learning_rate": 0.00035202232746942697, "loss": 0.1197, "step": 84400 }, { "epoch": 0.57, "learning_rate": 0.0003519376853853612, "loss": 0.1163, "step": 84600 }, { "epoch": 0.57, "learning_rate": 0.0003518528564534069, "loss": 0.1152, "step": 84800 }, { "epoch": 0.57, "learning_rate": 0.0003517678407685551, "loss": 0.1086, "step": 85000 }, { "epoch": 0.57, "learning_rate": 0.0003516826384260057, "loss": 0.1023, "step": 85200 }, { "epoch": 0.58, "learning_rate": 0.00035159724952116777, "loss": 0.1111, "step": 85400 }, { "epoch": 0.58, "learning_rate": 0.0003515116741496592, "loss": 0.1195, "step": 85600 }, { "epoch": 0.58, "learning_rate": 0.0003514259124073067, "loss": 0.1031, "step": 85800 }, { "epoch": 0.58, "learning_rate": 0.00035133996439014564, "loss": 0.1232, "step": 86000 }, { "epoch": 0.58, "learning_rate": 0.00035125383019442015, "loss": 0.1214, "step": 86200 }, { "epoch": 0.58, "learning_rate": 0.0003511675099165826, "loss": 0.1129, "step": 86400 }, { "epoch": 0.58, "learning_rate": 0.0003510810036532938, "loss": 0.1312, "step": 86600 }, { "epoch": 0.58, "learning_rate": 0.00035099431150142305, "loss": 0.1087, "step": 86800 }, { "epoch": 0.59, "learning_rate": 0.00035090743355804756, "loss": 0.1117, "step": 87000 }, { "epoch": 0.59, "learning_rate": 0.0003508203699204526, "loss": 0.1073, "step": 87200 }, { "epoch": 0.59, "learning_rate": 0.0003507331206861315, "loss": 0.1101, "step": 87400 }, { "epoch": 0.59, "learning_rate": 0.000350646123587719, "loss": 0.1253, "step": 87600 }, { "epoch": 0.59, "learning_rate": 0.0003505585043800183, "loss": 0.1035, "step": 87800 }, { "epoch": 0.59, "learning_rate": 0.00035047069986882655, "loss": 0.1128, "step": 88000 }, { "epoch": 0.59, "learning_rate": 0.00035038271015246694, "loss": 0.1128, "step": 88200 }, { "epoch": 0.6, "learning_rate": 0.0003502945353294695, "loss": 0.1097, "step": 88400 }, { "epoch": 0.6, "learning_rate": 0.00035020617549857193, "loss": 0.1153, "step": 88600 }, { "epoch": 0.6, "learning_rate": 0.00035011763075871896, "loss": 0.1261, "step": 88800 }, { "epoch": 0.6, "learning_rate": 0.00035002890120906235, "loss": 0.1111, "step": 89000 }, { "epoch": 0.6, "learning_rate": 0.00034993998694896083, "loss": 0.121, "step": 89200 }, { "epoch": 0.6, "learning_rate": 0.00034985088807798, "loss": 0.1327, "step": 89400 }, { "epoch": 0.6, "learning_rate": 0.0003497616046958922, "loss": 0.1166, "step": 89600 }, { "epoch": 0.6, "learning_rate": 0.00034967213690267624, "loss": 0.1173, "step": 89800 }, { "epoch": 0.61, "learning_rate": 0.0003495824847985177, "loss": 0.1138, "step": 90000 }, { "epoch": 0.61, "learning_rate": 0.0003494926484838084, "loss": 0.1134, "step": 90200 }, { "epoch": 0.61, "learning_rate": 0.00034940262805914626, "loss": 0.1032, "step": 90400 }, { "epoch": 0.61, "learning_rate": 0.0003493124236253357, "loss": 0.1209, "step": 90600 }, { "epoch": 0.61, "learning_rate": 0.0003492224876824008, "loss": 0.1307, "step": 90800 }, { "epoch": 0.61, "learning_rate": 0.0003491319164523126, "loss": 0.1144, "step": 91000 }, { "epoch": 0.61, "learning_rate": 0.00034904116151621703, "loss": 0.1174, "step": 91200 }, { "epoch": 0.62, "learning_rate": 0.00034895067812499057, "loss": 0.1099, "step": 91400 }, { "epoch": 0.62, "learning_rate": 0.00034885955699922493, "loss": 0.1104, "step": 91600 }, { "epoch": 0.62, "learning_rate": 0.00034876825247243814, "loss": 0.1115, "step": 91800 }, { "epoch": 0.62, "learning_rate": 0.0003486767646468723, "loss": 0.1149, "step": 92000 }, { "epoch": 0.62, "learning_rate": 0.0003485850936249749, "loss": 0.1115, "step": 92200 }, { "epoch": 0.62, "learning_rate": 0.00034849323950939855, "loss": 0.124, "step": 92400 }, { "epoch": 0.62, "learning_rate": 0.00034840120240300075, "loss": 0.1309, "step": 92600 }, { "epoch": 0.63, "learning_rate": 0.0003483089824088442, "loss": 0.1079, "step": 92800 }, { "epoch": 0.63, "learning_rate": 0.000348216579630196, "loss": 0.1093, "step": 93000 }, { "epoch": 0.63, "learning_rate": 0.00034812399417052833, "loss": 0.1159, "step": 93200 }, { "epoch": 0.63, "learning_rate": 0.00034803122613351773, "loss": 0.1172, "step": 93400 }, { "epoch": 0.63, "learning_rate": 0.00034793827562304517, "loss": 0.1127, "step": 93600 }, { "epoch": 0.63, "learning_rate": 0.000347845142743196, "loss": 0.1152, "step": 93800 }, { "epoch": 0.63, "learning_rate": 0.0003477518275982598, "loss": 0.1123, "step": 94000 }, { "epoch": 0.63, "learning_rate": 0.00034765833029273027, "loss": 0.1321, "step": 94200 }, { "epoch": 0.64, "learning_rate": 0.00034756465093130503, "loss": 0.1116, "step": 94400 }, { "epoch": 0.64, "learning_rate": 0.0003474707896188856, "loss": 0.1063, "step": 94600 }, { "epoch": 0.64, "learning_rate": 0.00034737721712853624, "loss": 0.1339, "step": 94800 }, { "epoch": 0.64, "learning_rate": 0.0003472829931380883, "loss": 0.1165, "step": 95000 }, { "epoch": 0.64, "learning_rate": 0.0003471885875120447, "loss": 0.1187, "step": 95200 }, { "epoch": 0.64, "learning_rate": 0.0003470940003561202, "loss": 0.1201, "step": 95400 }, { "epoch": 0.64, "learning_rate": 0.0003469992317762327, "loss": 0.1115, "step": 95600 }, { "epoch": 0.65, "learning_rate": 0.0003469042818785036, "loss": 0.1031, "step": 95800 }, { "epoch": 0.65, "learning_rate": 0.000346809150769257, "loss": 0.1212, "step": 96000 }, { "epoch": 0.65, "learning_rate": 0.00034671383855502014, "loss": 0.1148, "step": 96200 }, { "epoch": 0.65, "learning_rate": 0.000346618345342523, "loss": 0.1273, "step": 96400 }, { "epoch": 0.65, "learning_rate": 0.0003465226712386982, "loss": 0.1204, "step": 96600 }, { "epoch": 0.65, "learning_rate": 0.00034642681635068095, "loss": 0.1101, "step": 96800 }, { "epoch": 0.65, "learning_rate": 0.0003463307807858089, "loss": 0.1171, "step": 97000 }, { "epoch": 0.65, "learning_rate": 0.00034623456465162207, "loss": 0.1197, "step": 97200 }, { "epoch": 0.66, "learning_rate": 0.00034613816805586253, "loss": 0.1153, "step": 97400 }, { "epoch": 0.66, "learning_rate": 0.0003460415911064746, "loss": 0.1155, "step": 97600 }, { "epoch": 0.66, "learning_rate": 0.00034594483391160453, "loss": 0.1134, "step": 97800 }, { "epoch": 0.66, "learning_rate": 0.0003458478965796003, "loss": 0.1058, "step": 98000 }, { "epoch": 0.66, "learning_rate": 0.00034575077921901166, "loss": 0.1172, "step": 98200 }, { "epoch": 0.66, "learning_rate": 0.0003456534819385901, "loss": 0.1182, "step": 98400 }, { "epoch": 0.66, "learning_rate": 0.00034555600484728827, "loss": 0.1244, "step": 98600 }, { "epoch": 0.67, "learning_rate": 0.0003454583480542605, "loss": 0.1117, "step": 98800 }, { "epoch": 0.67, "learning_rate": 0.000345360511668862, "loss": 0.1088, "step": 99000 }, { "epoch": 0.67, "learning_rate": 0.0003452624958006496, "loss": 0.1136, "step": 99200 }, { "epoch": 0.67, "learning_rate": 0.0003451643005593805, "loss": 0.1148, "step": 99400 }, { "epoch": 0.67, "learning_rate": 0.0003450659260550133, "loss": 0.105, "step": 99600 }, { "epoch": 0.67, "learning_rate": 0.00034496786561145394, "loss": 0.1195, "step": 99800 }, { "epoch": 0.67, "learning_rate": 0.0003448691338065063, "loss": 0.1182, "step": 100000 }, { "epoch": 0.68, "learning_rate": 0.0003447702230689861, "loss": 0.1163, "step": 100200 }, { "epoch": 0.68, "learning_rate": 0.000344671133509653, "loss": 0.1245, "step": 100400 }, { "epoch": 0.68, "learning_rate": 0.0003445718652394668, "loss": 0.1046, "step": 100600 }, { "epoch": 0.68, "learning_rate": 0.0003444724183695874, "loss": 0.1182, "step": 100800 }, { "epoch": 0.68, "learning_rate": 0.0003443727930113748, "loss": 0.1207, "step": 101000 }, { "epoch": 0.68, "learning_rate": 0.00034427298927638874, "loss": 0.1079, "step": 101200 }, { "epoch": 0.68, "learning_rate": 0.0003441730072763888, "loss": 0.104, "step": 101400 }, { "epoch": 0.68, "learning_rate": 0.0003440728471233341, "loss": 0.1102, "step": 101600 }, { "epoch": 0.69, "learning_rate": 0.000343973011063044, "loss": 0.1252, "step": 101800 }, { "epoch": 0.69, "learning_rate": 0.0003438724958299181, "loss": 0.1109, "step": 102000 }, { "epoch": 0.69, "learning_rate": 0.00034377230668762864, "loss": 0.1208, "step": 102200 }, { "epoch": 0.69, "learning_rate": 0.00034367143682240804, "loss": 0.1033, "step": 102400 }, { "epoch": 0.69, "learning_rate": 0.0003435703893657879, "loss": 0.1112, "step": 102600 }, { "epoch": 0.69, "learning_rate": 0.0003434691644309205, "loss": 0.1153, "step": 102800 }, { "epoch": 0.69, "learning_rate": 0.00034336776213115676, "loss": 0.1053, "step": 103000 }, { "epoch": 0.7, "learning_rate": 0.0003432661825800463, "loss": 0.1188, "step": 103200 }, { "epoch": 0.7, "learning_rate": 0.00034316442589133725, "loss": 0.1069, "step": 103400 }, { "epoch": 0.7, "learning_rate": 0.000343062492178976, "loss": 0.1195, "step": 103600 }, { "epoch": 0.7, "learning_rate": 0.00034296038155710726, "loss": 0.1099, "step": 103800 }, { "epoch": 0.7, "learning_rate": 0.0003428580941400738, "loss": 0.1185, "step": 104000 }, { "epoch": 0.7, "learning_rate": 0.0003427556300424164, "loss": 0.1247, "step": 104200 }, { "epoch": 0.7, "learning_rate": 0.0003426529893788736, "loss": 0.111, "step": 104400 }, { "epoch": 0.7, "learning_rate": 0.00034255017226438175, "loss": 0.1043, "step": 104600 }, { "epoch": 0.71, "learning_rate": 0.00034244717881407483, "loss": 0.1183, "step": 104800 }, { "epoch": 0.71, "learning_rate": 0.00034234452542979544, "loss": 0.1165, "step": 105000 }, { "epoch": 0.71, "learning_rate": 0.00034224118053428676, "loss": 0.1178, "step": 105200 }, { "epoch": 0.71, "learning_rate": 0.0003421376596489696, "loss": 0.1283, "step": 105400 }, { "epoch": 0.71, "learning_rate": 0.00034203396288976624, "loss": 0.1091, "step": 105600 }, { "epoch": 0.71, "learning_rate": 0.00034193009037279546, "loss": 0.1101, "step": 105800 }, { "epoch": 0.71, "learning_rate": 0.00034182604221437305, "loss": 0.1147, "step": 106000 }, { "epoch": 0.72, "learning_rate": 0.0003417218185310113, "loss": 0.1052, "step": 106200 }, { "epoch": 0.72, "learning_rate": 0.00034161741943941935, "loss": 0.1089, "step": 106400 }, { "epoch": 0.72, "learning_rate": 0.0003415128450565025, "loss": 0.1223, "step": 106600 }, { "epoch": 0.72, "learning_rate": 0.00034140809549936245, "loss": 0.1067, "step": 106800 }, { "epoch": 0.72, "learning_rate": 0.00034130317088529706, "loss": 0.1182, "step": 107000 }, { "epoch": 0.72, "learning_rate": 0.0003411980713318002, "loss": 0.1026, "step": 107200 }, { "epoch": 0.72, "learning_rate": 0.00034109279695656154, "loss": 0.114, "step": 107400 }, { "epoch": 0.72, "learning_rate": 0.00034098734787746675, "loss": 0.1046, "step": 107600 }, { "epoch": 0.73, "learning_rate": 0.00034088172421259687, "loss": 0.114, "step": 107800 }, { "epoch": 0.73, "learning_rate": 0.00034077592608022865, "loss": 0.1079, "step": 108000 }, { "epoch": 0.73, "learning_rate": 0.00034066995359883405, "loss": 0.1134, "step": 108200 }, { "epoch": 0.73, "learning_rate": 0.00034056380688708037, "loss": 0.1188, "step": 108400 }, { "epoch": 0.73, "learning_rate": 0.00034045748606383004, "loss": 0.1123, "step": 108600 }, { "epoch": 0.73, "learning_rate": 0.00034035099124814027, "loss": 0.109, "step": 108800 }, { "epoch": 0.73, "learning_rate": 0.0003402443225592633, "loss": 0.1105, "step": 109000 }, { "epoch": 0.74, "learning_rate": 0.00034013748011664594, "loss": 0.117, "step": 109200 }, { "epoch": 0.74, "learning_rate": 0.00034003099955202997, "loss": 0.1196, "step": 109400 }, { "epoch": 0.74, "learning_rate": 0.0003399238108283236, "loss": 0.112, "step": 109600 }, { "epoch": 0.74, "learning_rate": 0.00033981644870978374, "loss": 0.1267, "step": 109800 }, { "epoch": 0.74, "learning_rate": 0.000339709451424421, "loss": 0.1266, "step": 110000 }, { "epoch": 0.74, "learning_rate": 0.0003396017437425494, "loss": 0.1283, "step": 110200 }, { "epoch": 0.74, "learning_rate": 0.0003394938630264929, "loss": 0.1104, "step": 110400 }, { "epoch": 0.75, "learning_rate": 0.0003393858093970554, "loss": 0.1183, "step": 110600 }, { "epoch": 0.75, "learning_rate": 0.0003392775829752349, "loss": 0.1186, "step": 110800 }, { "epoch": 0.75, "learning_rate": 0.00033916918388222233, "loss": 0.1141, "step": 111000 }, { "epoch": 0.75, "learning_rate": 0.0003390606122394025, "loss": 0.114, "step": 111200 }, { "epoch": 0.75, "learning_rate": 0.000338951868168353, "loss": 0.1064, "step": 111400 }, { "epoch": 0.75, "learning_rate": 0.00033884295179084477, "loss": 0.1095, "step": 111600 }, { "epoch": 0.75, "learning_rate": 0.00033873386322884173, "loss": 0.1064, "step": 111800 }, { "epoch": 0.75, "learning_rate": 0.0003386246026045003, "loss": 0.1207, "step": 112000 }, { "epoch": 0.76, "learning_rate": 0.00033851517004016994, "loss": 0.1147, "step": 112200 }, { "epoch": 0.76, "learning_rate": 0.0003384055656583925, "loss": 0.1238, "step": 112400 }, { "epoch": 0.76, "learning_rate": 0.0003382957895819021, "loss": 0.1208, "step": 112600 }, { "epoch": 0.76, "learning_rate": 0.00033818584193362537, "loss": 0.1013, "step": 112800 }, { "epoch": 0.76, "learning_rate": 0.00033807572283668094, "loss": 0.1144, "step": 113000 }, { "epoch": 0.76, "learning_rate": 0.0003379654324143793, "loss": 0.1062, "step": 113200 }, { "epoch": 0.76, "learning_rate": 0.00033785497079022296, "loss": 0.1134, "step": 113400 }, { "epoch": 0.77, "learning_rate": 0.00033774433808790615, "loss": 0.1087, "step": 113600 }, { "epoch": 0.77, "learning_rate": 0.0003376335344313145, "loss": 0.1135, "step": 113800 }, { "epoch": 0.77, "learning_rate": 0.0003375225599445254, "loss": 0.1042, "step": 114000 }, { "epoch": 0.77, "learning_rate": 0.0003374114147518071, "loss": 0.1081, "step": 114200 }, { "epoch": 0.77, "learning_rate": 0.0003373006559806056, "loss": 0.1206, "step": 114400 }, { "epoch": 0.77, "learning_rate": 0.00033718917060157275, "loss": 0.1112, "step": 114600 }, { "epoch": 0.77, "learning_rate": 0.0003370775148899379, "loss": 0.1075, "step": 114800 }, { "epoch": 0.77, "learning_rate": 0.0003369656889707324, "loss": 0.1091, "step": 115000 }, { "epoch": 0.78, "learning_rate": 0.0003368536929691781, "loss": 0.1104, "step": 115200 }, { "epoch": 0.78, "learning_rate": 0.0003367415270106874, "loss": 0.1055, "step": 115400 }, { "epoch": 0.78, "learning_rate": 0.00033662919122086295, "loss": 0.11, "step": 115600 }, { "epoch": 0.78, "learning_rate": 0.00033651668572549757, "loss": 0.1178, "step": 115800 }, { "epoch": 0.78, "learning_rate": 0.0003364040106505742, "loss": 0.1063, "step": 116000 }, { "epoch": 0.78, "learning_rate": 0.00033629116612226564, "loss": 0.1232, "step": 116200 }, { "epoch": 0.78, "learning_rate": 0.0003361781522669344, "loss": 0.1143, "step": 116400 }, { "epoch": 0.79, "learning_rate": 0.0003360655355470882, "loss": 0.1233, "step": 116600 }, { "epoch": 0.79, "learning_rate": 0.0003359521842626105, "loss": 0.1191, "step": 116800 }, { "epoch": 0.79, "learning_rate": 0.00033583866403069976, "loss": 0.1114, "step": 117000 }, { "epoch": 0.79, "learning_rate": 0.0003357249749784751, "loss": 0.1075, "step": 117200 }, { "epoch": 0.79, "learning_rate": 0.0003356111172332447, "loss": 0.1084, "step": 117400 }, { "epoch": 0.79, "learning_rate": 0.0003354970909225057, "loss": 0.1037, "step": 117600 }, { "epoch": 0.79, "learning_rate": 0.0003353828961739441, "loss": 0.1187, "step": 117800 }, { "epoch": 0.79, "learning_rate": 0.00033526853311543423, "loss": 0.1078, "step": 118000 }, { "epoch": 0.8, "learning_rate": 0.00033515400187503924, "loss": 0.1083, "step": 118200 }, { "epoch": 0.8, "learning_rate": 0.00033503930258101024, "loss": 0.1163, "step": 118400 }, { "epoch": 0.8, "learning_rate": 0.0003349244353617869, "loss": 0.101, "step": 118600 }, { "epoch": 0.8, "learning_rate": 0.00033480997593825657, "loss": 0.1162, "step": 118800 }, { "epoch": 0.8, "learning_rate": 0.00033469477409273286, "loss": 0.1088, "step": 119000 }, { "epoch": 0.8, "learning_rate": 0.00033457940470781543, "loss": 0.1168, "step": 119200 }, { "epoch": 0.8, "learning_rate": 0.0003344638679126941, "loss": 0.1154, "step": 119400 }, { "epoch": 0.81, "learning_rate": 0.0003343481638367461, "loss": 0.1118, "step": 119600 }, { "epoch": 0.81, "learning_rate": 0.0003342322926095362, "loss": 0.1446, "step": 119800 }, { "epoch": 0.81, "learning_rate": 0.0003341162543608161, "loss": 0.1079, "step": 120000 }, { "epoch": 0.81, "learning_rate": 0.0003340000492205248, "loss": 0.1049, "step": 120200 }, { "epoch": 0.81, "learning_rate": 0.0003338836773187878, "loss": 0.1123, "step": 120400 }, { "epoch": 0.81, "learning_rate": 0.00033376713878591777, "loss": 0.1151, "step": 120600 }, { "epoch": 0.81, "learning_rate": 0.00033365043375241366, "loss": 0.0998, "step": 120800 }, { "epoch": 0.82, "learning_rate": 0.000333533562348961, "loss": 0.1189, "step": 121000 }, { "epoch": 0.82, "learning_rate": 0.0003334165247064315, "loss": 0.118, "step": 121200 }, { "epoch": 0.82, "learning_rate": 0.0003332993209558833, "loss": 0.1189, "step": 121400 }, { "epoch": 0.82, "learning_rate": 0.0003331819512285602, "loss": 0.113, "step": 121600 }, { "epoch": 0.82, "learning_rate": 0.0003330650037460781, "loss": 0.1162, "step": 121800 }, { "epoch": 0.82, "learning_rate": 0.00033294730328792147, "loss": 0.1105, "step": 122000 }, { "epoch": 0.82, "learning_rate": 0.0003328294372471769, "loss": 0.1041, "step": 122200 }, { "epoch": 0.82, "learning_rate": 0.00033271140575583016, "loss": 0.1064, "step": 122400 }, { "epoch": 0.83, "learning_rate": 0.000332593208946052, "loss": 0.1035, "step": 122600 }, { "epoch": 0.83, "learning_rate": 0.0003324748469501984, "loss": 0.1061, "step": 122800 }, { "epoch": 0.83, "learning_rate": 0.00033235631990081034, "loss": 0.1199, "step": 123000 }, { "epoch": 0.83, "learning_rate": 0.00033223762793061363, "loss": 0.1074, "step": 123200 }, { "epoch": 0.83, "learning_rate": 0.00033211877117251865, "loss": 0.1195, "step": 123400 }, { "epoch": 0.83, "learning_rate": 0.00033199974975962047, "loss": 0.1132, "step": 123600 }, { "epoch": 0.83, "learning_rate": 0.0003318811601638971, "loss": 0.1206, "step": 123800 }, { "epoch": 0.84, "learning_rate": 0.0003317618106630228, "loss": 0.1195, "step": 124000 }, { "epoch": 0.84, "learning_rate": 0.0003316428948842102, "loss": 0.1057, "step": 124200 }, { "epoch": 0.84, "learning_rate": 0.000331523217827277, "loss": 0.1067, "step": 124400 }, { "epoch": 0.84, "learning_rate": 0.00033140337678243725, "loss": 0.1185, "step": 124600 }, { "epoch": 0.84, "learning_rate": 0.0003312833718838879, "loss": 0.1089, "step": 124800 }, { "epoch": 0.84, "learning_rate": 0.00033116320326600986, "loss": 0.104, "step": 125000 }, { "epoch": 0.84, "learning_rate": 0.000331042871063367, "loss": 0.1254, "step": 125200 }, { "epoch": 0.84, "learning_rate": 0.00033092237541070647, "loss": 0.1085, "step": 125400 }, { "epoch": 0.85, "learning_rate": 0.0003308017164429586, "loss": 0.1132, "step": 125600 }, { "epoch": 0.85, "learning_rate": 0.00033068089429523636, "loss": 0.0999, "step": 125800 }, { "epoch": 0.85, "learning_rate": 0.00033055990910283567, "loss": 0.106, "step": 126000 }, { "epoch": 0.85, "learning_rate": 0.00033043876100123486, "loss": 0.1066, "step": 126200 }, { "epoch": 0.85, "learning_rate": 0.0003303174501260947, "loss": 0.1109, "step": 126400 }, { "epoch": 0.85, "learning_rate": 0.00033019597661325845, "loss": 0.1097, "step": 126600 }, { "epoch": 0.85, "learning_rate": 0.00033007434059875116, "loss": 0.1142, "step": 126800 }, { "epoch": 0.86, "learning_rate": 0.00032995254221878013, "loss": 0.1156, "step": 127000 }, { "epoch": 0.86, "learning_rate": 0.00032983058160973426, "loss": 0.1143, "step": 127200 }, { "epoch": 0.86, "learning_rate": 0.00032970845890818425, "loss": 0.1193, "step": 127400 }, { "epoch": 0.86, "learning_rate": 0.00032958617425088226, "loss": 0.1052, "step": 127600 }, { "epoch": 0.86, "learning_rate": 0.00032946372777476184, "loss": 0.1078, "step": 127800 }, { "epoch": 0.86, "learning_rate": 0.0003293411196169377, "loss": 0.1017, "step": 128000 }, { "epoch": 0.86, "learning_rate": 0.00032921834991470565, "loss": 0.1029, "step": 128200 }, { "epoch": 0.86, "learning_rate": 0.0003290960338623602, "loss": 0.0962, "step": 128400 }, { "epoch": 0.87, "learning_rate": 0.00032897294228992666, "loss": 0.1078, "step": 128600 }, { "epoch": 0.87, "learning_rate": 0.00032884968958536763, "loss": 0.1129, "step": 128800 }, { "epoch": 0.87, "learning_rate": 0.00032872627588670074, "loss": 0.1107, "step": 129000 }, { "epoch": 0.87, "learning_rate": 0.00032860270133212367, "loss": 0.1232, "step": 129200 }, { "epoch": 0.87, "learning_rate": 0.0003284789660600145, "loss": 0.1185, "step": 129400 }, { "epoch": 0.87, "learning_rate": 0.00032835507020893096, "loss": 0.111, "step": 129600 }, { "epoch": 0.87, "learning_rate": 0.000328231013917611, "loss": 0.1082, "step": 129800 }, { "epoch": 0.88, "learning_rate": 0.00032810741880645453, "loss": 0.1128, "step": 130000 }, { "epoch": 0.88, "learning_rate": 0.000327983042852058, "loss": 0.1224, "step": 130200 }, { "epoch": 0.88, "learning_rate": 0.0003278585068740186, "loss": 0.1127, "step": 130400 }, { "epoch": 0.88, "learning_rate": 0.00032773381101179096, "loss": 0.1122, "step": 130600 }, { "epoch": 0.88, "learning_rate": 0.00032760895540500854, "loss": 0.103, "step": 130800 }, { "epoch": 0.88, "learning_rate": 0.000327483940193484, "loss": 0.1129, "step": 131000 }, { "epoch": 0.88, "learning_rate": 0.00032735876551720825, "loss": 0.1118, "step": 131200 }, { "epoch": 0.89, "learning_rate": 0.00032723343151635126, "loss": 0.1141, "step": 131400 }, { "epoch": 0.89, "learning_rate": 0.00032710793833126114, "loss": 0.1043, "step": 131600 }, { "epoch": 0.89, "learning_rate": 0.0003269822861024642, "loss": 0.1223, "step": 131800 }, { "epoch": 0.89, "learning_rate": 0.0003268564749706651, "loss": 0.1129, "step": 132000 }, { "epoch": 0.89, "learning_rate": 0.0003267305050767462, "loss": 0.1159, "step": 132200 }, { "epoch": 0.89, "learning_rate": 0.00032660437656176775, "loss": 0.1026, "step": 132400 }, { "epoch": 0.89, "learning_rate": 0.00032647808956696767, "loss": 0.1107, "step": 132600 }, { "epoch": 0.89, "learning_rate": 0.00032635164423376107, "loss": 0.1001, "step": 132800 }, { "epoch": 0.9, "learning_rate": 0.0003262250407037408, "loss": 0.1046, "step": 133000 }, { "epoch": 0.9, "learning_rate": 0.0003260982791186765, "loss": 0.1198, "step": 133200 }, { "epoch": 0.9, "learning_rate": 0.000325971359620515, "loss": 0.1114, "step": 133400 }, { "epoch": 0.9, "learning_rate": 0.0003258442823513799, "loss": 0.1076, "step": 133600 }, { "epoch": 0.9, "learning_rate": 0.00032571704745357134, "loss": 0.1073, "step": 133800 }, { "epoch": 0.9, "learning_rate": 0.00032558965506956623, "loss": 0.1153, "step": 134000 }, { "epoch": 0.9, "learning_rate": 0.0003254621053420176, "loss": 0.111, "step": 134200 }, { "epoch": 0.91, "learning_rate": 0.00032533503733919646, "loss": 0.1122, "step": 134400 }, { "epoch": 0.91, "learning_rate": 0.0003252071741381574, "loss": 0.1042, "step": 134600 }, { "epoch": 0.91, "learning_rate": 0.0003250791540218744, "loss": 0.1103, "step": 134800 }, { "epoch": 0.91, "learning_rate": 0.0003249509771337035, "loss": 0.11, "step": 135000 }, { "epoch": 0.91, "learning_rate": 0.0003248226436171763, "loss": 0.1291, "step": 135200 }, { "epoch": 0.91, "learning_rate": 0.0003246941536159999, "loss": 0.1095, "step": 135400 }, { "epoch": 0.91, "learning_rate": 0.0003245655072740564, "loss": 0.1084, "step": 135600 }, { "epoch": 0.91, "learning_rate": 0.0003244367047354031, "loss": 0.0981, "step": 135800 }, { "epoch": 0.92, "learning_rate": 0.00032430839132516974, "loss": 0.1288, "step": 136000 }, { "epoch": 0.92, "learning_rate": 0.00032417927760514917, "loss": 0.1036, "step": 136200 }, { "epoch": 0.92, "learning_rate": 0.0003240500081209162, "loss": 0.1092, "step": 136400 }, { "epoch": 0.92, "learning_rate": 0.00032392058301722597, "loss": 0.1103, "step": 136600 }, { "epoch": 0.92, "learning_rate": 0.00032379100243900774, "loss": 0.1238, "step": 136800 }, { "epoch": 0.92, "learning_rate": 0.000323661266531365, "loss": 0.1125, "step": 137000 }, { "epoch": 0.92, "learning_rate": 0.0003235313754395751, "loss": 0.1181, "step": 137200 }, { "epoch": 0.93, "learning_rate": 0.0003234013293090893, "loss": 0.1107, "step": 137400 }, { "epoch": 0.93, "learning_rate": 0.00032327112828553224, "loss": 0.1189, "step": 137600 }, { "epoch": 0.93, "learning_rate": 0.00032314077251470224, "loss": 0.1177, "step": 137800 }, { "epoch": 0.93, "learning_rate": 0.00032301026214257077, "loss": 0.1147, "step": 138000 }, { "epoch": 0.93, "learning_rate": 0.00032287959731528245, "loss": 0.1098, "step": 138200 }, { "epoch": 0.93, "learning_rate": 0.00032274877817915483, "loss": 0.1114, "step": 138400 }, { "epoch": 0.93, "learning_rate": 0.0003226178048806784, "loss": 0.1136, "step": 138600 }, { "epoch": 0.94, "learning_rate": 0.000322486677566516, "loss": 0.1111, "step": 138800 }, { "epoch": 0.94, "learning_rate": 0.0003223553963835033, "loss": 0.1106, "step": 139000 }, { "epoch": 0.94, "learning_rate": 0.0003222239614786479, "loss": 0.1227, "step": 139200 }, { "epoch": 0.94, "learning_rate": 0.0003220923729991298, "loss": 0.1115, "step": 139400 }, { "epoch": 0.94, "learning_rate": 0.00032196063109230086, "loss": 0.119, "step": 139600 }, { "epoch": 0.94, "learning_rate": 0.0003218287359056848, "loss": 0.111, "step": 139800 }, { "epoch": 0.94, "learning_rate": 0.00032169668758697683, "loss": 0.1146, "step": 140000 }, { "epoch": 0.94, "learning_rate": 0.0003215651476708621, "loss": 0.0971, "step": 140200 }, { "epoch": 0.95, "learning_rate": 0.0003214327942955547, "loss": 0.1157, "step": 140400 }, { "epoch": 0.95, "learning_rate": 0.0003213002882315282, "loss": 0.1192, "step": 140600 }, { "epoch": 0.95, "learning_rate": 0.00032116762962716206, "loss": 0.1189, "step": 140800 }, { "epoch": 0.95, "learning_rate": 0.0003210348186310064, "loss": 0.113, "step": 141000 }, { "epoch": 0.95, "learning_rate": 0.0003209018553917821, "loss": 0.1198, "step": 141200 }, { "epoch": 0.95, "learning_rate": 0.00032076874005838044, "loss": 0.1082, "step": 141400 }, { "epoch": 0.95, "learning_rate": 0.0003206354727798631, "loss": 0.1188, "step": 141600 }, { "epoch": 0.96, "learning_rate": 0.00032050205370546186, "loss": 0.112, "step": 141800 }, { "epoch": 0.96, "learning_rate": 0.0003203684829845785, "loss": 0.1139, "step": 142000 }, { "epoch": 0.96, "learning_rate": 0.0003202347607667846, "loss": 0.1087, "step": 142200 }, { "epoch": 0.96, "learning_rate": 0.00032010222668614745, "loss": 0.1351, "step": 142400 }, { "epoch": 0.96, "learning_rate": 0.00031996820343515567, "loss": 0.1126, "step": 142600 }, { "epoch": 0.96, "learning_rate": 0.00031983402913548365, "loss": 0.1261, "step": 142800 }, { "epoch": 0.96, "learning_rate": 0.0003196997039373788, "loss": 0.1123, "step": 143000 }, { "epoch": 0.96, "learning_rate": 0.0003195652279912577, "loss": 0.1252, "step": 143200 }, { "epoch": 0.97, "learning_rate": 0.00031943060144770555, "loss": 0.1031, "step": 143400 }, { "epoch": 0.97, "learning_rate": 0.00031929582445747623, "loss": 0.1078, "step": 143600 }, { "epoch": 0.97, "learning_rate": 0.0003191608971714921, "loss": 0.1157, "step": 143800 }, { "epoch": 0.97, "learning_rate": 0.00031902581974084374, "loss": 0.107, "step": 144000 }, { "epoch": 0.97, "learning_rate": 0.00031889059231679, "loss": 0.1033, "step": 144200 }, { "epoch": 0.97, "learning_rate": 0.00031875521505075774, "loss": 0.1127, "step": 144400 }, { "epoch": 0.97, "learning_rate": 0.0003186196880943413, "loss": 0.119, "step": 144600 }, { "epoch": 0.98, "learning_rate": 0.00031848401159930296, "loss": 0.1187, "step": 144800 }, { "epoch": 0.98, "learning_rate": 0.00031834818571757227, "loss": 0.1131, "step": 145000 }, { "epoch": 0.98, "learning_rate": 0.0003182122106012462, "loss": 0.1141, "step": 145200 }, { "epoch": 0.98, "learning_rate": 0.00031807608640258873, "loss": 0.1158, "step": 145400 }, { "epoch": 0.98, "learning_rate": 0.00031793981327403085, "loss": 0.112, "step": 145600 }, { "epoch": 0.98, "learning_rate": 0.0003178033913681702, "loss": 0.1089, "step": 145800 }, { "epoch": 0.98, "learning_rate": 0.0003176668208377711, "loss": 0.1117, "step": 146000 }, { "epoch": 0.98, "learning_rate": 0.0003175301018357644, "loss": 0.1141, "step": 146200 }, { "epoch": 0.99, "learning_rate": 0.000317393234515247, "loss": 0.1145, "step": 146400 }, { "epoch": 0.99, "learning_rate": 0.0003172562190294821, "loss": 0.1055, "step": 146600 }, { "epoch": 0.99, "learning_rate": 0.0003171197417173119, "loss": 0.1074, "step": 146800 }, { "epoch": 0.99, "learning_rate": 0.0003169824311004134, "loss": 0.1166, "step": 147000 }, { "epoch": 0.99, "learning_rate": 0.0003168449727782822, "loss": 0.1075, "step": 147200 }, { "epoch": 0.99, "learning_rate": 0.00031670736690484315, "loss": 0.0993, "step": 147400 }, { "epoch": 0.99, "learning_rate": 0.0003165696136341865, "loss": 0.1093, "step": 147600 }, { "epoch": 1.0, "learning_rate": 0.0003164317131205673, "loss": 0.109, "step": 147800 }, { "epoch": 1.0, "learning_rate": 0.00031629366551840567, "loss": 0.1129, "step": 148000 }, { "epoch": 1.0, "learning_rate": 0.00031615547098228636, "loss": 0.1125, "step": 148200 }, { "epoch": 1.0, "learning_rate": 0.0003160171296669587, "loss": 0.1057, "step": 148400 }, { "epoch": 1.0, "eval_accuracy": 0.9420512498988455, "eval_auc": 0.8522129916967822, "eval_f1": 0.35228549223252686, "eval_loss": 0.4365999102592468, "eval_mcc": 0.39725936154292724, "eval_precision": 0.2294216317399737, "eval_recall": 0.7584797572261781, "eval_runtime": 9338.8173, "eval_samples_per_second": 23.624, "eval_steps_per_second": 3.937, "step": 148440 } ], "logging_steps": 200, "max_steps": 593760, "num_train_epochs": 4, "save_steps": 500, "total_flos": 1.784375613961479e+17, "trial_name": null, "trial_params": null }