{
  "best_metric": 0.20471014082431793,
  "best_model_checkpoint": "./results_all/checkpoint-2000",
  "epoch": 9.945750452079565,
  "eval_steps": 1000,
  "global_step": 11000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05,
      "grad_norm": 5.776618480682373,
      "learning_rate": 9.954792043399639e-06,
      "loss": 0.4562,
      "step": 50
    },
    {
      "epoch": 0.09,
      "grad_norm": 4.7439141273498535,
      "learning_rate": 9.909584086799278e-06,
      "loss": 0.2825,
      "step": 100
    },
    {
      "epoch": 0.14,
      "grad_norm": 8.098787307739258,
      "learning_rate": 9.864376130198916e-06,
      "loss": 0.248,
      "step": 150
    },
    {
      "epoch": 0.18,
      "grad_norm": 4.512809753417969,
      "learning_rate": 9.819168173598554e-06,
      "loss": 0.2309,
      "step": 200
    },
    {
      "epoch": 0.23,
      "grad_norm": 4.11778450012207,
      "learning_rate": 9.773960216998193e-06,
      "loss": 0.2357,
      "step": 250
    },
    {
      "epoch": 0.27,
      "grad_norm": 2.7208287715911865,
      "learning_rate": 9.728752260397831e-06,
      "loss": 0.2285,
      "step": 300
    },
    {
      "epoch": 0.32,
      "grad_norm": 5.254836082458496,
      "learning_rate": 9.68354430379747e-06,
      "loss": 0.2189,
      "step": 350
    },
    {
      "epoch": 0.36,
      "grad_norm": 6.350110054016113,
      "learning_rate": 9.638336347197107e-06,
      "loss": 0.2197,
      "step": 400
    },
    {
      "epoch": 0.41,
      "grad_norm": 3.0524585247039795,
      "learning_rate": 9.593128390596746e-06,
      "loss": 0.2108,
      "step": 450
    },
    {
      "epoch": 0.45,
      "grad_norm": 3.113680362701416,
      "learning_rate": 9.547920433996384e-06,
      "loss": 0.206,
      "step": 500
    },
    {
      "epoch": 0.5,
      "grad_norm": 3.020354986190796,
      "learning_rate": 9.502712477396022e-06,
      "loss": 0.2123,
      "step": 550
    },
    {
      "epoch": 0.54,
      "grad_norm": 3.340101480484009,
      "learning_rate": 9.457504520795661e-06,
      "loss": 0.2137,
      "step": 600
    },
    {
      "epoch": 0.59,
      "grad_norm": 4.386758804321289,
      "learning_rate": 9.412296564195299e-06,
      "loss": 0.2071,
      "step": 650
    },
    {
      "epoch": 0.63,
      "grad_norm": 4.073632717132568,
      "learning_rate": 9.367088607594937e-06,
      "loss": 0.2063,
      "step": 700
    },
    {
      "epoch": 0.68,
      "grad_norm": 2.6999106407165527,
      "learning_rate": 9.321880650994576e-06,
      "loss": 0.2089,
      "step": 750
    },
    {
      "epoch": 0.72,
      "grad_norm": 2.527625322341919,
      "learning_rate": 9.276672694394214e-06,
      "loss": 0.2025,
      "step": 800
    },
    {
      "epoch": 0.77,
      "grad_norm": 3.8473989963531494,
      "learning_rate": 9.231464737793852e-06,
      "loss": 0.2059,
      "step": 850
    },
    {
      "epoch": 0.81,
      "grad_norm": 2.255798816680908,
      "learning_rate": 9.186256781193491e-06,
      "loss": 0.2018,
      "step": 900
    },
    {
      "epoch": 0.86,
      "grad_norm": 3.915482521057129,
      "learning_rate": 9.14104882459313e-06,
      "loss": 0.1985,
      "step": 950
    },
    {
      "epoch": 0.9,
      "grad_norm": 2.375387191772461,
      "learning_rate": 9.095840867992769e-06,
      "loss": 0.2014,
      "step": 1000
    },
    {
      "epoch": 0.9,
      "eval_accuracy": 0.9132575757575757,
      "eval_auc": 0.9716989722609015,
      "eval_f1": 0.875638101444553,
      "eval_loss": 0.20884743332862854,
      "eval_runtime": 53.4002,
      "eval_samples_per_second": 247.19,
      "eval_steps_per_second": 3.876,
      "step": 1000
    },
    {
      "epoch": 0.95,
      "grad_norm": 3.186845302581787,
      "learning_rate": 9.050632911392407e-06,
      "loss": 0.1916,
      "step": 1050
    },
    {
      "epoch": 0.99,
      "grad_norm": 3.6988000869750977,
      "learning_rate": 9.005424954792044e-06,
      "loss": 0.1913,
      "step": 1100
    },
    {
      "epoch": 1.04,
      "grad_norm": 3.2452192306518555,
      "learning_rate": 8.960216998191682e-06,
      "loss": 0.1836,
      "step": 1150
    },
    {
      "epoch": 1.08,
      "grad_norm": 3.898977041244507,
      "learning_rate": 8.91500904159132e-06,
      "loss": 0.1848,
      "step": 1200
    },
    {
      "epoch": 1.13,
      "grad_norm": 1.8791488409042358,
      "learning_rate": 8.86980108499096e-06,
      "loss": 0.1812,
      "step": 1250
    },
    {
      "epoch": 1.18,
      "grad_norm": 3.3532564640045166,
      "learning_rate": 8.824593128390597e-06,
      "loss": 0.1785,
      "step": 1300
    },
    {
      "epoch": 1.22,
      "grad_norm": 3.177605390548706,
      "learning_rate": 8.779385171790235e-06,
      "loss": 0.1888,
      "step": 1350
    },
    {
      "epoch": 1.27,
      "grad_norm": 4.205639362335205,
      "learning_rate": 8.734177215189874e-06,
      "loss": 0.1807,
      "step": 1400
    },
    {
      "epoch": 1.31,
      "grad_norm": 2.800039052963257,
      "learning_rate": 8.688969258589512e-06,
      "loss": 0.1851,
      "step": 1450
    },
    {
      "epoch": 1.36,
      "grad_norm": 3.714484930038452,
      "learning_rate": 8.64376130198915e-06,
      "loss": 0.1854,
      "step": 1500
    },
    {
      "epoch": 1.4,
      "grad_norm": 3.9688687324523926,
      "learning_rate": 8.59855334538879e-06,
      "loss": 0.1781,
      "step": 1550
    },
    {
      "epoch": 1.45,
      "grad_norm": 3.0902647972106934,
      "learning_rate": 8.553345388788427e-06,
      "loss": 0.1789,
      "step": 1600
    },
    {
      "epoch": 1.49,
      "grad_norm": 2.457174301147461,
      "learning_rate": 8.508137432188067e-06,
      "loss": 0.1743,
      "step": 1650
    },
    {
      "epoch": 1.54,
      "grad_norm": 2.403961420059204,
      "learning_rate": 8.462929475587705e-06,
      "loss": 0.1854,
      "step": 1700
    },
    {
      "epoch": 1.58,
      "grad_norm": 4.13264274597168,
      "learning_rate": 8.417721518987342e-06,
      "loss": 0.1833,
      "step": 1750
    },
    {
      "epoch": 1.63,
      "grad_norm": 3.0983669757843018,
      "learning_rate": 8.372513562386982e-06,
      "loss": 0.1796,
      "step": 1800
    },
    {
      "epoch": 1.67,
      "grad_norm": 2.797614812850952,
      "learning_rate": 8.327305605786618e-06,
      "loss": 0.1745,
      "step": 1850
    },
    {
      "epoch": 1.72,
      "grad_norm": 2.1761841773986816,
      "learning_rate": 8.282097649186258e-06,
      "loss": 0.1738,
      "step": 1900
    },
    {
      "epoch": 1.76,
      "grad_norm": 2.973947286605835,
      "learning_rate": 8.236889692585895e-06,
      "loss": 0.1756,
      "step": 1950
    },
    {
      "epoch": 1.81,
      "grad_norm": 3.152864694595337,
      "learning_rate": 8.191681735985533e-06,
      "loss": 0.1755,
      "step": 2000
    },
    {
      "epoch": 1.81,
      "eval_accuracy": 0.9200757575757575,
      "eval_auc": 0.974728335511236,
      "eval_f1": 0.8865225341508013,
      "eval_loss": 0.20471014082431793,
      "eval_runtime": 53.1495,
      "eval_samples_per_second": 248.356,
      "eval_steps_per_second": 3.895,
      "step": 2000
    },
    {
      "epoch": 1.85,
      "grad_norm": 2.979734420776367,
      "learning_rate": 8.146473779385173e-06,
      "loss": 0.1762,
      "step": 2050
    },
    {
      "epoch": 1.9,
      "grad_norm": 2.802384853363037,
      "learning_rate": 8.10126582278481e-06,
      "loss": 0.1764,
      "step": 2100
    },
    {
      "epoch": 1.94,
      "grad_norm": 4.056249618530273,
      "learning_rate": 8.056057866184448e-06,
      "loss": 0.1786,
      "step": 2150
    },
    {
      "epoch": 1.99,
      "grad_norm": 3.7627487182617188,
      "learning_rate": 8.010849909584088e-06,
      "loss": 0.1755,
      "step": 2200
    },
    {
      "epoch": 2.03,
      "grad_norm": 2.634819984436035,
      "learning_rate": 7.965641952983726e-06,
      "loss": 0.1642,
      "step": 2250
    },
    {
      "epoch": 2.08,
      "grad_norm": 3.3615341186523438,
      "learning_rate": 7.920433996383365e-06,
      "loss": 0.1581,
      "step": 2300
    },
    {
      "epoch": 2.12,
      "grad_norm": 3.702347993850708,
      "learning_rate": 7.875226039783003e-06,
      "loss": 0.1597,
      "step": 2350
    },
    {
      "epoch": 2.17,
      "grad_norm": 11.943075180053711,
      "learning_rate": 7.83001808318264e-06,
      "loss": 0.1565,
      "step": 2400
    },
    {
      "epoch": 2.22,
      "grad_norm": 4.544873237609863,
      "learning_rate": 7.78481012658228e-06,
      "loss": 0.1607,
      "step": 2450
    },
    {
      "epoch": 2.26,
      "grad_norm": 3.886201858520508,
      "learning_rate": 7.739602169981918e-06,
      "loss": 0.155,
      "step": 2500
    },
    {
      "epoch": 2.31,
      "grad_norm": 2.688396692276001,
      "learning_rate": 7.694394213381556e-06,
      "loss": 0.1637,
      "step": 2550
    },
    {
      "epoch": 2.35,
      "grad_norm": 4.257401466369629,
      "learning_rate": 7.649186256781194e-06,
      "loss": 0.1518,
      "step": 2600
    },
    {
      "epoch": 2.4,
      "grad_norm": 3.5023510456085205,
      "learning_rate": 7.603978300180832e-06,
      "loss": 0.1545,
      "step": 2650
    },
    {
      "epoch": 2.44,
      "grad_norm": 3.3911523818969727,
      "learning_rate": 7.558770343580471e-06,
      "loss": 0.1587,
      "step": 2700
    },
    {
      "epoch": 2.49,
      "grad_norm": 3.0427675247192383,
      "learning_rate": 7.513562386980109e-06,
      "loss": 0.1592,
      "step": 2750
    },
    {
      "epoch": 2.53,
      "grad_norm": 3.7051773071289062,
      "learning_rate": 7.468354430379747e-06,
      "loss": 0.1643,
      "step": 2800
    },
    {
      "epoch": 2.58,
      "grad_norm": 3.8492684364318848,
      "learning_rate": 7.423146473779386e-06,
      "loss": 0.1554,
      "step": 2850
    },
    {
      "epoch": 2.62,
      "grad_norm": 2.938525676727295,
      "learning_rate": 7.377938517179024e-06,
      "loss": 0.1612,
      "step": 2900
    },
    {
      "epoch": 2.67,
      "grad_norm": 2.807177782058716,
      "learning_rate": 7.3327305605786624e-06,
      "loss": 0.1559,
      "step": 2950
    },
    {
      "epoch": 2.71,
      "grad_norm": 2.838815212249756,
      "learning_rate": 7.287522603978301e-06,
      "loss": 0.1586,
      "step": 3000
    },
    {
      "epoch": 2.71,
      "eval_accuracy": 0.9209848484848485,
      "eval_auc": 0.9746563484736904,
      "eval_f1": 0.885447556287754,
      "eval_loss": 0.2061145007610321,
      "eval_runtime": 53.3625,
      "eval_samples_per_second": 247.365,
      "eval_steps_per_second": 3.879,
      "step": 3000
    },
    {
      "epoch": 2.76,
      "grad_norm": 3.3024938106536865,
      "learning_rate": 7.24231464737794e-06,
      "loss": 0.1613,
      "step": 3050
    },
    {
      "epoch": 2.8,
      "grad_norm": 3.188176393508911,
      "learning_rate": 7.1971066907775775e-06,
      "loss": 0.1613,
      "step": 3100
    },
    {
      "epoch": 2.85,
      "grad_norm": 2.8831701278686523,
      "learning_rate": 7.151898734177216e-06,
      "loss": 0.152,
      "step": 3150
    },
    {
      "epoch": 2.89,
      "grad_norm": 3.3415660858154297,
      "learning_rate": 7.106690777576855e-06,
      "loss": 0.1606,
      "step": 3200
    },
    {
      "epoch": 2.94,
      "grad_norm": 3.208709478378296,
      "learning_rate": 7.061482820976493e-06,
      "loss": 0.1569,
      "step": 3250
    },
    {
      "epoch": 2.98,
      "grad_norm": 3.284241199493408,
      "learning_rate": 7.01627486437613e-06,
      "loss": 0.1586,
      "step": 3300
    },
    {
      "epoch": 3.03,
      "grad_norm": 4.061830520629883,
      "learning_rate": 6.971066907775769e-06,
      "loss": 0.1494,
      "step": 3350
    },
    {
      "epoch": 3.07,
      "grad_norm": 4.065818786621094,
      "learning_rate": 6.925858951175407e-06,
      "loss": 0.142,
      "step": 3400
    },
    {
      "epoch": 3.12,
      "grad_norm": 4.541987895965576,
      "learning_rate": 6.8806509945750455e-06,
      "loss": 0.1436,
      "step": 3450
    },
    {
      "epoch": 3.16,
      "grad_norm": 3.972423791885376,
      "learning_rate": 6.835443037974684e-06,
      "loss": 0.1323,
      "step": 3500
    },
    {
      "epoch": 3.21,
      "grad_norm": 6.7635579109191895,
      "learning_rate": 6.790235081374322e-06,
      "loss": 0.1398,
      "step": 3550
    },
    {
      "epoch": 3.25,
      "grad_norm": 3.5160984992980957,
      "learning_rate": 6.745027124773961e-06,
      "loss": 0.1418,
      "step": 3600
    },
    {
      "epoch": 3.3,
      "grad_norm": 2.7026939392089844,
      "learning_rate": 6.699819168173599e-06,
      "loss": 0.1352,
      "step": 3650
    },
    {
      "epoch": 3.35,
      "grad_norm": 3.9507486820220947,
      "learning_rate": 6.654611211573238e-06,
      "loss": 0.1406,
      "step": 3700
    },
    {
      "epoch": 3.39,
      "grad_norm": 3.7145042419433594,
      "learning_rate": 6.609403254972876e-06,
      "loss": 0.1355,
      "step": 3750
    },
    {
      "epoch": 3.44,
      "grad_norm": 3.2809576988220215,
      "learning_rate": 6.564195298372514e-06,
      "loss": 0.1406,
      "step": 3800
    },
    {
      "epoch": 3.48,
      "grad_norm": 3.0519375801086426,
      "learning_rate": 6.518987341772153e-06,
      "loss": 0.1352,
      "step": 3850
    },
    {
      "epoch": 3.53,
      "grad_norm": 3.6294689178466797,
      "learning_rate": 6.473779385171791e-06,
      "loss": 0.1396,
      "step": 3900
    },
    {
      "epoch": 3.57,
      "grad_norm": 3.4787845611572266,
      "learning_rate": 6.4285714285714295e-06,
      "loss": 0.1409,
      "step": 3950
    },
    {
      "epoch": 3.62,
      "grad_norm": 3.510904550552368,
      "learning_rate": 6.383363471971068e-06,
      "loss": 0.141,
      "step": 4000
    },
    {
      "epoch": 3.62,
      "eval_accuracy": 0.9195454545454546,
      "eval_auc": 0.9743002838306037,
      "eval_f1": 0.8847406121120035,
      "eval_loss": 0.22910813987255096,
      "eval_runtime": 53.2967,
      "eval_samples_per_second": 247.67,
      "eval_steps_per_second": 3.884,
      "step": 4000
    },
    {
      "epoch": 3.66,
      "grad_norm": 3.591719150543213,
      "learning_rate": 6.338155515370705e-06,
      "loss": 0.1368,
      "step": 4050
    },
    {
      "epoch": 3.71,
      "grad_norm": 4.009765625,
      "learning_rate": 6.292947558770344e-06,
      "loss": 0.1344,
      "step": 4100
    },
    {
      "epoch": 3.75,
      "grad_norm": 3.1466119289398193,
      "learning_rate": 6.247739602169982e-06,
      "loss": 0.1424,
      "step": 4150
    },
    {
      "epoch": 3.8,
      "grad_norm": 3.8974554538726807,
      "learning_rate": 6.20253164556962e-06,
      "loss": 0.1374,
      "step": 4200
    },
    {
      "epoch": 3.84,
      "grad_norm": 3.8210461139678955,
      "learning_rate": 6.157323688969259e-06,
      "loss": 0.1365,
      "step": 4250
    },
    {
      "epoch": 3.89,
      "grad_norm": 3.865953207015991,
      "learning_rate": 6.1121157323688975e-06,
      "loss": 0.1416,
      "step": 4300
    },
    {
      "epoch": 3.93,
      "grad_norm": 3.4419188499450684,
      "learning_rate": 6.066907775768536e-06,
      "loss": 0.1394,
      "step": 4350
    },
    {
      "epoch": 3.98,
      "grad_norm": 3.2424869537353516,
      "learning_rate": 6.021699819168174e-06,
      "loss": 0.1379,
      "step": 4400
    },
    {
      "epoch": 4.02,
      "grad_norm": 4.413413047790527,
      "learning_rate": 5.9764918625678126e-06,
      "loss": 0.1305,
      "step": 4450
    },
    {
      "epoch": 4.07,
      "grad_norm": 5.382500648498535,
      "learning_rate": 5.931283905967451e-06,
      "loss": 0.1246,
      "step": 4500
    },
    {
      "epoch": 4.11,
      "grad_norm": 3.337273120880127,
      "learning_rate": 5.886075949367089e-06,
      "loss": 0.121,
      "step": 4550
    },
    {
      "epoch": 4.16,
      "grad_norm": 4.4842705726623535,
      "learning_rate": 5.840867992766728e-06,
      "loss": 0.12,
      "step": 4600
    },
    {
      "epoch": 4.2,
      "grad_norm": 3.6167163848876953,
      "learning_rate": 5.795660036166366e-06,
      "loss": 0.1223,
      "step": 4650
    },
    {
      "epoch": 4.25,
      "grad_norm": 3.679234027862549,
      "learning_rate": 5.750452079566005e-06,
      "loss": 0.123,
      "step": 4700
    },
    {
      "epoch": 4.29,
      "grad_norm": 4.960000991821289,
      "learning_rate": 5.705244122965642e-06,
      "loss": 0.1175,
      "step": 4750
    },
    {
      "epoch": 4.34,
      "grad_norm": 5.214667320251465,
      "learning_rate": 5.6600361663652806e-06,
      "loss": 0.1237,
      "step": 4800
    },
    {
      "epoch": 4.39,
      "grad_norm": 3.636551856994629,
      "learning_rate": 5.614828209764918e-06,
      "loss": 0.121,
      "step": 4850
    },
    {
      "epoch": 4.43,
      "grad_norm": 4.095456123352051,
      "learning_rate": 5.569620253164557e-06,
      "loss": 0.118,
      "step": 4900
    },
    {
      "epoch": 4.48,
      "grad_norm": 3.4457345008850098,
      "learning_rate": 5.524412296564196e-06,
      "loss": 0.1226,
      "step": 4950
    },
    {
      "epoch": 4.52,
      "grad_norm": 4.705195903778076,
      "learning_rate": 5.479204339963834e-06,
      "loss": 0.1232,
      "step": 5000
    },
    {
      "epoch": 4.52,
      "eval_accuracy": 0.9215909090909091,
      "eval_auc": 0.9733304450242736,
      "eval_f1": 0.8873653281096964,
      "eval_loss": 0.26092976331710815,
      "eval_runtime": 52.7152,
      "eval_samples_per_second": 250.402,
      "eval_steps_per_second": 3.927,
      "step": 5000
    },
    {
      "epoch": 4.57,
      "grad_norm": 5.460992336273193,
      "learning_rate": 5.433996383363472e-06,
      "loss": 0.1191,
      "step": 5050
    },
    {
      "epoch": 4.61,
      "grad_norm": 4.26489782333374,
      "learning_rate": 5.388788426763111e-06,
      "loss": 0.1206,
      "step": 5100
    },
    {
      "epoch": 4.66,
      "grad_norm": 5.637485027313232,
      "learning_rate": 5.343580470162749e-06,
      "loss": 0.1237,
      "step": 5150
    },
    {
      "epoch": 4.7,
      "grad_norm": 3.848707675933838,
      "learning_rate": 5.298372513562387e-06,
      "loss": 0.1204,
      "step": 5200
    },
    {
      "epoch": 4.75,
      "grad_norm": 3.9218106269836426,
      "learning_rate": 5.253164556962026e-06,
      "loss": 0.1226,
      "step": 5250
    },
    {
      "epoch": 4.79,
      "grad_norm": 6.16193151473999,
      "learning_rate": 5.2079566003616645e-06,
      "loss": 0.1267,
      "step": 5300
    },
    {
      "epoch": 4.84,
      "grad_norm": 3.432070016860962,
      "learning_rate": 5.162748643761303e-06,
      "loss": 0.1232,
      "step": 5350
    },
    {
      "epoch": 4.88,
      "grad_norm": 4.200852394104004,
      "learning_rate": 5.117540687160941e-06,
      "loss": 0.1219,
      "step": 5400
    },
    {
      "epoch": 4.93,
      "grad_norm": 5.724985599517822,
      "learning_rate": 5.07233273056058e-06,
      "loss": 0.1215,
      "step": 5450
    },
    {
      "epoch": 4.97,
      "grad_norm": 5.0651445388793945,
      "learning_rate": 5.0271247739602165e-06,
      "loss": 0.1236,
      "step": 5500
    },
    {
      "epoch": 5.02,
      "grad_norm": 4.59727144241333,
      "learning_rate": 4.981916817359856e-06,
      "loss": 0.1177,
      "step": 5550
    },
    {
      "epoch": 5.06,
      "grad_norm": 4.37638521194458,
      "learning_rate": 4.936708860759495e-06,
      "loss": 0.1035,
      "step": 5600
    },
    {
      "epoch": 5.11,
      "grad_norm": 4.5621337890625,
      "learning_rate": 4.8915009041591325e-06,
      "loss": 0.108,
      "step": 5650
    },
    {
      "epoch": 5.15,
      "grad_norm": 3.9966683387756348,
      "learning_rate": 4.84629294755877e-06,
      "loss": 0.1057,
      "step": 5700
    },
    {
      "epoch": 5.2,
      "grad_norm": 3.630200147628784,
      "learning_rate": 4.801084990958409e-06,
      "loss": 0.1043,
      "step": 5750
    },
    {
      "epoch": 5.24,
      "grad_norm": 4.338815689086914,
      "learning_rate": 4.755877034358048e-06,
      "loss": 0.1066,
      "step": 5800
    },
    {
      "epoch": 5.29,
      "grad_norm": 2.695749282836914,
      "learning_rate": 4.710669077757685e-06,
      "loss": 0.1059,
      "step": 5850
    },
    {
      "epoch": 5.33,
      "grad_norm": 7.642387866973877,
      "learning_rate": 4.665461121157324e-06,
      "loss": 0.1073,
      "step": 5900
    },
    {
      "epoch": 5.38,
      "grad_norm": 6.227341651916504,
      "learning_rate": 4.620253164556963e-06,
      "loss": 0.1115,
      "step": 5950
    },
    {
      "epoch": 5.42,
      "grad_norm": 5.479009628295898,
      "learning_rate": 4.575045207956601e-06,
      "loss": 0.1058,
      "step": 6000
    },
    {
      "epoch": 5.42,
      "eval_accuracy": 0.913409090909091,
      "eval_auc": 0.9717929602890524,
      "eval_f1": 0.8788553259141495,
      "eval_loss": 0.2780343294143677,
      "eval_runtime": 53.9647,
      "eval_samples_per_second": 244.604,
      "eval_steps_per_second": 3.836,
      "step": 6000
    },
    {
      "epoch": 5.47,
      "grad_norm": 4.2524871826171875,
      "learning_rate": 4.529837251356239e-06,
      "loss": 0.1053,
      "step": 6050
    },
    {
      "epoch": 5.52,
      "grad_norm": 3.475076675415039,
      "learning_rate": 4.484629294755877e-06,
      "loss": 0.1068,
      "step": 6100
    },
    {
      "epoch": 5.56,
      "grad_norm": 4.190853118896484,
      "learning_rate": 4.439421338155516e-06,
      "loss": 0.1067,
      "step": 6150
    },
    {
      "epoch": 5.61,
      "grad_norm": 4.308005332946777,
      "learning_rate": 4.394213381555154e-06,
      "loss": 0.104,
      "step": 6200
    },
    {
      "epoch": 5.65,
      "grad_norm": 3.767345666885376,
      "learning_rate": 4.349005424954793e-06,
      "loss": 0.1056,
      "step": 6250
    },
    {
      "epoch": 5.7,
      "grad_norm": 3.9116408824920654,
      "learning_rate": 4.303797468354431e-06,
      "loss": 0.1123,
      "step": 6300
    },
    {
      "epoch": 5.74,
      "grad_norm": 5.004114151000977,
      "learning_rate": 4.258589511754069e-06,
      "loss": 0.1099,
      "step": 6350
    },
    {
      "epoch": 5.79,
      "grad_norm": 4.2018208503723145,
      "learning_rate": 4.213381555153707e-06,
      "loss": 0.11,
      "step": 6400
    },
    {
      "epoch": 5.83,
      "grad_norm": 3.213355779647827,
      "learning_rate": 4.168173598553346e-06,
      "loss": 0.1067,
      "step": 6450
    },
    {
      "epoch": 5.88,
      "grad_norm": 6.60046911239624,
      "learning_rate": 4.122965641952984e-06,
      "loss": 0.1056,
      "step": 6500
    },
    {
      "epoch": 5.92,
      "grad_norm": 3.6781232357025146,
      "learning_rate": 4.077757685352622e-06,
      "loss": 0.1053,
      "step": 6550
    },
    {
      "epoch": 5.97,
      "grad_norm": 3.4061131477355957,
      "learning_rate": 4.032549728752261e-06,
      "loss": 0.1084,
      "step": 6600
    },
    {
      "epoch": 6.01,
      "grad_norm": 5.02764892578125,
      "learning_rate": 3.9873417721518995e-06,
      "loss": 0.1054,
      "step": 6650
    },
    {
      "epoch": 6.06,
      "grad_norm": 4.55075740814209,
      "learning_rate": 3.942133815551537e-06,
      "loss": 0.0913,
      "step": 6700
    },
    {
      "epoch": 6.1,
      "grad_norm": 4.121731758117676,
      "learning_rate": 3.896925858951175e-06,
      "loss": 0.0928,
      "step": 6750
    },
    {
      "epoch": 6.15,
      "grad_norm": 4.887127876281738,
      "learning_rate": 3.851717902350814e-06,
      "loss": 0.092,
      "step": 6800
    },
    {
      "epoch": 6.19,
      "grad_norm": 6.114282131195068,
      "learning_rate": 3.8065099457504524e-06,
      "loss": 0.091,
      "step": 6850
    },
    {
      "epoch": 6.24,
      "grad_norm": 3.952768564224243,
      "learning_rate": 3.7613019891500906e-06,
      "loss": 0.093,
      "step": 6900
    },
    {
      "epoch": 6.28,
      "grad_norm": 4.0614800453186035,
      "learning_rate": 3.7160940325497293e-06,
      "loss": 0.0871,
      "step": 6950
    },
    {
      "epoch": 6.33,
      "grad_norm": 5.143472671508789,
      "learning_rate": 3.6708860759493675e-06,
      "loss": 0.093,
      "step": 7000
    },
    {
      "epoch": 6.33,
      "eval_accuracy": 0.9113636363636364,
      "eval_auc": 0.9692261016594985,
      "eval_f1": 0.8748127541194094,
      "eval_loss": 0.2975885272026062,
      "eval_runtime": 53.1958,
      "eval_samples_per_second": 248.14,
      "eval_steps_per_second": 3.891,
      "step": 7000
    },
    {
      "epoch": 6.37,
      "grad_norm": 3.874722957611084,
      "learning_rate": 3.6256781193490057e-06,
      "loss": 0.0949,
      "step": 7050
    },
    {
      "epoch": 6.42,
      "grad_norm": 3.9388911724090576,
      "learning_rate": 3.580470162748644e-06,
      "loss": 0.0963,
      "step": 7100
    },
    {
      "epoch": 6.46,
      "grad_norm": 3.774689197540283,
      "learning_rate": 3.535262206148282e-06,
      "loss": 0.0915,
      "step": 7150
    },
    {
      "epoch": 6.51,
      "grad_norm": 5.495402812957764,
      "learning_rate": 3.4900542495479204e-06,
      "loss": 0.0935,
      "step": 7200
    },
    {
      "epoch": 6.56,
      "grad_norm": 5.229218006134033,
      "learning_rate": 3.444846292947559e-06,
      "loss": 0.0955,
      "step": 7250
    },
    {
      "epoch": 6.6,
      "grad_norm": 8.326276779174805,
      "learning_rate": 3.3996383363471973e-06,
      "loss": 0.0905,
      "step": 7300
    },
    {
      "epoch": 6.65,
      "grad_norm": 3.566875457763672,
      "learning_rate": 3.354430379746836e-06,
      "loss": 0.0988,
      "step": 7350
    },
    {
      "epoch": 6.69,
      "grad_norm": 4.5591559410095215,
      "learning_rate": 3.309222423146474e-06,
      "loss": 0.0902,
      "step": 7400
    },
    {
      "epoch": 6.74,
      "grad_norm": 4.157392978668213,
      "learning_rate": 3.264014466546113e-06,
      "loss": 0.0937,
      "step": 7450
    },
    {
      "epoch": 6.78,
      "grad_norm": 3.770798444747925,
      "learning_rate": 3.2188065099457506e-06,
      "loss": 0.0928,
      "step": 7500
    },
    {
      "epoch": 6.83,
      "grad_norm": 4.334375858306885,
      "learning_rate": 3.173598553345389e-06,
      "loss": 0.0929,
      "step": 7550
    },
    {
      "epoch": 6.87,
      "grad_norm": 4.411478519439697,
      "learning_rate": 3.1283905967450275e-06,
      "loss": 0.0921,
      "step": 7600
    },
    {
      "epoch": 6.92,
      "grad_norm": 3.9217774868011475,
      "learning_rate": 3.0831826401446657e-06,
      "loss": 0.0907,
      "step": 7650
    },
    {
      "epoch": 6.96,
      "grad_norm": 4.253012657165527,
      "learning_rate": 3.037974683544304e-06,
      "loss": 0.0965,
      "step": 7700
    },
    {
      "epoch": 7.01,
      "grad_norm": 4.301031112670898,
      "learning_rate": 2.9927667269439426e-06,
      "loss": 0.0916,
      "step": 7750
    },
    {
      "epoch": 7.05,
      "grad_norm": 4.984923362731934,
      "learning_rate": 2.947558770343581e-06,
      "loss": 0.079,
      "step": 7800
    },
    {
      "epoch": 7.1,
      "grad_norm": 6.965585231781006,
      "learning_rate": 2.9023508137432186e-06,
      "loss": 0.0856,
      "step": 7850
    },
    {
      "epoch": 7.14,
      "grad_norm": 5.262626647949219,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 0.0814,
      "step": 7900
    },
    {
      "epoch": 7.19,
      "grad_norm": 6.5270771980285645,
      "learning_rate": 2.8119349005424955e-06,
      "loss": 0.084,
      "step": 7950
    },
    {
      "epoch": 7.23,
      "grad_norm": 4.8120808601379395,
      "learning_rate": 2.766726943942134e-06,
      "loss": 0.0811,
      "step": 8000
    },
    {
      "epoch": 7.23,
      "eval_accuracy": 0.915530303030303,
      "eval_auc": 0.969414026787581,
      "eval_f1": 0.8789753609030717,
      "eval_loss": 0.3245289623737335,
      "eval_runtime": 52.4229,
      "eval_samples_per_second": 251.799,
      "eval_steps_per_second": 3.949,
      "step": 8000
    },
    {
      "epoch": 7.28,
      "grad_norm": 6.172023773193359,
      "learning_rate": 2.7215189873417724e-06,
      "loss": 0.0845,
      "step": 8050
    },
    {
      "epoch": 7.32,
      "grad_norm": 4.027543544769287,
      "learning_rate": 2.676311030741411e-06,
      "loss": 0.081,
      "step": 8100
    },
    {
      "epoch": 7.37,
      "grad_norm": 4.544090747833252,
      "learning_rate": 2.6311030741410492e-06,
      "loss": 0.0885,
      "step": 8150
    },
    {
      "epoch": 7.41,
      "grad_norm": 5.340161323547363,
      "learning_rate": 2.585895117540687e-06,
      "loss": 0.0856,
      "step": 8200
    },
    {
      "epoch": 7.46,
      "grad_norm": 7.733421325683594,
      "learning_rate": 2.5406871609403257e-06,
      "loss": 0.0815,
      "step": 8250
    },
    {
      "epoch": 7.5,
      "grad_norm": 5.390356063842773,
      "learning_rate": 2.495479204339964e-06,
      "loss": 0.0812,
      "step": 8300
    },
    {
      "epoch": 7.55,
      "grad_norm": 4.908448696136475,
      "learning_rate": 2.4502712477396025e-06,
      "loss": 0.0814,
      "step": 8350
    },
    {
      "epoch": 7.59,
      "grad_norm": 7.446599960327148,
      "learning_rate": 2.4050632911392408e-06,
      "loss": 0.0833,
      "step": 8400
    },
    {
      "epoch": 7.64,
      "grad_norm": 4.445633888244629,
      "learning_rate": 2.359855334538879e-06,
      "loss": 0.0804,
      "step": 8450
    },
    {
      "epoch": 7.69,
      "grad_norm": 3.859055757522583,
      "learning_rate": 2.3146473779385172e-06,
      "loss": 0.0813,
      "step": 8500
    },
    {
      "epoch": 7.73,
      "grad_norm": 8.008405685424805,
      "learning_rate": 2.269439421338156e-06,
      "loss": 0.0835,
      "step": 8550
    },
    {
      "epoch": 7.78,
      "grad_norm": 4.7624711990356445,
      "learning_rate": 2.224231464737794e-06,
      "loss": 0.0827,
      "step": 8600
    },
    {
      "epoch": 7.82,
      "grad_norm": 4.379971504211426,
      "learning_rate": 2.1790235081374323e-06,
      "loss": 0.079,
      "step": 8650
    },
    {
      "epoch": 7.87,
      "grad_norm": 5.949471473693848,
      "learning_rate": 2.1338155515370705e-06,
      "loss": 0.0789,
      "step": 8700
    },
    {
      "epoch": 7.91,
      "grad_norm": 5.32230281829834,
      "learning_rate": 2.088607594936709e-06,
      "loss": 0.0799,
      "step": 8750
    },
    {
      "epoch": 7.96,
      "grad_norm": 8.27724552154541,
      "learning_rate": 2.0433996383363474e-06,
      "loss": 0.0842,
      "step": 8800
    },
    {
      "epoch": 8.0,
      "grad_norm": 6.394008159637451,
      "learning_rate": 1.9981916817359856e-06,
      "loss": 0.0808,
      "step": 8850
    },
    {
      "epoch": 8.05,
      "grad_norm": 4.033376693725586,
      "learning_rate": 1.952983725135624e-06,
      "loss": 0.0687,
      "step": 8900
    },
    {
      "epoch": 8.09,
      "grad_norm": 5.514036178588867,
      "learning_rate": 1.9077757685352625e-06,
      "loss": 0.0757,
      "step": 8950
    },
    {
      "epoch": 8.14,
      "grad_norm": 3.8214776515960693,
      "learning_rate": 1.8625678119349007e-06,
      "loss": 0.0721,
      "step": 9000
    },
    {
      "epoch": 8.14,
      "eval_accuracy": 0.9152272727272728,
      "eval_auc": 0.9681984211284476,
      "eval_f1": 0.8782239634345413,
      "eval_loss": 0.34035009145736694,
      "eval_runtime": 52.8174,
      "eval_samples_per_second": 249.918,
      "eval_steps_per_second": 3.919,
      "step": 9000
    },
    {
      "epoch": 8.18,
      "grad_norm": 5.467037200927734,
      "learning_rate": 1.817359855334539e-06,
      "loss": 0.0761,
      "step": 9050
    },
    {
      "epoch": 8.23,
      "grad_norm": 5.79172945022583,
      "learning_rate": 1.7721518987341774e-06,
      "loss": 0.074,
      "step": 9100
    },
    {
      "epoch": 8.27,
      "grad_norm": 3.8891448974609375,
      "learning_rate": 1.7269439421338158e-06,
      "loss": 0.0767,
      "step": 9150
    },
    {
      "epoch": 8.32,
      "grad_norm": 4.498980522155762,
      "learning_rate": 1.6817359855334538e-06,
      "loss": 0.074,
      "step": 9200
    },
    {
      "epoch": 8.36,
      "grad_norm": 5.459845542907715,
      "learning_rate": 1.6365280289330923e-06,
      "loss": 0.075,
      "step": 9250
    },
    {
      "epoch": 8.41,
      "grad_norm": 6.381141662597656,
      "learning_rate": 1.5913200723327307e-06,
      "loss": 0.0767,
      "step": 9300
    },
    {
      "epoch": 8.45,
      "grad_norm": 5.742413520812988,
      "learning_rate": 1.5461121157323692e-06,
      "loss": 0.0763,
      "step": 9350
    },
    {
      "epoch": 8.5,
      "grad_norm": 5.142722129821777,
      "learning_rate": 1.5009041591320072e-06,
      "loss": 0.0747,
      "step": 9400
    },
    {
      "epoch": 8.54,
      "grad_norm": 5.048556804656982,
      "learning_rate": 1.4556962025316456e-06,
      "loss": 0.0748,
      "step": 9450
    },
    {
      "epoch": 8.59,
      "grad_norm": 5.201704502105713,
      "learning_rate": 1.410488245931284e-06,
      "loss": 0.0748,
      "step": 9500
    },
    {
      "epoch": 8.63,
      "grad_norm": 5.143392086029053,
      "learning_rate": 1.3652802893309225e-06,
      "loss": 0.076,
      "step": 9550
    },
    {
      "epoch": 8.68,
      "grad_norm": 5.493886470794678,
      "learning_rate": 1.3200723327305607e-06,
      "loss": 0.0718,
      "step": 9600
    },
    {
      "epoch": 8.73,
      "grad_norm": 3.705559492111206,
      "learning_rate": 1.274864376130199e-06,
      "loss": 0.0712,
      "step": 9650
    },
    {
      "epoch": 8.77,
      "grad_norm": 5.664219856262207,
      "learning_rate": 1.2296564195298374e-06,
      "loss": 0.0729,
      "step": 9700
    },
    {
      "epoch": 8.82,
      "grad_norm": 5.6803812980651855,
      "learning_rate": 1.1844484629294758e-06,
      "loss": 0.0719,
      "step": 9750
    },
    {
      "epoch": 8.86,
      "grad_norm": 4.449408054351807,
      "learning_rate": 1.139240506329114e-06,
      "loss": 0.0732,
      "step": 9800
    },
    {
      "epoch": 8.91,
      "grad_norm": 6.274992942810059,
      "learning_rate": 1.0940325497287525e-06,
      "loss": 0.0764,
      "step": 9850
    },
    {
      "epoch": 8.95,
      "grad_norm": 6.5285797119140625,
      "learning_rate": 1.0488245931283907e-06,
      "loss": 0.072,
      "step": 9900
    },
    {
      "epoch": 9.0,
      "grad_norm": 10.324697494506836,
      "learning_rate": 1.0036166365280291e-06,
      "loss": 0.073,
      "step": 9950
    },
    {
      "epoch": 9.04,
      "grad_norm": 12.908041954040527,
      "learning_rate": 9.584086799276674e-07,
      "loss": 0.0689,
      "step": 10000
    },
    {
      "epoch": 9.04,
      "eval_accuracy": 0.915530303030303,
      "eval_auc": 0.9687716182332089,
      "eval_f1": 0.8797843665768194,
      "eval_loss": 0.3541575074195862,
      "eval_runtime": 52.8789,
      "eval_samples_per_second": 249.627,
      "eval_steps_per_second": 3.915,
      "step": 10000
    },
    {
      "epoch": 9.09,
      "grad_norm": 4.13251256942749,
      "learning_rate": 9.132007233273058e-07,
      "loss": 0.0687,
      "step": 10050
    },
    {
      "epoch": 9.13,
      "grad_norm": 6.25527286529541,
      "learning_rate": 8.67992766726944e-07,
      "loss": 0.0731,
      "step": 10100
    },
    {
      "epoch": 9.18,
      "grad_norm": 5.105614185333252,
      "learning_rate": 8.227848101265823e-07,
      "loss": 0.0715,
      "step": 10150
    },
    {
      "epoch": 9.22,
      "grad_norm": 4.943532943725586,
      "learning_rate": 7.775768535262207e-07,
      "loss": 0.0653,
      "step": 10200
    },
    {
      "epoch": 9.27,
      "grad_norm": 5.668446063995361,
      "learning_rate": 7.32368896925859e-07,
      "loss": 0.0694,
      "step": 10250
    },
    {
      "epoch": 9.31,
      "grad_norm": 5.078279972076416,
      "learning_rate": 6.871609403254973e-07,
      "loss": 0.0659,
      "step": 10300
    },
    {
      "epoch": 9.36,
      "grad_norm": 4.663586616516113,
      "learning_rate": 6.419529837251357e-07,
      "loss": 0.0661,
      "step": 10350
    },
    {
      "epoch": 9.4,
      "grad_norm": 4.528202533721924,
      "learning_rate": 5.96745027124774e-07,
      "loss": 0.0701,
      "step": 10400
    },
    {
      "epoch": 9.45,
      "grad_norm": 5.685123920440674,
      "learning_rate": 5.515370705244123e-07,
      "loss": 0.069,
      "step": 10450
    },
    {
      "epoch": 9.49,
      "grad_norm": 5.798079967498779,
      "learning_rate": 5.063291139240507e-07,
      "loss": 0.0628,
      "step": 10500
    },
    {
      "epoch": 9.54,
      "grad_norm": 4.510618686676025,
      "learning_rate": 4.61121157323689e-07,
      "loss": 0.0677,
      "step": 10550
    },
    {
      "epoch": 9.58,
      "grad_norm": 5.0236382484436035,
      "learning_rate": 4.1591320072332737e-07,
      "loss": 0.0685,
      "step": 10600
    },
    {
      "epoch": 9.63,
      "grad_norm": 5.243931770324707,
      "learning_rate": 3.707052441229657e-07,
      "loss": 0.0687,
      "step": 10650
    },
    {
      "epoch": 9.67,
      "grad_norm": 4.094812870025635,
      "learning_rate": 3.2549728752260403e-07,
      "loss": 0.0663,
      "step": 10700
    },
    {
      "epoch": 9.72,
      "grad_norm": 4.533218860626221,
      "learning_rate": 2.802893309222423e-07,
      "loss": 0.0685,
      "step": 10750
    },
    {
      "epoch": 9.76,
      "grad_norm": 6.6322021484375,
      "learning_rate": 2.3508137432188067e-07,
      "loss": 0.0664,
      "step": 10800
    },
    {
      "epoch": 9.81,
      "grad_norm": 6.374199390411377,
      "learning_rate": 1.89873417721519e-07,
      "loss": 0.0688,
      "step": 10850
    },
    {
      "epoch": 9.86,
      "grad_norm": 5.66272497177124,
      "learning_rate": 1.4466546112115733e-07,
      "loss": 0.0696,
      "step": 10900
    },
    {
      "epoch": 9.9,
      "grad_norm": 7.157235622406006,
      "learning_rate": 9.945750452079567e-08,
      "loss": 0.0654,
      "step": 10950
    },
    {
      "epoch": 9.95,
      "grad_norm": 6.200794696807861,
      "learning_rate": 5.4249547920434e-08,
      "loss": 0.0712,
      "step": 11000
    },
    {
      "epoch": 9.95,
      "eval_accuracy": 0.9162121212121213,
      "eval_auc": 0.9685840496026594,
      "eval_f1": 0.8797826086956522,
      "eval_loss": 0.35418105125427246,
      "eval_runtime": 52.517,
      "eval_samples_per_second": 251.347,
      "eval_steps_per_second": 3.942,
      "step": 11000
    }
  ],
  "logging_steps": 50,
  "max_steps": 11060,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000,
  "total_flos": 1.3113541708679086e+18,
  "train_batch_size": 512,
  "trial_name": null,
  "trial_params": null
}