{ "best_metric": 0.036893460899591446, "best_model_checkpoint": "./cats_vs_dogs_outputs/checkpoint-2488", "epoch": 1.0, "global_step": 2488, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00019919614147909968, "loss": 0.4243, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.00019839228295819936, "loss": 0.2515, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00019758842443729903, "loss": 0.2728, "step": 30 }, { "epoch": 0.02, "learning_rate": 0.00019678456591639874, "loss": 0.2011, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0001959807073954984, "loss": 0.1596, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.00019517684887459809, "loss": 0.2641, "step": 60 }, { "epoch": 0.03, "learning_rate": 0.00019437299035369776, "loss": 0.2891, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.00019356913183279743, "loss": 0.1983, "step": 80 }, { "epoch": 0.04, "learning_rate": 0.0001927652733118971, "loss": 0.1355, "step": 90 }, { "epoch": 0.04, "learning_rate": 0.00019196141479099678, "loss": 0.1282, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.00019115755627009649, "loss": 0.1681, "step": 110 }, { "epoch": 0.05, "learning_rate": 0.00019035369774919616, "loss": 0.4071, "step": 120 }, { "epoch": 0.05, "learning_rate": 0.00018954983922829584, "loss": 0.467, "step": 130 }, { "epoch": 0.06, "learning_rate": 0.0001887459807073955, "loss": 0.1196, "step": 140 }, { "epoch": 0.06, "learning_rate": 0.00018794212218649519, "loss": 0.2998, "step": 150 }, { "epoch": 0.06, "learning_rate": 0.00018713826366559486, "loss": 0.2774, "step": 160 }, { "epoch": 0.07, "learning_rate": 0.00018633440514469454, "loss": 0.3209, "step": 170 }, { "epoch": 0.07, "learning_rate": 0.0001855305466237942, "loss": 0.1764, "step": 180 }, { "epoch": 0.08, "learning_rate": 0.0001847266881028939, "loss": 0.3375, "step": 190 }, { "epoch": 0.08, "learning_rate": 0.0001839228295819936, "loss": 0.2445, "step": 200 }, { "epoch": 0.08, "learning_rate": 0.00018311897106109326, "loss": 0.2098, "step": 210 }, { "epoch": 0.09, "learning_rate": 0.00018231511254019294, "loss": 0.1775, "step": 220 }, { "epoch": 0.09, "learning_rate": 0.0001815112540192926, "loss": 0.2583, "step": 230 }, { "epoch": 0.1, "learning_rate": 0.00018070739549839229, "loss": 0.1765, "step": 240 }, { "epoch": 0.1, "learning_rate": 0.00017990353697749196, "loss": 0.1831, "step": 250 }, { "epoch": 0.1, "learning_rate": 0.00017909967845659166, "loss": 0.1802, "step": 260 }, { "epoch": 0.11, "learning_rate": 0.00017829581993569134, "loss": 0.283, "step": 270 }, { "epoch": 0.11, "learning_rate": 0.000177491961414791, "loss": 0.2873, "step": 280 }, { "epoch": 0.12, "learning_rate": 0.0001766881028938907, "loss": 0.2789, "step": 290 }, { "epoch": 0.12, "learning_rate": 0.00017588424437299036, "loss": 0.4105, "step": 300 }, { "epoch": 0.12, "learning_rate": 0.00017508038585209004, "loss": 0.2762, "step": 310 }, { "epoch": 0.13, "learning_rate": 0.0001742765273311897, "loss": 0.1414, "step": 320 }, { "epoch": 0.13, "learning_rate": 0.0001734726688102894, "loss": 0.2126, "step": 330 }, { "epoch": 0.14, "learning_rate": 0.0001726688102893891, "loss": 0.3115, "step": 340 }, { "epoch": 0.14, "learning_rate": 0.00017186495176848876, "loss": 0.2029, "step": 350 }, { "epoch": 0.14, "learning_rate": 0.00017106109324758844, "loss": 0.0867, "step": 360 }, { "epoch": 0.15, "learning_rate": 0.0001702572347266881, "loss": 0.1312, "step": 370 }, { "epoch": 0.15, "learning_rate": 0.0001694533762057878, "loss": 0.5391, "step": 380 }, { "epoch": 0.16, "learning_rate": 0.00016864951768488746, "loss": 0.2131, "step": 390 }, { "epoch": 0.16, "learning_rate": 0.00016784565916398716, "loss": 0.2431, "step": 400 }, { "epoch": 0.16, "learning_rate": 0.00016704180064308684, "loss": 0.2088, "step": 410 }, { "epoch": 0.17, "learning_rate": 0.0001662379421221865, "loss": 0.1451, "step": 420 }, { "epoch": 0.17, "learning_rate": 0.0001654340836012862, "loss": 0.1635, "step": 430 }, { "epoch": 0.18, "learning_rate": 0.00016463022508038586, "loss": 0.2134, "step": 440 }, { "epoch": 0.18, "learning_rate": 0.00016382636655948554, "loss": 0.3112, "step": 450 }, { "epoch": 0.18, "learning_rate": 0.0001630225080385852, "loss": 0.2105, "step": 460 }, { "epoch": 0.19, "learning_rate": 0.0001622186495176849, "loss": 0.1925, "step": 470 }, { "epoch": 0.19, "learning_rate": 0.0001614147909967846, "loss": 0.0771, "step": 480 }, { "epoch": 0.2, "learning_rate": 0.00016061093247588426, "loss": 0.1727, "step": 490 }, { "epoch": 0.2, "learning_rate": 0.00015980707395498394, "loss": 0.1623, "step": 500 }, { "epoch": 0.2, "learning_rate": 0.0001590032154340836, "loss": 0.3692, "step": 510 }, { "epoch": 0.21, "learning_rate": 0.0001581993569131833, "loss": 0.0848, "step": 520 }, { "epoch": 0.21, "learning_rate": 0.00015739549839228296, "loss": 0.1735, "step": 530 }, { "epoch": 0.22, "learning_rate": 0.00015659163987138264, "loss": 0.2304, "step": 540 }, { "epoch": 0.22, "learning_rate": 0.00015578778135048234, "loss": 0.1427, "step": 550 }, { "epoch": 0.23, "learning_rate": 0.00015498392282958201, "loss": 0.11, "step": 560 }, { "epoch": 0.23, "learning_rate": 0.0001541800643086817, "loss": 0.1318, "step": 570 }, { "epoch": 0.23, "learning_rate": 0.00015337620578778136, "loss": 0.3012, "step": 580 }, { "epoch": 0.24, "learning_rate": 0.00015257234726688104, "loss": 0.1292, "step": 590 }, { "epoch": 0.24, "learning_rate": 0.0001517684887459807, "loss": 0.0977, "step": 600 }, { "epoch": 0.25, "learning_rate": 0.0001509646302250804, "loss": 0.121, "step": 610 }, { "epoch": 0.25, "learning_rate": 0.0001501607717041801, "loss": 0.1502, "step": 620 }, { "epoch": 0.25, "learning_rate": 0.00014935691318327976, "loss": 0.0814, "step": 630 }, { "epoch": 0.26, "learning_rate": 0.00014855305466237944, "loss": 0.2753, "step": 640 }, { "epoch": 0.26, "learning_rate": 0.00014774919614147911, "loss": 0.1735, "step": 650 }, { "epoch": 0.27, "learning_rate": 0.0001469453376205788, "loss": 0.065, "step": 660 }, { "epoch": 0.27, "learning_rate": 0.00014614147909967846, "loss": 0.1937, "step": 670 }, { "epoch": 0.27, "learning_rate": 0.00014533762057877814, "loss": 0.1878, "step": 680 }, { "epoch": 0.28, "learning_rate": 0.00014453376205787784, "loss": 0.2737, "step": 690 }, { "epoch": 0.28, "learning_rate": 0.00014372990353697752, "loss": 0.08, "step": 700 }, { "epoch": 0.29, "learning_rate": 0.0001429260450160772, "loss": 0.2417, "step": 710 }, { "epoch": 0.29, "learning_rate": 0.00014212218649517686, "loss": 0.1485, "step": 720 }, { "epoch": 0.29, "learning_rate": 0.00014131832797427654, "loss": 0.1943, "step": 730 }, { "epoch": 0.3, "learning_rate": 0.00014051446945337621, "loss": 0.1168, "step": 740 }, { "epoch": 0.3, "learning_rate": 0.0001397106109324759, "loss": 0.271, "step": 750 }, { "epoch": 0.31, "learning_rate": 0.0001389067524115756, "loss": 0.1807, "step": 760 }, { "epoch": 0.31, "learning_rate": 0.00013810289389067527, "loss": 0.2312, "step": 770 }, { "epoch": 0.31, "learning_rate": 0.00013729903536977494, "loss": 0.2052, "step": 780 }, { "epoch": 0.32, "learning_rate": 0.00013649517684887462, "loss": 0.2927, "step": 790 }, { "epoch": 0.32, "learning_rate": 0.0001356913183279743, "loss": 0.2083, "step": 800 }, { "epoch": 0.33, "learning_rate": 0.00013488745980707396, "loss": 0.1997, "step": 810 }, { "epoch": 0.33, "learning_rate": 0.00013408360128617364, "loss": 0.0869, "step": 820 }, { "epoch": 0.33, "learning_rate": 0.00013327974276527331, "loss": 0.1731, "step": 830 }, { "epoch": 0.34, "learning_rate": 0.00013247588424437302, "loss": 0.2085, "step": 840 }, { "epoch": 0.34, "learning_rate": 0.0001316720257234727, "loss": 0.0884, "step": 850 }, { "epoch": 0.35, "learning_rate": 0.00013086816720257237, "loss": 0.1051, "step": 860 }, { "epoch": 0.35, "learning_rate": 0.00013006430868167204, "loss": 0.2395, "step": 870 }, { "epoch": 0.35, "learning_rate": 0.00012926045016077172, "loss": 0.1216, "step": 880 }, { "epoch": 0.36, "learning_rate": 0.0001284565916398714, "loss": 0.234, "step": 890 }, { "epoch": 0.36, "learning_rate": 0.00012765273311897106, "loss": 0.1559, "step": 900 }, { "epoch": 0.37, "learning_rate": 0.00012684887459807077, "loss": 0.2665, "step": 910 }, { "epoch": 0.37, "learning_rate": 0.00012604501607717044, "loss": 0.0772, "step": 920 }, { "epoch": 0.37, "learning_rate": 0.00012524115755627012, "loss": 0.1792, "step": 930 }, { "epoch": 0.38, "learning_rate": 0.0001244372990353698, "loss": 0.1338, "step": 940 }, { "epoch": 0.38, "learning_rate": 0.00012363344051446947, "loss": 0.1326, "step": 950 }, { "epoch": 0.39, "learning_rate": 0.00012282958199356914, "loss": 0.1114, "step": 960 }, { "epoch": 0.39, "learning_rate": 0.0001220257234726688, "loss": 0.1606, "step": 970 }, { "epoch": 0.39, "learning_rate": 0.0001212218649517685, "loss": 0.0651, "step": 980 }, { "epoch": 0.4, "learning_rate": 0.00012041800643086818, "loss": 0.1518, "step": 990 }, { "epoch": 0.4, "learning_rate": 0.00011961414790996785, "loss": 0.2242, "step": 1000 }, { "epoch": 0.41, "learning_rate": 0.00011881028938906753, "loss": 0.2432, "step": 1010 }, { "epoch": 0.41, "learning_rate": 0.0001180064308681672, "loss": 0.2233, "step": 1020 }, { "epoch": 0.41, "learning_rate": 0.00011720257234726688, "loss": 0.2343, "step": 1030 }, { "epoch": 0.42, "learning_rate": 0.00011639871382636655, "loss": 0.1798, "step": 1040 }, { "epoch": 0.42, "learning_rate": 0.00011559485530546625, "loss": 0.2162, "step": 1050 }, { "epoch": 0.43, "learning_rate": 0.00011479099678456593, "loss": 0.178, "step": 1060 }, { "epoch": 0.43, "learning_rate": 0.0001139871382636656, "loss": 0.1113, "step": 1070 }, { "epoch": 0.43, "learning_rate": 0.00011318327974276528, "loss": 0.1912, "step": 1080 }, { "epoch": 0.44, "learning_rate": 0.00011237942122186495, "loss": 0.1249, "step": 1090 }, { "epoch": 0.44, "learning_rate": 0.00011157556270096463, "loss": 0.1505, "step": 1100 }, { "epoch": 0.45, "learning_rate": 0.0001107717041800643, "loss": 0.2325, "step": 1110 }, { "epoch": 0.45, "learning_rate": 0.00010996784565916398, "loss": 0.1284, "step": 1120 }, { "epoch": 0.45, "learning_rate": 0.00010916398713826368, "loss": 0.0767, "step": 1130 }, { "epoch": 0.46, "learning_rate": 0.00010836012861736335, "loss": 0.0773, "step": 1140 }, { "epoch": 0.46, "learning_rate": 0.00010755627009646303, "loss": 0.1112, "step": 1150 }, { "epoch": 0.47, "learning_rate": 0.0001067524115755627, "loss": 0.0873, "step": 1160 }, { "epoch": 0.47, "learning_rate": 0.00010594855305466238, "loss": 0.1782, "step": 1170 }, { "epoch": 0.47, "learning_rate": 0.00010514469453376205, "loss": 0.1493, "step": 1180 }, { "epoch": 0.48, "learning_rate": 0.00010434083601286173, "loss": 0.2793, "step": 1190 }, { "epoch": 0.48, "learning_rate": 0.00010353697749196143, "loss": 0.1563, "step": 1200 }, { "epoch": 0.49, "learning_rate": 0.0001027331189710611, "loss": 0.1311, "step": 1210 }, { "epoch": 0.49, "learning_rate": 0.00010192926045016078, "loss": 0.2596, "step": 1220 }, { "epoch": 0.49, "learning_rate": 0.00010112540192926045, "loss": 0.0822, "step": 1230 }, { "epoch": 0.5, "learning_rate": 0.00010032154340836013, "loss": 0.1717, "step": 1240 }, { "epoch": 0.5, "learning_rate": 9.951768488745982e-05, "loss": 0.1029, "step": 1250 }, { "epoch": 0.51, "learning_rate": 9.871382636655949e-05, "loss": 0.068, "step": 1260 }, { "epoch": 0.51, "learning_rate": 9.790996784565917e-05, "loss": 0.2052, "step": 1270 }, { "epoch": 0.51, "learning_rate": 9.710610932475884e-05, "loss": 0.1236, "step": 1280 }, { "epoch": 0.52, "learning_rate": 9.630225080385853e-05, "loss": 0.1484, "step": 1290 }, { "epoch": 0.52, "learning_rate": 9.54983922829582e-05, "loss": 0.2226, "step": 1300 }, { "epoch": 0.53, "learning_rate": 9.469453376205788e-05, "loss": 0.0759, "step": 1310 }, { "epoch": 0.53, "learning_rate": 9.389067524115757e-05, "loss": 0.0796, "step": 1320 }, { "epoch": 0.53, "learning_rate": 9.308681672025724e-05, "loss": 0.15, "step": 1330 }, { "epoch": 0.54, "learning_rate": 9.228295819935692e-05, "loss": 0.1217, "step": 1340 }, { "epoch": 0.54, "learning_rate": 9.147909967845659e-05, "loss": 0.1487, "step": 1350 }, { "epoch": 0.55, "learning_rate": 9.067524115755628e-05, "loss": 0.178, "step": 1360 }, { "epoch": 0.55, "learning_rate": 8.987138263665596e-05, "loss": 0.0732, "step": 1370 }, { "epoch": 0.55, "learning_rate": 8.906752411575563e-05, "loss": 0.1639, "step": 1380 }, { "epoch": 0.56, "learning_rate": 8.82636655948553e-05, "loss": 0.1026, "step": 1390 }, { "epoch": 0.56, "learning_rate": 8.7459807073955e-05, "loss": 0.0329, "step": 1400 }, { "epoch": 0.57, "learning_rate": 8.665594855305467e-05, "loss": 0.2264, "step": 1410 }, { "epoch": 0.57, "learning_rate": 8.585209003215434e-05, "loss": 0.0799, "step": 1420 }, { "epoch": 0.57, "learning_rate": 8.504823151125403e-05, "loss": 0.0258, "step": 1430 }, { "epoch": 0.58, "learning_rate": 8.42443729903537e-05, "loss": 0.1056, "step": 1440 }, { "epoch": 0.58, "learning_rate": 8.344051446945338e-05, "loss": 0.0807, "step": 1450 }, { "epoch": 0.59, "learning_rate": 8.263665594855306e-05, "loss": 0.1491, "step": 1460 }, { "epoch": 0.59, "learning_rate": 8.183279742765274e-05, "loss": 0.1216, "step": 1470 }, { "epoch": 0.59, "learning_rate": 8.102893890675242e-05, "loss": 0.1722, "step": 1480 }, { "epoch": 0.6, "learning_rate": 8.02250803858521e-05, "loss": 0.0542, "step": 1490 }, { "epoch": 0.6, "learning_rate": 7.942122186495177e-05, "loss": 0.1965, "step": 1500 }, { "epoch": 0.61, "learning_rate": 7.861736334405146e-05, "loss": 0.1252, "step": 1510 }, { "epoch": 0.61, "learning_rate": 7.781350482315113e-05, "loss": 0.0457, "step": 1520 }, { "epoch": 0.61, "learning_rate": 7.70096463022508e-05, "loss": 0.1899, "step": 1530 }, { "epoch": 0.62, "learning_rate": 7.62057877813505e-05, "loss": 0.1058, "step": 1540 }, { "epoch": 0.62, "learning_rate": 7.540192926045017e-05, "loss": 0.1748, "step": 1550 }, { "epoch": 0.63, "learning_rate": 7.459807073954984e-05, "loss": 0.078, "step": 1560 }, { "epoch": 0.63, "learning_rate": 7.379421221864952e-05, "loss": 0.2113, "step": 1570 }, { "epoch": 0.64, "learning_rate": 7.299035369774921e-05, "loss": 0.101, "step": 1580 }, { "epoch": 0.64, "learning_rate": 7.218649517684888e-05, "loss": 0.0684, "step": 1590 }, { "epoch": 0.64, "learning_rate": 7.138263665594856e-05, "loss": 0.3001, "step": 1600 }, { "epoch": 0.65, "learning_rate": 7.057877813504825e-05, "loss": 0.1088, "step": 1610 }, { "epoch": 0.65, "learning_rate": 6.977491961414792e-05, "loss": 0.0099, "step": 1620 }, { "epoch": 0.66, "learning_rate": 6.89710610932476e-05, "loss": 0.0918, "step": 1630 }, { "epoch": 0.66, "learning_rate": 6.816720257234727e-05, "loss": 0.0402, "step": 1640 }, { "epoch": 0.66, "learning_rate": 6.736334405144696e-05, "loss": 0.0505, "step": 1650 }, { "epoch": 0.67, "learning_rate": 6.655948553054663e-05, "loss": 0.1337, "step": 1660 }, { "epoch": 0.67, "learning_rate": 6.575562700964631e-05, "loss": 0.0712, "step": 1670 }, { "epoch": 0.68, "learning_rate": 6.495176848874598e-05, "loss": 0.0177, "step": 1680 }, { "epoch": 0.68, "learning_rate": 6.414790996784567e-05, "loss": 0.2133, "step": 1690 }, { "epoch": 0.68, "learning_rate": 6.334405144694535e-05, "loss": 0.0598, "step": 1700 }, { "epoch": 0.69, "learning_rate": 6.254019292604502e-05, "loss": 0.1066, "step": 1710 }, { "epoch": 0.69, "learning_rate": 6.173633440514471e-05, "loss": 0.1304, "step": 1720 }, { "epoch": 0.7, "learning_rate": 6.0932475884244377e-05, "loss": 0.2402, "step": 1730 }, { "epoch": 0.7, "learning_rate": 6.012861736334405e-05, "loss": 0.0907, "step": 1740 }, { "epoch": 0.7, "learning_rate": 5.9324758842443726e-05, "loss": 0.1261, "step": 1750 }, { "epoch": 0.71, "learning_rate": 5.8520900321543414e-05, "loss": 0.0255, "step": 1760 }, { "epoch": 0.71, "learning_rate": 5.771704180064309e-05, "loss": 0.1224, "step": 1770 }, { "epoch": 0.72, "learning_rate": 5.6913183279742764e-05, "loss": 0.1896, "step": 1780 }, { "epoch": 0.72, "learning_rate": 5.610932475884244e-05, "loss": 0.1877, "step": 1790 }, { "epoch": 0.72, "learning_rate": 5.530546623794213e-05, "loss": 0.1275, "step": 1800 }, { "epoch": 0.73, "learning_rate": 5.45016077170418e-05, "loss": 0.1746, "step": 1810 }, { "epoch": 0.73, "learning_rate": 5.369774919614148e-05, "loss": 0.0513, "step": 1820 }, { "epoch": 0.74, "learning_rate": 5.2893890675241165e-05, "loss": 0.1638, "step": 1830 }, { "epoch": 0.74, "learning_rate": 5.209003215434084e-05, "loss": 0.0546, "step": 1840 }, { "epoch": 0.74, "learning_rate": 5.1286173633440515e-05, "loss": 0.0156, "step": 1850 }, { "epoch": 0.75, "learning_rate": 5.048231511254019e-05, "loss": 0.0511, "step": 1860 }, { "epoch": 0.75, "learning_rate": 4.967845659163987e-05, "loss": 0.1748, "step": 1870 }, { "epoch": 0.76, "learning_rate": 4.887459807073955e-05, "loss": 0.087, "step": 1880 }, { "epoch": 0.76, "learning_rate": 4.8070739549839234e-05, "loss": 0.1595, "step": 1890 }, { "epoch": 0.76, "learning_rate": 4.726688102893891e-05, "loss": 0.1603, "step": 1900 }, { "epoch": 0.77, "learning_rate": 4.646302250803859e-05, "loss": 0.0242, "step": 1910 }, { "epoch": 0.77, "learning_rate": 4.5659163987138265e-05, "loss": 0.1388, "step": 1920 }, { "epoch": 0.78, "learning_rate": 4.485530546623795e-05, "loss": 0.1766, "step": 1930 }, { "epoch": 0.78, "learning_rate": 4.405144694533762e-05, "loss": 0.1017, "step": 1940 }, { "epoch": 0.78, "learning_rate": 4.32475884244373e-05, "loss": 0.0819, "step": 1950 }, { "epoch": 0.79, "learning_rate": 4.244372990353698e-05, "loss": 0.0785, "step": 1960 }, { "epoch": 0.79, "learning_rate": 4.163987138263666e-05, "loss": 0.0906, "step": 1970 }, { "epoch": 0.8, "learning_rate": 4.083601286173634e-05, "loss": 0.0545, "step": 1980 }, { "epoch": 0.8, "learning_rate": 4.0032154340836016e-05, "loss": 0.0917, "step": 1990 }, { "epoch": 0.8, "learning_rate": 3.92282958199357e-05, "loss": 0.1921, "step": 2000 }, { "epoch": 0.81, "learning_rate": 3.842443729903537e-05, "loss": 0.1346, "step": 2010 }, { "epoch": 0.81, "learning_rate": 3.7620578778135054e-05, "loss": 0.0455, "step": 2020 }, { "epoch": 0.82, "learning_rate": 3.681672025723473e-05, "loss": 0.0267, "step": 2030 }, { "epoch": 0.82, "learning_rate": 3.601286173633441e-05, "loss": 0.0781, "step": 2040 }, { "epoch": 0.82, "learning_rate": 3.5209003215434085e-05, "loss": 0.0123, "step": 2050 }, { "epoch": 0.83, "learning_rate": 3.4405144694533766e-05, "loss": 0.0145, "step": 2060 }, { "epoch": 0.83, "learning_rate": 3.360128617363344e-05, "loss": 0.0802, "step": 2070 }, { "epoch": 0.84, "learning_rate": 3.279742765273312e-05, "loss": 0.0797, "step": 2080 }, { "epoch": 0.84, "learning_rate": 3.1993569131832804e-05, "loss": 0.2245, "step": 2090 }, { "epoch": 0.84, "learning_rate": 3.118971061093248e-05, "loss": 0.0771, "step": 2100 }, { "epoch": 0.85, "learning_rate": 3.0385852090032157e-05, "loss": 0.0732, "step": 2110 }, { "epoch": 0.85, "learning_rate": 2.9581993569131832e-05, "loss": 0.1055, "step": 2120 }, { "epoch": 0.86, "learning_rate": 2.8778135048231513e-05, "loss": 0.0194, "step": 2130 }, { "epoch": 0.86, "learning_rate": 2.7974276527331188e-05, "loss": 0.1721, "step": 2140 }, { "epoch": 0.86, "learning_rate": 2.717041800643087e-05, "loss": 0.0708, "step": 2150 }, { "epoch": 0.87, "learning_rate": 2.6366559485530545e-05, "loss": 0.0392, "step": 2160 }, { "epoch": 0.87, "learning_rate": 2.5562700964630226e-05, "loss": 0.0824, "step": 2170 }, { "epoch": 0.88, "learning_rate": 2.4758842443729904e-05, "loss": 0.0319, "step": 2180 }, { "epoch": 0.88, "learning_rate": 2.3954983922829582e-05, "loss": 0.1465, "step": 2190 }, { "epoch": 0.88, "learning_rate": 2.315112540192926e-05, "loss": 0.0373, "step": 2200 }, { "epoch": 0.89, "learning_rate": 2.234726688102894e-05, "loss": 0.0872, "step": 2210 }, { "epoch": 0.89, "learning_rate": 2.154340836012862e-05, "loss": 0.0213, "step": 2220 }, { "epoch": 0.9, "learning_rate": 2.07395498392283e-05, "loss": 0.0919, "step": 2230 }, { "epoch": 0.9, "learning_rate": 1.9935691318327977e-05, "loss": 0.0234, "step": 2240 }, { "epoch": 0.9, "learning_rate": 1.9131832797427655e-05, "loss": 0.0677, "step": 2250 }, { "epoch": 0.91, "learning_rate": 1.8327974276527333e-05, "loss": 0.0754, "step": 2260 }, { "epoch": 0.91, "learning_rate": 1.752411575562701e-05, "loss": 0.0257, "step": 2270 }, { "epoch": 0.92, "learning_rate": 1.672025723472669e-05, "loss": 0.1474, "step": 2280 }, { "epoch": 0.92, "learning_rate": 1.5916398713826368e-05, "loss": 0.3303, "step": 2290 }, { "epoch": 0.92, "learning_rate": 1.5112540192926044e-05, "loss": 0.1173, "step": 2300 }, { "epoch": 0.93, "learning_rate": 1.4308681672025726e-05, "loss": 0.0271, "step": 2310 }, { "epoch": 0.93, "learning_rate": 1.3504823151125404e-05, "loss": 0.0182, "step": 2320 }, { "epoch": 0.94, "learning_rate": 1.2700964630225082e-05, "loss": 0.1104, "step": 2330 }, { "epoch": 0.94, "learning_rate": 1.189710610932476e-05, "loss": 0.0909, "step": 2340 }, { "epoch": 0.94, "learning_rate": 1.1093247588424438e-05, "loss": 0.0389, "step": 2350 }, { "epoch": 0.95, "learning_rate": 1.0289389067524116e-05, "loss": 0.067, "step": 2360 }, { "epoch": 0.95, "learning_rate": 9.485530546623795e-06, "loss": 0.07, "step": 2370 }, { "epoch": 0.96, "learning_rate": 8.681672025723474e-06, "loss": 0.0457, "step": 2380 }, { "epoch": 0.96, "learning_rate": 7.877813504823153e-06, "loss": 0.1058, "step": 2390 }, { "epoch": 0.96, "learning_rate": 7.07395498392283e-06, "loss": 0.0225, "step": 2400 }, { "epoch": 0.97, "learning_rate": 6.270096463022508e-06, "loss": 0.1176, "step": 2410 }, { "epoch": 0.97, "learning_rate": 5.466237942122187e-06, "loss": 0.0267, "step": 2420 }, { "epoch": 0.98, "learning_rate": 4.662379421221865e-06, "loss": 0.0751, "step": 2430 }, { "epoch": 0.98, "learning_rate": 3.858520900321544e-06, "loss": 0.0904, "step": 2440 }, { "epoch": 0.98, "learning_rate": 3.054662379421222e-06, "loss": 0.0607, "step": 2450 }, { "epoch": 0.99, "learning_rate": 2.2508038585209006e-06, "loss": 0.1344, "step": 2460 }, { "epoch": 0.99, "learning_rate": 1.4469453376205788e-06, "loss": 0.0816, "step": 2470 }, { "epoch": 1.0, "learning_rate": 6.430868167202573e-07, "loss": 0.0949, "step": 2480 }, { "epoch": 1.0, "eval_accuracy": 0.9883257403189066, "eval_loss": 0.036893460899591446, "eval_runtime": 109.7581, "eval_samples_per_second": 31.998, "eval_steps_per_second": 4.0, "step": 2488 }, { "epoch": 1.0, "step": 2488, "total_flos": 0.0, "train_loss": 0.1489328353447163, "train_runtime": 1701.4159, "train_samples_per_second": 11.695, "train_steps_per_second": 1.462 } ], "max_steps": 2488, "num_train_epochs": 1, "total_flos": 0.0, "trial_name": null, "trial_params": null }