{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9998720081914757, "global_step": 3906, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 9.2076, "step": 1 }, { "epoch": 0.0, "learning_rate": 2e-05, "loss": 9.3403, "step": 10 }, { "epoch": 0.01, "learning_rate": 4.5e-05, "loss": 7.9144, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.500000000000001e-05, "loss": 6.7554, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 5.7716, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00017, "loss": 5.0089, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.00019999947171819797, "loss": 4.383, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.00019999352861202634, "loss": 4.1286, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.0001999809824411913, "loss": 3.7428, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.0001999618340341782, "loss": 3.4558, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.00019993608465545054, "loss": 3.3284, "step": 100 }, { "epoch": 0.03, "learning_rate": 0.00019990373600536657, "loss": 3.202, "step": 110 }, { "epoch": 0.03, "learning_rate": 0.00019986479022006677, "loss": 3.1471, "step": 120 }, { "epoch": 0.03, "learning_rate": 0.00019981924987133289, "loss": 3.0477, "step": 130 }, { "epoch": 0.04, "learning_rate": 0.00019976711796641832, "loss": 2.9572, "step": 140 }, { "epoch": 0.04, "learning_rate": 0.00019970839794784917, "loss": 2.9283, "step": 150 }, { "epoch": 0.04, "learning_rate": 0.00019964309369319722, "loss": 2.882, "step": 160 }, { "epoch": 0.04, "learning_rate": 0.00019957120951482363, "loss": 2.799, "step": 170 }, { "epoch": 0.05, "learning_rate": 0.00019949275015959442, "loss": 2.7808, "step": 180 }, { "epoch": 0.05, "learning_rate": 0.0001994077208085668, "loss": 2.7251, "step": 190 }, { "epoch": 0.05, "learning_rate": 0.0001993161270766472, "loss": 2.6977, "step": 200 }, { "epoch": 0.05, "learning_rate": 0.00019921797501222036, "loss": 2.7366, "step": 210 }, { "epoch": 0.06, "learning_rate": 0.00019911327109675003, "loss": 2.7007, "step": 220 }, { "epoch": 0.06, "learning_rate": 0.00019900202224435086, "loss": 2.6237, "step": 230 }, { "epoch": 0.06, "learning_rate": 0.00019888423580133194, "loss": 2.4928, "step": 240 }, { "epoch": 0.06, "learning_rate": 0.0001987599195457116, "loss": 2.6081, "step": 250 }, { "epoch": 0.07, "learning_rate": 0.00019862908168670384, "loss": 2.59, "step": 260 }, { "epoch": 0.07, "learning_rate": 0.00019849173086417622, "loss": 2.5477, "step": 270 }, { "epoch": 0.07, "learning_rate": 0.0001983478761480793, "loss": 2.5678, "step": 280 }, { "epoch": 0.07, "learning_rate": 0.00019819752703784777, "loss": 2.4678, "step": 290 }, { "epoch": 0.08, "learning_rate": 0.0001980406934617731, "loss": 2.4486, "step": 300 }, { "epoch": 0.08, "learning_rate": 0.00019787738577634794, "loss": 2.5024, "step": 310 }, { "epoch": 0.08, "learning_rate": 0.00019770761476558223, "loss": 2.5042, "step": 320 }, { "epoch": 0.08, "learning_rate": 0.00019753139164029108, "loss": 2.4463, "step": 330 }, { "epoch": 0.09, "learning_rate": 0.00019734872803735444, "loss": 2.4035, "step": 340 }, { "epoch": 0.09, "learning_rate": 0.0001971596360189488, "loss": 2.4444, "step": 350 }, { "epoch": 0.09, "learning_rate": 0.0001969641280717504, "loss": 2.4404, "step": 360 }, { "epoch": 0.09, "learning_rate": 0.00019676221710611093, "loss": 2.4378, "step": 370 }, { "epoch": 0.1, "learning_rate": 0.00019655391645520486, "loss": 2.3842, "step": 380 }, { "epoch": 0.1, "learning_rate": 0.000196339239874149, "loss": 2.3743, "step": 390 }, { "epoch": 0.1, "learning_rate": 0.00019611820153909418, "loss": 2.3983, "step": 400 }, { "epoch": 0.1, "learning_rate": 0.0001958908160462892, "loss": 2.3224, "step": 410 }, { "epoch": 0.11, "learning_rate": 0.0001956570984111169, "loss": 2.3779, "step": 420 }, { "epoch": 0.11, "learning_rate": 0.00019541706406710256, "loss": 2.3706, "step": 430 }, { "epoch": 0.11, "learning_rate": 0.000195170728864895, "loss": 2.3213, "step": 440 }, { "epoch": 0.12, "learning_rate": 0.0001949181090712195, "loss": 2.3222, "step": 450 }, { "epoch": 0.12, "learning_rate": 0.00019465922136780396, "loss": 2.3018, "step": 460 }, { "epoch": 0.12, "learning_rate": 0.00019439408285027717, "loss": 2.3268, "step": 470 }, { "epoch": 0.12, "learning_rate": 0.00019412271102703992, "loss": 2.2956, "step": 480 }, { "epoch": 0.13, "learning_rate": 0.00019384512381810887, "loss": 2.2676, "step": 490 }, { "epoch": 0.13, "learning_rate": 0.00019356133955393312, "loss": 2.2877, "step": 500 }, { "epoch": 0.13, "learning_rate": 0.0001932713769741839, "loss": 2.3348, "step": 510 }, { "epoch": 0.13, "learning_rate": 0.0001929752552265169, "loss": 2.3026, "step": 520 }, { "epoch": 0.14, "learning_rate": 0.00019267299386530813, "loss": 2.3344, "step": 530 }, { "epoch": 0.14, "learning_rate": 0.00019236461285036233, "loss": 2.258, "step": 540 }, { "epoch": 0.14, "learning_rate": 0.0001920501325455952, "loss": 2.2424, "step": 550 }, { "epoch": 0.14, "learning_rate": 0.00019172957371768848, "loss": 2.2824, "step": 560 }, { "epoch": 0.15, "learning_rate": 0.00019140295753471872, "loss": 2.2618, "step": 570 }, { "epoch": 0.15, "learning_rate": 0.0001910703055647595, "loss": 2.243, "step": 580 }, { "epoch": 0.15, "learning_rate": 0.00019073163977445696, "loss": 2.2295, "step": 590 }, { "epoch": 0.15, "learning_rate": 0.00019038698252757952, "loss": 2.2533, "step": 600 }, { "epoch": 0.16, "learning_rate": 0.00019003635658354094, "loss": 2.2098, "step": 610 }, { "epoch": 0.16, "learning_rate": 0.0001896797850958973, "loss": 2.3026, "step": 620 }, { "epoch": 0.16, "learning_rate": 0.00018931729161081835, "loss": 2.19, "step": 630 }, { "epoch": 0.16, "learning_rate": 0.00018894890006553237, "loss": 2.1298, "step": 640 }, { "epoch": 0.17, "learning_rate": 0.00018857463478674552, "loss": 2.1882, "step": 650 }, { "epoch": 0.17, "learning_rate": 0.00018819452048903561, "loss": 2.1378, "step": 660 }, { "epoch": 0.17, "learning_rate": 0.00018780858227321988, "loss": 2.1886, "step": 670 }, { "epoch": 0.17, "learning_rate": 0.0001874168456246975, "loss": 2.1542, "step": 680 }, { "epoch": 0.18, "learning_rate": 0.00018701933641176676, "loss": 2.1299, "step": 690 }, { "epoch": 0.18, "learning_rate": 0.00018661608088391671, "loss": 2.164, "step": 700 }, { "epoch": 0.18, "learning_rate": 0.0001862071056700939, "loss": 2.1497, "step": 710 }, { "epoch": 0.18, "learning_rate": 0.00018579243777694387, "loss": 2.1869, "step": 720 }, { "epoch": 0.19, "learning_rate": 0.00018537210458702773, "loss": 2.1317, "step": 730 }, { "epoch": 0.19, "learning_rate": 0.00018494613385701408, "loss": 2.138, "step": 740 }, { "epoch": 0.19, "learning_rate": 0.00018451455371584603, "loss": 2.1337, "step": 750 }, { "epoch": 0.19, "learning_rate": 0.00018407739266288365, "loss": 2.1062, "step": 760 }, { "epoch": 0.2, "learning_rate": 0.00018363467956602206, "loss": 2.0968, "step": 770 }, { "epoch": 0.2, "learning_rate": 0.0001831864436597853, "loss": 2.095, "step": 780 }, { "epoch": 0.2, "learning_rate": 0.00018273271454339552, "loss": 2.1348, "step": 790 }, { "epoch": 0.2, "learning_rate": 0.0001822735221788186, "loss": 2.0977, "step": 800 }, { "epoch": 0.21, "learning_rate": 0.0001818088968887857, "loss": 2.1029, "step": 810 }, { "epoch": 0.21, "learning_rate": 0.00018133886935479057, "loss": 2.1493, "step": 820 }, { "epoch": 0.21, "learning_rate": 0.0001808634706150639, "loss": 2.088, "step": 830 }, { "epoch": 0.22, "learning_rate": 0.0001803827320625234, "loss": 2.1212, "step": 840 }, { "epoch": 0.22, "learning_rate": 0.00017989668544270097, "loss": 2.0288, "step": 850 }, { "epoch": 0.22, "learning_rate": 0.0001794053628516462, "loss": 2.0432, "step": 860 }, { "epoch": 0.22, "learning_rate": 0.00017890879673380719, "loss": 2.0355, "step": 870 }, { "epoch": 0.23, "learning_rate": 0.00017840701987988772, "loss": 2.0755, "step": 880 }, { "epoch": 0.23, "learning_rate": 0.0001779000654246823, "loss": 2.0453, "step": 890 }, { "epoch": 0.23, "learning_rate": 0.00017738796684488772, "loss": 2.0582, "step": 900 }, { "epoch": 0.23, "learning_rate": 0.00017687075795689278, "loss": 2.0768, "step": 910 }, { "epoch": 0.24, "learning_rate": 0.00017634847291454503, "loss": 2.091, "step": 920 }, { "epoch": 0.24, "learning_rate": 0.0001758211462068955, "loss": 2.0577, "step": 930 }, { "epoch": 0.24, "learning_rate": 0.00017528881265592108, "loss": 2.0704, "step": 940 }, { "epoch": 0.24, "learning_rate": 0.00017475150741422528, "loss": 2.0305, "step": 950 }, { "epoch": 0.25, "learning_rate": 0.0001742092659627167, "loss": 2.0256, "step": 960 }, { "epoch": 0.25, "learning_rate": 0.0001736621241082663, "loss": 2.0357, "step": 970 }, { "epoch": 0.25, "learning_rate": 0.00017311011798134263, "loss": 1.9873, "step": 980 }, { "epoch": 0.25, "learning_rate": 0.00017255328403362606, "loss": 2.0248, "step": 990 }, { "epoch": 0.26, "learning_rate": 0.00017199165903560192, "loss": 1.9927, "step": 1000 }, { "epoch": 0.26, "eval_loss": 1.7968560457229614, "eval_runtime": 2.032, "eval_samples_per_second": 49.213, "eval_steps_per_second": 1.969, "step": 1000 }, { "epoch": 0.26, "learning_rate": 0.00017142528007413192, "loss": 1.9916, "step": 1010 }, { "epoch": 0.26, "learning_rate": 0.00017085418455000553, "loss": 2.0123, "step": 1020 }, { "epoch": 0.26, "learning_rate": 0.00017027841017546998, "loss": 2.0141, "step": 1030 }, { "epoch": 0.27, "learning_rate": 0.00016969799497174005, "loss": 1.976, "step": 1040 }, { "epoch": 0.27, "learning_rate": 0.0001691129772664873, "loss": 1.9943, "step": 1050 }, { "epoch": 0.27, "learning_rate": 0.00016852339569130905, "loss": 1.9607, "step": 1060 }, { "epoch": 0.27, "learning_rate": 0.00016792928917917755, "loss": 1.9793, "step": 1070 }, { "epoch": 0.28, "learning_rate": 0.00016733069696186868, "loss": 1.988, "step": 1080 }, { "epoch": 0.28, "learning_rate": 0.00016672765856737178, "loss": 1.9143, "step": 1090 }, { "epoch": 0.28, "learning_rate": 0.00016612021381727887, "loss": 1.9971, "step": 1100 }, { "epoch": 0.28, "learning_rate": 0.0001655084028241555, "loss": 1.9675, "step": 1110 }, { "epoch": 0.29, "learning_rate": 0.0001648922659888916, "loss": 2.0046, "step": 1120 }, { "epoch": 0.29, "learning_rate": 0.00016427184399803383, "loss": 2.018, "step": 1130 }, { "epoch": 0.29, "learning_rate": 0.0001636471778210988, "loss": 1.999, "step": 1140 }, { "epoch": 0.29, "learning_rate": 0.00016301830870786742, "loss": 1.9143, "step": 1150 }, { "epoch": 0.3, "learning_rate": 0.00016238527818566138, "loss": 1.9324, "step": 1160 }, { "epoch": 0.3, "learning_rate": 0.0001617481280566005, "loss": 1.9493, "step": 1170 }, { "epoch": 0.3, "learning_rate": 0.00016110690039484267, "loss": 1.9507, "step": 1180 }, { "epoch": 0.3, "learning_rate": 0.00016046163754380514, "loss": 1.9408, "step": 1190 }, { "epoch": 0.31, "learning_rate": 0.00015981238211336873, "loss": 2.0009, "step": 1200 }, { "epoch": 0.31, "learning_rate": 0.00015915917697706386, "loss": 1.9684, "step": 1210 }, { "epoch": 0.31, "learning_rate": 0.0001585020652692394, "loss": 1.9373, "step": 1220 }, { "epoch": 0.31, "learning_rate": 0.0001578410903822145, "loss": 1.9038, "step": 1230 }, { "epoch": 0.32, "learning_rate": 0.00015717629596341288, "loss": 1.9065, "step": 1240 }, { "epoch": 0.32, "learning_rate": 0.00015650772591248085, "loss": 1.9327, "step": 1250 }, { "epoch": 0.32, "learning_rate": 0.0001558354243783882, "loss": 1.935, "step": 1260 }, { "epoch": 0.33, "learning_rate": 0.000155159435756513, "loss": 1.9339, "step": 1270 }, { "epoch": 0.33, "learning_rate": 0.00015447980468570979, "loss": 1.929, "step": 1280 }, { "epoch": 0.33, "learning_rate": 0.00015379657604536203, "loss": 1.9184, "step": 1290 }, { "epoch": 0.33, "learning_rate": 0.00015310979495241825, "loss": 1.9242, "step": 1300 }, { "epoch": 0.34, "learning_rate": 0.00015241950675841306, "loss": 1.9133, "step": 1310 }, { "epoch": 0.34, "learning_rate": 0.0001517257570464721, "loss": 1.9014, "step": 1320 }, { "epoch": 0.34, "learning_rate": 0.00015102859162830209, "loss": 1.9283, "step": 1330 }, { "epoch": 0.34, "learning_rate": 0.00015032805654116566, "loss": 1.8821, "step": 1340 }, { "epoch": 0.35, "learning_rate": 0.00014962419804484127, "loss": 1.8956, "step": 1350 }, { "epoch": 0.35, "learning_rate": 0.00014891706261856844, "loss": 1.9166, "step": 1360 }, { "epoch": 0.35, "learning_rate": 0.00014820669695797843, "loss": 1.9385, "step": 1370 }, { "epoch": 0.35, "learning_rate": 0.00014749314797201084, "loss": 1.9325, "step": 1380 }, { "epoch": 0.36, "learning_rate": 0.00014677646277981593, "loss": 1.8642, "step": 1390 }, { "epoch": 0.36, "learning_rate": 0.00014605668870764293, "loss": 1.8964, "step": 1400 }, { "epoch": 0.36, "learning_rate": 0.0001453338732857152, "loss": 1.8727, "step": 1410 }, { "epoch": 0.36, "learning_rate": 0.00014460806424509132, "loss": 1.8644, "step": 1420 }, { "epoch": 0.37, "learning_rate": 0.0001438793095145132, "loss": 1.8591, "step": 1430 }, { "epoch": 0.37, "learning_rate": 0.00014314765721724118, "loss": 1.8931, "step": 1440 }, { "epoch": 0.37, "learning_rate": 0.00014241315566787617, "loss": 1.8953, "step": 1450 }, { "epoch": 0.37, "learning_rate": 0.00014167585336916926, "loss": 1.8672, "step": 1460 }, { "epoch": 0.38, "learning_rate": 0.0001409357990088188, "loss": 1.8414, "step": 1470 }, { "epoch": 0.38, "learning_rate": 0.00014019304145625517, "loss": 1.8838, "step": 1480 }, { "epoch": 0.38, "learning_rate": 0.00013944762975941403, "loss": 1.856, "step": 1490 }, { "epoch": 0.38, "learning_rate": 0.00013877453061830693, "loss": 1.8715, "step": 1500 }, { "epoch": 0.39, "learning_rate": 0.00013802421179949775, "loss": 1.8323, "step": 1510 }, { "epoch": 0.39, "learning_rate": 0.00013727138205490392, "loss": 1.898, "step": 1520 }, { "epoch": 0.39, "learning_rate": 0.00013651609109757744, "loss": 1.8455, "step": 1530 }, { "epoch": 0.39, "learning_rate": 0.00013575838880309623, "loss": 1.8788, "step": 1540 }, { "epoch": 0.4, "learning_rate": 0.00013499832520627076, "loss": 1.8881, "step": 1550 }, { "epoch": 0.4, "learning_rate": 0.00013423595049783974, "loss": 1.8326, "step": 1560 }, { "epoch": 0.4, "learning_rate": 0.00013347131502115616, "loss": 1.845, "step": 1570 }, { "epoch": 0.4, "learning_rate": 0.00013270446926886252, "loss": 1.8768, "step": 1580 }, { "epoch": 0.41, "learning_rate": 0.00013193546387955672, "loss": 1.8571, "step": 1590 }, { "epoch": 0.41, "learning_rate": 0.00013116434963444815, "loss": 1.8596, "step": 1600 }, { "epoch": 0.41, "learning_rate": 0.00013039117745400426, "loss": 1.8515, "step": 1610 }, { "epoch": 0.41, "learning_rate": 0.00012961599839458825, "loss": 1.8281, "step": 1620 }, { "epoch": 0.42, "learning_rate": 0.00012883886364508718, "loss": 1.7872, "step": 1630 }, { "epoch": 0.42, "learning_rate": 0.00012805982452353213, "loss": 1.8333, "step": 1640 }, { "epoch": 0.42, "learning_rate": 0.00012727893247370918, "loss": 1.7989, "step": 1650 }, { "epoch": 0.42, "learning_rate": 0.00012657458799214414, "loss": 1.8662, "step": 1660 }, { "epoch": 0.43, "learning_rate": 0.00012579031754172398, "loss": 1.8667, "step": 1670 }, { "epoch": 0.43, "learning_rate": 0.0001250043440297479, "loss": 1.821, "step": 1680 }, { "epoch": 0.43, "learning_rate": 0.0001242167193579139, "loss": 1.8333, "step": 1690 }, { "epoch": 0.44, "learning_rate": 0.00012342749553695423, "loss": 1.8554, "step": 1700 }, { "epoch": 0.44, "learning_rate": 0.0001226367246832007, "loss": 1.8308, "step": 1710 }, { "epoch": 0.44, "learning_rate": 0.00012184445901514343, "loss": 1.8215, "step": 1720 }, { "epoch": 0.44, "learning_rate": 0.00012105075084998242, "loss": 1.8347, "step": 1730 }, { "epoch": 0.45, "learning_rate": 0.00012025565260017291, "loss": 1.7671, "step": 1740 }, { "epoch": 0.45, "learning_rate": 0.00011945921676996417, "loss": 1.8035, "step": 1750 }, { "epoch": 0.45, "learning_rate": 0.00011866149595193254, "loss": 1.8008, "step": 1760 }, { "epoch": 0.45, "learning_rate": 0.0001178625428235085, "loss": 1.8057, "step": 1770 }, { "epoch": 0.46, "learning_rate": 0.00011706241014349788, "loss": 1.8286, "step": 1780 }, { "epoch": 0.46, "learning_rate": 0.00011626115074859829, "loss": 1.7838, "step": 1790 }, { "epoch": 0.46, "learning_rate": 0.00011545881754990972, "loss": 1.7678, "step": 1800 }, { "epoch": 0.46, "learning_rate": 0.00011465546352944083, "loss": 1.8015, "step": 1810 }, { "epoch": 0.47, "learning_rate": 0.00011385114173661003, "loss": 1.773, "step": 1820 }, { "epoch": 0.47, "learning_rate": 0.00011304590528474257, "loss": 1.7528, "step": 1830 }, { "epoch": 0.47, "learning_rate": 0.00011223980734756319, "loss": 1.7651, "step": 1840 }, { "epoch": 0.47, "learning_rate": 0.00011143290115568473, "loss": 1.7817, "step": 1850 }, { "epoch": 0.48, "learning_rate": 0.00011062523999309291, "loss": 1.7699, "step": 1860 }, { "epoch": 0.48, "learning_rate": 0.00010981687719362807, "loss": 1.7672, "step": 1870 }, { "epoch": 0.48, "learning_rate": 0.00010900786613746299, "loss": 1.789, "step": 1880 }, { "epoch": 0.48, "learning_rate": 0.00010819826024757807, "loss": 1.7622, "step": 1890 }, { "epoch": 0.49, "learning_rate": 0.00010738811298623348, "loss": 1.7543, "step": 1900 }, { "epoch": 0.49, "learning_rate": 0.00010657747785143882, "loss": 1.7432, "step": 1910 }, { "epoch": 0.49, "learning_rate": 0.00010576640837342036, "loss": 1.7765, "step": 1920 }, { "epoch": 0.49, "learning_rate": 0.00010495495811108622, "loss": 1.768, "step": 1930 }, { "epoch": 0.5, "learning_rate": 0.00010414318064848956, "loss": 1.7852, "step": 1940 }, { "epoch": 0.5, "learning_rate": 0.0001033311295912902, "loss": 1.7551, "step": 1950 }, { "epoch": 0.5, "learning_rate": 0.0001025188585632147, "loss": 1.7474, "step": 1960 }, { "epoch": 0.5, "learning_rate": 0.00010186891940623151, "loss": 1.7737, "step": 1970 }, { "epoch": 0.51, "learning_rate": 0.00010105638760647513, "loss": 1.7802, "step": 1980 }, { "epoch": 0.51, "learning_rate": 0.00010024378604824765, "loss": 1.7723, "step": 1990 }, { "epoch": 0.51, "learning_rate": 9.943116839162797e-05, "loss": 1.7412, "step": 2000 }, { "epoch": 0.51, "eval_loss": 1.5315768718719482, "eval_runtime": 2.0299, "eval_samples_per_second": 49.264, "eval_steps_per_second": 1.971, "step": 2000 }, { "epoch": 0.51, "learning_rate": 9.869984308751394e-05, "loss": 1.7468, "step": 2010 }, { "epoch": 0.52, "learning_rate": 9.788734267841828e-05, "loss": 1.7681, "step": 2020 }, { "epoch": 0.52, "learning_rate": 9.707498177847988e-05, "loss": 1.8109, "step": 2030 }, { "epoch": 0.52, "learning_rate": 9.626281403188578e-05, "loss": 1.72, "step": 2040 }, { "epoch": 0.52, "learning_rate": 9.545089307006811e-05, "loss": 1.703, "step": 2050 }, { "epoch": 0.53, "learning_rate": 9.463927250816272e-05, "loss": 1.7624, "step": 2060 }, { "epoch": 0.53, "learning_rate": 9.382800594146841e-05, "loss": 1.7587, "step": 2070 }, { "epoch": 0.53, "learning_rate": 9.301714694190808e-05, "loss": 1.7375, "step": 2080 }, { "epoch": 0.54, "learning_rate": 9.220674905449091e-05, "loss": 1.7579, "step": 2090 }, { "epoch": 0.54, "learning_rate": 9.139686579377649e-05, "loss": 1.7396, "step": 2100 }, { "epoch": 0.54, "learning_rate": 9.058755064034127e-05, "loss": 1.6666, "step": 2110 }, { "epoch": 0.54, "learning_rate": 8.977885703724658e-05, "loss": 1.7319, "step": 2120 }, { "epoch": 0.55, "learning_rate": 8.897083838650984e-05, "loss": 1.7387, "step": 2130 }, { "epoch": 0.55, "learning_rate": 8.816354804557807e-05, "loss": 1.7204, "step": 2140 }, { "epoch": 0.55, "learning_rate": 8.743765350485347e-05, "loss": 1.7183, "step": 2150 }, { "epoch": 0.55, "learning_rate": 8.671243090320367e-05, "loss": 1.7173, "step": 2160 }, { "epoch": 0.56, "learning_rate": 8.590746326848647e-05, "loss": 1.7185, "step": 2170 }, { "epoch": 0.56, "learning_rate": 8.510342623330503e-05, "loss": 1.7228, "step": 2180 }, { "epoch": 0.56, "learning_rate": 8.430037289218072e-05, "loss": 1.7542, "step": 2190 }, { "epoch": 0.56, "learning_rate": 8.349835627467664e-05, "loss": 1.7005, "step": 2200 }, { "epoch": 0.57, "learning_rate": 8.269742934189604e-05, "loss": 1.7211, "step": 2210 }, { "epoch": 0.57, "learning_rate": 8.189764498298483e-05, "loss": 1.7455, "step": 2220 }, { "epoch": 0.57, "learning_rate": 8.109905601163912e-05, "loss": 1.6729, "step": 2230 }, { "epoch": 0.57, "learning_rate": 8.030171516261782e-05, "loss": 1.7341, "step": 2240 }, { "epoch": 0.58, "learning_rate": 7.950567508826012e-05, "loss": 1.7286, "step": 2250 }, { "epoch": 0.58, "learning_rate": 7.871098835500859e-05, "loss": 1.7079, "step": 2260 }, { "epoch": 0.58, "learning_rate": 7.791770743993817e-05, "loss": 1.7001, "step": 2270 }, { "epoch": 0.58, "learning_rate": 7.712588472729058e-05, "loss": 1.7239, "step": 2280 }, { "epoch": 0.59, "learning_rate": 7.633557250501531e-05, "loss": 1.7032, "step": 2290 }, { "epoch": 0.59, "learning_rate": 7.55468229613168e-05, "loss": 1.6624, "step": 2300 }, { "epoch": 0.59, "learning_rate": 7.475968818120798e-05, "loss": 1.7258, "step": 2310 }, { "epoch": 0.59, "learning_rate": 7.405269046437083e-05, "loss": 1.6995, "step": 2320 }, { "epoch": 0.6, "learning_rate": 7.342541200785587e-05, "loss": 1.6715, "step": 2330 }, { "epoch": 0.6, "learning_rate": 7.26428964082281e-05, "loss": 1.7005, "step": 2340 }, { "epoch": 0.6, "learning_rate": 7.186218733274769e-05, "loss": 1.6575, "step": 2350 }, { "epoch": 0.6, "learning_rate": 7.1083336335476e-05, "loss": 1.7001, "step": 2360 }, { "epoch": 0.61, "learning_rate": 7.030639484777641e-05, "loss": 1.6679, "step": 2370 }, { "epoch": 0.61, "learning_rate": 6.953141417491781e-05, "loss": 1.7034, "step": 2380 }, { "epoch": 0.61, "learning_rate": 6.875844549268706e-05, "loss": 1.6804, "step": 2390 }, { "epoch": 0.61, "learning_rate": 6.798753984400916e-05, "loss": 1.6844, "step": 2400 }, { "epoch": 0.62, "learning_rate": 6.721874813557699e-05, "loss": 1.7038, "step": 2410 }, { "epoch": 0.62, "learning_rate": 6.645212113448953e-05, "loss": 1.6728, "step": 2420 }, { "epoch": 0.62, "learning_rate": 6.568770946489948e-05, "loss": 1.682, "step": 2430 }, { "epoch": 0.62, "learning_rate": 6.492556360467025e-05, "loss": 1.6799, "step": 2440 }, { "epoch": 0.63, "learning_rate": 6.416573388204282e-05, "loss": 1.66, "step": 2450 }, { "epoch": 0.63, "learning_rate": 6.340827047231211e-05, "loss": 1.6806, "step": 2460 }, { "epoch": 0.63, "learning_rate": 6.265322339451376e-05, "loss": 1.6661, "step": 2470 }, { "epoch": 0.63, "learning_rate": 6.190064250812124e-05, "loss": 1.6696, "step": 2480 }, { "epoch": 0.64, "learning_rate": 6.115057750975312e-05, "loss": 1.6153, "step": 2490 }, { "epoch": 0.64, "learning_rate": 6.040307792989157e-05, "loss": 1.6824, "step": 2500 }, { "epoch": 0.64, "learning_rate": 5.9658193129611604e-05, "loss": 1.6886, "step": 2510 }, { "epoch": 0.65, "learning_rate": 5.891597229732135e-05, "loss": 1.6358, "step": 2520 }, { "epoch": 0.65, "learning_rate": 5.8176464445514166e-05, "loss": 1.6462, "step": 2530 }, { "epoch": 0.65, "learning_rate": 5.7439718407531906e-05, "loss": 1.6434, "step": 2540 }, { "epoch": 0.65, "learning_rate": 5.670578283434016e-05, "loss": 1.6459, "step": 2550 }, { "epoch": 0.66, "learning_rate": 5.5974706191315884e-05, "loss": 1.6705, "step": 2560 }, { "epoch": 0.66, "learning_rate": 5.5246536755046706e-05, "loss": 1.6638, "step": 2570 }, { "epoch": 0.66, "learning_rate": 5.452132261014304e-05, "loss": 1.6656, "step": 2580 }, { "epoch": 0.66, "learning_rate": 5.379911164606304e-05, "loss": 1.6572, "step": 2590 }, { "epoch": 0.67, "learning_rate": 5.315172891887351e-05, "loss": 1.643, "step": 2600 }, { "epoch": 0.67, "learning_rate": 5.2435355221012797e-05, "loss": 1.6544, "step": 2610 }, { "epoch": 0.67, "learning_rate": 5.172212245066537e-05, "loss": 1.628, "step": 2620 }, { "epoch": 0.67, "learning_rate": 5.1012077706100125e-05, "loss": 1.6378, "step": 2630 }, { "epoch": 0.68, "learning_rate": 5.0305267875065087e-05, "loss": 1.6475, "step": 2640 }, { "epoch": 0.68, "learning_rate": 4.9601739631690836e-05, "loss": 1.5959, "step": 2650 }, { "epoch": 0.68, "learning_rate": 4.897140837169796e-05, "loss": 1.657, "step": 2660 }, { "epoch": 0.68, "learning_rate": 4.827424295352793e-05, "loss": 1.6716, "step": 2670 }, { "epoch": 0.69, "learning_rate": 4.758049324158693e-05, "loss": 1.59, "step": 2680 }, { "epoch": 0.69, "learning_rate": 4.6890205047581745e-05, "loss": 1.6442, "step": 2690 }, { "epoch": 0.69, "learning_rate": 4.6203423954637995e-05, "loss": 1.6152, "step": 2700 }, { "epoch": 0.69, "learning_rate": 4.552019531429019e-05, "loss": 1.6446, "step": 2710 }, { "epoch": 0.7, "learning_rate": 4.484056424348703e-05, "loss": 1.6216, "step": 2720 }, { "epoch": 0.7, "learning_rate": 4.416457562161184e-05, "loss": 1.6534, "step": 2730 }, { "epoch": 0.7, "learning_rate": 4.349227408751919e-05, "loss": 1.6474, "step": 2740 }, { "epoch": 0.7, "learning_rate": 4.282370403658717e-05, "loss": 1.6338, "step": 2750 }, { "epoch": 0.71, "learning_rate": 4.2158909617785525e-05, "loss": 1.6473, "step": 2760 }, { "epoch": 0.71, "learning_rate": 4.149793473076058e-05, "loss": 1.6315, "step": 2770 }, { "epoch": 0.71, "learning_rate": 4.084082302293617e-05, "loss": 1.6516, "step": 2780 }, { "epoch": 0.71, "learning_rate": 4.018761788663127e-05, "loss": 1.6112, "step": 2790 }, { "epoch": 0.72, "learning_rate": 3.953836245619488e-05, "loss": 1.6077, "step": 2800 }, { "epoch": 0.72, "learning_rate": 3.889309960515738e-05, "loss": 1.6182, "step": 2810 }, { "epoch": 0.72, "learning_rate": 3.82518719433995e-05, "loss": 1.6072, "step": 2820 }, { "epoch": 0.72, "learning_rate": 3.761472181433865e-05, "loss": 1.6062, "step": 2830 }, { "epoch": 0.73, "learning_rate": 3.6981691292132604e-05, "loss": 1.6332, "step": 2840 }, { "epoch": 0.73, "learning_rate": 3.6352822178901235e-05, "loss": 1.6393, "step": 2850 }, { "epoch": 0.73, "learning_rate": 3.5728156001966154e-05, "loss": 1.6401, "step": 2860 }, { "epoch": 0.73, "learning_rate": 3.5169584051980575e-05, "loss": 1.6234, "step": 2870 }, { "epoch": 0.74, "learning_rate": 3.461447977339909e-05, "loss": 1.5814, "step": 2880 }, { "epoch": 0.74, "learning_rate": 3.4001800370596834e-05, "loss": 1.6018, "step": 2890 }, { "epoch": 0.74, "learning_rate": 3.339347915362796e-05, "loss": 1.6172, "step": 2900 }, { "epoch": 0.74, "learning_rate": 3.278955629293534e-05, "loss": 1.6042, "step": 2910 }, { "epoch": 0.75, "learning_rate": 3.219007166851673e-05, "loss": 1.6119, "step": 2920 }, { "epoch": 0.75, "learning_rate": 3.1595064867291394e-05, "loss": 1.621, "step": 2930 }, { "epoch": 0.75, "learning_rate": 3.1004575180485885e-05, "loss": 1.6046, "step": 2940 }, { "epoch": 0.76, "learning_rate": 3.0418641601039366e-05, "loss": 1.5811, "step": 2950 }, { "epoch": 0.76, "learning_rate": 2.9837302821028956e-05, "loss": 1.5635, "step": 2960 }, { "epoch": 0.76, "learning_rate": 2.926059722911447e-05, "loss": 1.6193, "step": 2970 }, { "epoch": 0.76, "learning_rate": 2.86885629080035e-05, "loss": 1.6067, "step": 2980 }, { "epoch": 0.77, "learning_rate": 2.823432416081132e-05, "loss": 1.5795, "step": 2990 }, { "epoch": 0.77, "learning_rate": 2.7670793109350358e-05, "loss": 1.5891, "step": 3000 }, { "epoch": 0.77, "eval_loss": 1.3908636569976807, "eval_runtime": 2.0291, "eval_samples_per_second": 49.282, "eval_steps_per_second": 1.971, "step": 3000 }, { "epoch": 0.77, "learning_rate": 2.7112038311280828e-05, "loss": 1.599, "step": 3010 }, { "epoch": 0.77, "learning_rate": 2.655809666393112e-05, "loss": 1.5877, "step": 3020 }, { "epoch": 0.78, "learning_rate": 2.600900474679364e-05, "loss": 1.6096, "step": 3030 }, { "epoch": 0.78, "learning_rate": 2.546479881910918e-05, "loss": 1.6317, "step": 3040 }, { "epoch": 0.78, "learning_rate": 2.4925514817472618e-05, "loss": 1.5218, "step": 3050 }, { "epoch": 0.78, "learning_rate": 2.4391188353459925e-05, "loss": 1.5526, "step": 3060 }, { "epoch": 0.79, "learning_rate": 2.3861854711276378e-05, "loss": 1.5753, "step": 3070 }, { "epoch": 0.79, "learning_rate": 2.333754884542667e-05, "loss": 1.6214, "step": 3080 }, { "epoch": 0.79, "learning_rate": 2.281830537840678e-05, "loss": 1.591, "step": 3090 }, { "epoch": 0.79, "learning_rate": 2.2355342955230186e-05, "loss": 1.5578, "step": 3100 }, { "epoch": 0.8, "learning_rate": 2.189653361595686e-05, "loss": 1.5684, "step": 3110 }, { "epoch": 0.8, "learning_rate": 2.1391646203159456e-05, "loss": 1.5654, "step": 3120 }, { "epoch": 0.8, "learning_rate": 2.089194968671713e-05, "loss": 1.5803, "step": 3130 }, { "epoch": 0.8, "learning_rate": 2.039747706404943e-05, "loss": 1.5737, "step": 3140 }, { "epoch": 0.81, "learning_rate": 1.99082609876164e-05, "loss": 1.5444, "step": 3150 }, { "epoch": 0.81, "learning_rate": 1.9472487573431274e-05, "loss": 1.5995, "step": 3160 }, { "epoch": 0.81, "learning_rate": 1.8993347647457706e-05, "loss": 1.5803, "step": 3170 }, { "epoch": 0.81, "learning_rate": 1.8519556989292508e-05, "loss": 1.5892, "step": 3180 }, { "epoch": 0.82, "learning_rate": 1.8051146885663938e-05, "loss": 1.6006, "step": 3190 }, { "epoch": 0.82, "learning_rate": 1.7588148267995695e-05, "loss": 1.567, "step": 3200 }, { "epoch": 0.82, "learning_rate": 1.7130591710364486e-05, "loss": 1.5557, "step": 3210 }, { "epoch": 0.82, "learning_rate": 1.6678507427480983e-05, "loss": 1.5794, "step": 3220 }, { "epoch": 0.83, "learning_rate": 1.6231925272694615e-05, "loss": 1.5858, "step": 3230 }, { "epoch": 0.83, "learning_rate": 1.5790874736022287e-05, "loss": 1.5791, "step": 3240 }, { "epoch": 0.83, "learning_rate": 1.535538494220089e-05, "loss": 1.5721, "step": 3250 }, { "epoch": 0.83, "learning_rate": 1.4925484648764131e-05, "loss": 1.5537, "step": 3260 }, { "epoch": 0.84, "learning_rate": 1.450120224414352e-05, "loss": 1.5698, "step": 3270 }, { "epoch": 0.84, "learning_rate": 1.4082565745793686e-05, "loss": 1.5529, "step": 3280 }, { "epoch": 0.84, "learning_rate": 1.3669602798342296e-05, "loss": 1.5702, "step": 3290 }, { "epoch": 0.84, "learning_rate": 1.3262340671764584e-05, "loss": 1.5273, "step": 3300 }, { "epoch": 0.85, "learning_rate": 1.2860806259582492e-05, "loss": 1.5401, "step": 3310 }, { "epoch": 0.85, "learning_rate": 1.2504344407159785e-05, "loss": 1.5753, "step": 3320 }, { "epoch": 0.85, "learning_rate": 1.2113765387943211e-05, "loss": 1.5564, "step": 3330 }, { "epoch": 0.85, "learning_rate": 1.172898992919923e-05, "loss": 1.5189, "step": 3340 }, { "epoch": 0.86, "learning_rate": 1.1350043439544521e-05, "loss": 1.5607, "step": 3350 }, { "epoch": 0.86, "learning_rate": 1.0976950942680197e-05, "loss": 1.539, "step": 3360 }, { "epoch": 0.86, "learning_rate": 1.0609737075739412e-05, "loss": 1.5593, "step": 3370 }, { "epoch": 0.87, "learning_rate": 1.0248426087660557e-05, "loss": 1.5345, "step": 3380 }, { "epoch": 0.87, "learning_rate": 9.89304183758577e-06, "loss": 1.5988, "step": 3390 }, { "epoch": 0.87, "learning_rate": 9.543607793285626e-06, "loss": 1.5306, "step": 3400 }, { "epoch": 0.87, "learning_rate": 9.200147029609264e-06, "loss": 1.5702, "step": 3410 }, { "epoch": 0.88, "learning_rate": 8.896158250762244e-06, "loss": 1.5378, "step": 3420 }, { "epoch": 0.88, "learning_rate": 8.59704246528129e-06, "loss": 1.5693, "step": 3430 }, { "epoch": 0.88, "learning_rate": 8.270426282311539e-06, "loss": 1.5517, "step": 3440 }, { "epoch": 0.88, "learning_rate": 7.949867454404824e-06, "loss": 1.5576, "step": 3450 }, { "epoch": 0.89, "learning_rate": 7.635387149637685e-06, "loss": 1.5763, "step": 3460 }, { "epoch": 0.89, "learning_rate": 7.327006134691883e-06, "loss": 1.5768, "step": 3470 }, { "epoch": 0.89, "learning_rate": 7.024744773483105e-06, "loss": 1.5393, "step": 3480 }, { "epoch": 0.89, "learning_rate": 6.7286230258161385e-06, "loss": 1.5617, "step": 3490 }, { "epoch": 0.9, "learning_rate": 6.438660446066891e-06, "loss": 1.5404, "step": 3500 }, { "epoch": 0.9, "learning_rate": 6.154876181891145e-06, "loss": 1.5765, "step": 3510 }, { "epoch": 0.9, "learning_rate": 5.877288972960071e-06, "loss": 1.5942, "step": 3520 }, { "epoch": 0.9, "learning_rate": 5.632774125747675e-06, "loss": 1.5557, "step": 3530 }, { "epoch": 0.91, "learning_rate": 5.367011482971008e-06, "loss": 1.5438, "step": 3540 }, { "epoch": 0.91, "learning_rate": 5.107497922021364e-06, "loss": 1.5351, "step": 3550 }, { "epoch": 0.91, "learning_rate": 4.854250579856034e-06, "loss": 1.5304, "step": 3560 }, { "epoch": 0.91, "learning_rate": 4.6072861796429665e-06, "loss": 1.554, "step": 3570 }, { "epoch": 0.92, "learning_rate": 4.366621029656582e-06, "loss": 1.5185, "step": 3580 }, { "epoch": 0.92, "learning_rate": 4.1322710222008065e-06, "loss": 1.5746, "step": 3590 }, { "epoch": 0.92, "learning_rate": 3.904251632559652e-06, "loss": 1.5413, "step": 3600 }, { "epoch": 0.92, "learning_rate": 3.6825779179752716e-06, "loss": 1.5533, "step": 3610 }, { "epoch": 0.93, "learning_rate": 3.467264516653668e-06, "loss": 1.5432, "step": 3620 }, { "epoch": 0.93, "learning_rate": 3.2583256467980773e-06, "loss": 1.5869, "step": 3630 }, { "epoch": 0.93, "learning_rate": 3.055775105670056e-06, "loss": 1.5374, "step": 3640 }, { "epoch": 0.93, "learning_rate": 2.8596262686783837e-06, "loss": 1.5425, "step": 3650 }, { "epoch": 0.94, "learning_rate": 2.6698920884958177e-06, "loss": 1.5906, "step": 3660 }, { "epoch": 0.94, "learning_rate": 2.486585094203786e-06, "loss": 1.5787, "step": 3670 }, { "epoch": 0.94, "learning_rate": 2.309717390464983e-06, "loss": 1.5579, "step": 3680 }, { "epoch": 0.94, "learning_rate": 2.1393006567240635e-06, "loss": 1.5391, "step": 3690 }, { "epoch": 0.95, "learning_rate": 1.9753461464364408e-06, "loss": 1.5478, "step": 3700 }, { "epoch": 0.95, "learning_rate": 1.8178646863250548e-06, "loss": 1.5777, "step": 3710 }, { "epoch": 0.95, "learning_rate": 1.6668666756655572e-06, "loss": 1.5239, "step": 3720 }, { "epoch": 0.95, "learning_rate": 1.5365200653588708e-06, "loss": 1.4992, "step": 3730 }, { "epoch": 0.96, "learning_rate": 1.397867724769042e-06, "loss": 1.5272, "step": 3740 }, { "epoch": 0.96, "learning_rate": 1.2657265680968589e-06, "loss": 1.541, "step": 3750 }, { "epoch": 0.96, "learning_rate": 1.1523738102167225e-06, "loss": 1.5219, "step": 3760 }, { "epoch": 0.97, "learning_rate": 1.044308247886483e-06, "loss": 1.5524, "step": 3770 }, { "epoch": 0.97, "learning_rate": 9.30443453495422e-07, "loss": 1.5508, "step": 3780 }, { "epoch": 0.97, "learning_rate": 8.231207093463699e-07, "loss": 1.5758, "step": 3790 }, { "epoch": 0.97, "learning_rate": 7.223471024881412e-07, "loss": 1.5658, "step": 3800 }, { "epoch": 0.98, "learning_rate": 6.281292874978029e-07, "loss": 1.5232, "step": 3810 }, { "epoch": 0.98, "learning_rate": 5.404734860412375e-07, "loss": 1.5646, "step": 3820 }, { "epoch": 0.98, "learning_rate": 4.5938548646227154e-07, "loss": 1.5771, "step": 3830 }, { "epoch": 0.98, "learning_rate": 3.8487064340047006e-07, "loss": 1.5611, "step": 3840 }, { "epoch": 0.99, "learning_rate": 3.16933877437553e-07, "loss": 1.6229, "step": 3850 }, { "epoch": 0.99, "learning_rate": 2.555796747724104e-07, "loss": 1.5496, "step": 3860 }, { "epoch": 0.99, "learning_rate": 2.0081208692490638e-07, "loss": 1.5312, "step": 3870 }, { "epoch": 0.99, "learning_rate": 1.5263473046833732e-07, "loss": 1.5681, "step": 3880 }, { "epoch": 1.0, "learning_rate": 1.1105078679056747e-07, "loss": 1.5128, "step": 3890 }, { "epoch": 1.0, "learning_rate": 7.606300188400805e-08, "loss": 1.5764, "step": 3900 }, { "epoch": 1.0, "step": 3906, "total_flos": 331952415375360.0, "train_loss": 1.9416432221974707, "train_runtime": 74872.2082, "train_samples_per_second": 6.678, "train_steps_per_second": 0.052 } ], "max_steps": 3906, "num_train_epochs": 1, "total_flos": 331952415375360.0, "trial_name": null, "trial_params": null }