{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9996160245744274, "global_step": 11718, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0, "loss": 9.2897, "step": 1 }, { "epoch": 0.0, "learning_rate": 5.555555555555556e-06, "loss": 9.3838, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.2222222222222223e-05, "loss": 9.2904, "step": 10 }, { "epoch": 0.0, "learning_rate": 2.777777777777778e-05, "loss": 8.364, "step": 15 }, { "epoch": 0.01, "learning_rate": 5.555555555555556e-05, "loss": 7.4332, "step": 20 }, { "epoch": 0.01, "learning_rate": 7.222222222222222e-05, "loss": 6.7965, "step": 25 }, { "epoch": 0.01, "learning_rate": 0.0001, "loss": 6.134, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00012777777777777776, "loss": 5.6268, "step": 35 }, { "epoch": 0.01, "learning_rate": 0.00015555555555555556, "loss": 5.3261, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.00018333333333333334, "loss": 4.8834, "step": 45 }, { "epoch": 0.01, "learning_rate": 0.00019999998553576974, "loss": 4.638, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0001999998228132274, "loss": 4.3787, "step": 55 }, { "epoch": 0.02, "learning_rate": 0.00019999947928815007, "loss": 4.0214, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.00019999895496115885, "loss": 3.8979, "step": 65 }, { "epoch": 0.02, "learning_rate": 0.00019999824983320177, "loss": 3.7545, "step": 70 }, { "epoch": 0.02, "learning_rate": 0.00019999736390555368, "loss": 3.6385, "step": 75 }, { "epoch": 0.02, "learning_rate": 0.0001999962971798164, "loss": 3.5271, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.00019999504965791858, "loss": 3.3175, "step": 85 }, { "epoch": 0.02, "learning_rate": 0.00019999362134211576, "loss": 3.3254, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.0001999920122349904, "loss": 3.231, "step": 95 }, { "epoch": 0.03, "learning_rate": 0.00019999022233945185, "loss": 3.1622, "step": 100 }, { "epoch": 0.03, "learning_rate": 0.0001999882516587362, "loss": 3.1442, "step": 105 }, { "epoch": 0.03, "learning_rate": 0.00019998610019640654, "loss": 3.0162, "step": 110 }, { "epoch": 0.03, "learning_rate": 0.00019998376795635282, "loss": 3.0849, "step": 115 }, { "epoch": 0.03, "learning_rate": 0.00019998125494279174, "loss": 3.0558, "step": 120 }, { "epoch": 0.03, "learning_rate": 0.00019997856116026692, "loss": 2.9272, "step": 125 }, { "epoch": 0.03, "learning_rate": 0.00019997568661364877, "loss": 3.0184, "step": 130 }, { "epoch": 0.03, "learning_rate": 0.0001999726313081346, "loss": 2.9134, "step": 135 }, { "epoch": 0.04, "learning_rate": 0.00019996939524924846, "loss": 2.9225, "step": 140 }, { "epoch": 0.04, "learning_rate": 0.00019996597844284123, "loss": 2.8925, "step": 145 }, { "epoch": 0.04, "learning_rate": 0.0001999623808950906, "loss": 2.8565, "step": 150 }, { "epoch": 0.04, "learning_rate": 0.00019995860261250108, "loss": 2.8153, "step": 155 }, { "epoch": 0.04, "learning_rate": 0.00019995464360190386, "loss": 2.8274, "step": 160 }, { "epoch": 0.04, "learning_rate": 0.00019995050387045695, "loss": 2.6365, "step": 165 }, { "epoch": 0.04, "learning_rate": 0.00019994618342564512, "loss": 2.8168, "step": 170 }, { "epoch": 0.04, "learning_rate": 0.00019994168227527984, "loss": 2.7378, "step": 175 }, { "epoch": 0.05, "learning_rate": 0.00019993700042749937, "loss": 2.7134, "step": 180 }, { "epoch": 0.05, "learning_rate": 0.00019993213789076854, "loss": 2.6976, "step": 185 }, { "epoch": 0.05, "learning_rate": 0.00019992709467387903, "loss": 2.6616, "step": 190 }, { "epoch": 0.05, "learning_rate": 0.0001999218707859491, "loss": 2.6653, "step": 195 }, { "epoch": 0.05, "learning_rate": 0.00019991646623642364, "loss": 2.6252, "step": 200 }, { "epoch": 0.05, "learning_rate": 0.0001999108810350743, "loss": 2.6404, "step": 205 }, { "epoch": 0.05, "learning_rate": 0.00019990511519199923, "loss": 2.7217, "step": 210 }, { "epoch": 0.06, "learning_rate": 0.00019989916871762325, "loss": 2.6164, "step": 215 }, { "epoch": 0.06, "learning_rate": 0.0001998930416226978, "loss": 2.6779, "step": 220 }, { "epoch": 0.06, "learning_rate": 0.0001998867339183008, "loss": 2.5657, "step": 225 }, { "epoch": 0.06, "learning_rate": 0.00019988024561583677, "loss": 2.5789, "step": 230 }, { "epoch": 0.06, "learning_rate": 0.00019987357672703672, "loss": 2.4826, "step": 235 }, { "epoch": 0.06, "learning_rate": 0.00019986672726395824, "loss": 2.4319, "step": 240 }, { "epoch": 0.06, "learning_rate": 0.00019985969723898534, "loss": 2.5734, "step": 245 }, { "epoch": 0.06, "learning_rate": 0.0001998524866648285, "loss": 2.5765, "step": 250 }, { "epoch": 0.07, "learning_rate": 0.00019984509555452461, "loss": 2.5343, "step": 255 }, { "epoch": 0.07, "learning_rate": 0.00019983752392143704, "loss": 2.5778, "step": 260 }, { "epoch": 0.07, "learning_rate": 0.00019982977177925552, "loss": 2.4845, "step": 265 }, { "epoch": 0.07, "learning_rate": 0.00019982183914199616, "loss": 2.5486, "step": 270 }, { "epoch": 0.07, "learning_rate": 0.00019981372602400138, "loss": 2.5074, "step": 275 }, { "epoch": 0.07, "learning_rate": 0.00019980543243993992, "loss": 2.5488, "step": 280 }, { "epoch": 0.07, "learning_rate": 0.00019979695840480682, "loss": 2.4273, "step": 285 }, { "epoch": 0.07, "learning_rate": 0.0001997883039339234, "loss": 2.4197, "step": 290 }, { "epoch": 0.08, "learning_rate": 0.00019977946904293714, "loss": 2.4427, "step": 295 }, { "epoch": 0.08, "learning_rate": 0.0001997704537478218, "loss": 2.4044, "step": 300 }, { "epoch": 0.08, "learning_rate": 0.00019976125806487734, "loss": 2.4811, "step": 305 }, { "epoch": 0.08, "learning_rate": 0.00019975188201072972, "loss": 2.4572, "step": 310 }, { "epoch": 0.08, "learning_rate": 0.00019974232560233122, "loss": 2.4498, "step": 315 }, { "epoch": 0.08, "learning_rate": 0.00019973258885696, "loss": 2.5056, "step": 320 }, { "epoch": 0.08, "learning_rate": 0.00019972267179222045, "loss": 2.4114, "step": 325 }, { "epoch": 0.08, "learning_rate": 0.00019971257442604287, "loss": 2.4181, "step": 330 }, { "epoch": 0.09, "learning_rate": 0.00019970229677668358, "loss": 2.3741, "step": 335 }, { "epoch": 0.09, "learning_rate": 0.0001996918388627249, "loss": 2.3721, "step": 340 }, { "epoch": 0.09, "learning_rate": 0.000199681200703075, "loss": 2.4324, "step": 345 }, { "epoch": 0.09, "learning_rate": 0.00019967038231696803, "loss": 2.3941, "step": 350 }, { "epoch": 0.09, "learning_rate": 0.00019965938372396385, "loss": 2.4113, "step": 355 }, { "epoch": 0.09, "learning_rate": 0.0001996482049439483, "loss": 2.4253, "step": 360 }, { "epoch": 0.09, "learning_rate": 0.00019963684599713291, "loss": 2.3272, "step": 365 }, { "epoch": 0.09, "learning_rate": 0.000199625306904055, "loss": 2.4973, "step": 370 }, { "epoch": 0.1, "learning_rate": 0.00019961358768557757, "loss": 2.2946, "step": 375 }, { "epoch": 0.1, "learning_rate": 0.00019960168836288927, "loss": 2.4072, "step": 380 }, { "epoch": 0.1, "learning_rate": 0.00019958960895750445, "loss": 2.393, "step": 385 }, { "epoch": 0.1, "learning_rate": 0.00019957734949126304, "loss": 2.3143, "step": 390 }, { "epoch": 0.1, "learning_rate": 0.0001995649099863304, "loss": 2.382, "step": 395 }, { "epoch": 0.1, "learning_rate": 0.0001995522904651977, "loss": 2.3571, "step": 400 }, { "epoch": 0.1, "learning_rate": 0.0001995394909506812, "loss": 2.2845, "step": 405 }, { "epoch": 0.1, "learning_rate": 0.00019952651146592286, "loss": 2.3201, "step": 410 }, { "epoch": 0.11, "learning_rate": 0.00019951335203439001, "loss": 2.3537, "step": 415 }, { "epoch": 0.11, "learning_rate": 0.00019950001267987524, "loss": 2.3467, "step": 420 }, { "epoch": 0.11, "learning_rate": 0.00019948649342649646, "loss": 2.369, "step": 425 }, { "epoch": 0.11, "learning_rate": 0.00019947279429869687, "loss": 2.342, "step": 430 }, { "epoch": 0.11, "learning_rate": 0.00019945891532124495, "loss": 2.3362, "step": 435 }, { "epoch": 0.11, "learning_rate": 0.00019944485651923418, "loss": 2.2557, "step": 440 }, { "epoch": 0.11, "learning_rate": 0.00019943061791808338, "loss": 2.2849, "step": 445 }, { "epoch": 0.12, "learning_rate": 0.00019941619954353627, "loss": 2.3154, "step": 450 }, { "epoch": 0.12, "learning_rate": 0.00019940160142166172, "loss": 2.2588, "step": 455 }, { "epoch": 0.12, "learning_rate": 0.00019938682357885357, "loss": 2.3105, "step": 460 }, { "epoch": 0.12, "learning_rate": 0.00019937186604183053, "loss": 2.3508, "step": 465 }, { "epoch": 0.12, "learning_rate": 0.0001993567288376363, "loss": 2.2427, "step": 470 }, { "epoch": 0.12, "learning_rate": 0.00019934141199363933, "loss": 2.2947, "step": 475 }, { "epoch": 0.12, "learning_rate": 0.00019932591553753295, "loss": 2.2439, "step": 480 }, { "epoch": 0.12, "learning_rate": 0.00019931023949733517, "loss": 2.2582, "step": 485 }, { "epoch": 0.13, "learning_rate": 0.00019929438390138877, "loss": 2.2502, "step": 490 }, { "epoch": 0.13, "learning_rate": 0.00019927834877836106, "loss": 2.3103, "step": 495 }, { "epoch": 0.13, "learning_rate": 0.00019926213415724403, "loss": 2.2329, "step": 500 }, { "epoch": 0.13, "learning_rate": 0.00019924574006735418, "loss": 2.2965, "step": 505 }, { "epoch": 0.13, "learning_rate": 0.00019922916653833248, "loss": 2.3283, "step": 510 }, { "epoch": 0.13, "learning_rate": 0.0001992124136001443, "loss": 2.2531, "step": 515 }, { "epoch": 0.13, "learning_rate": 0.00019919548128307954, "loss": 2.3132, "step": 520 }, { "epoch": 0.13, "learning_rate": 0.00019917836961775223, "loss": 2.2994, "step": 525 }, { "epoch": 0.14, "learning_rate": 0.00019916107863510076, "loss": 2.3102, "step": 530 }, { "epoch": 0.14, "learning_rate": 0.00019914360836638774, "loss": 2.1879, "step": 535 }, { "epoch": 0.14, "learning_rate": 0.0001991259588431999, "loss": 2.2855, "step": 540 }, { "epoch": 0.14, "learning_rate": 0.0001991081300974481, "loss": 2.1905, "step": 545 }, { "epoch": 0.14, "learning_rate": 0.0001990901221613672, "loss": 2.2592, "step": 550 }, { "epoch": 0.14, "learning_rate": 0.00019907193506751606, "loss": 2.289, "step": 555 }, { "epoch": 0.14, "learning_rate": 0.00019905356884877747, "loss": 2.2288, "step": 560 }, { "epoch": 0.14, "learning_rate": 0.00019903502353835813, "loss": 2.2417, "step": 565 }, { "epoch": 0.15, "learning_rate": 0.0001990162991697884, "loss": 2.2646, "step": 570 }, { "epoch": 0.15, "learning_rate": 0.00019899739577692254, "loss": 2.2407, "step": 575 }, { "epoch": 0.15, "learning_rate": 0.00019897831339393842, "loss": 2.2366, "step": 580 }, { "epoch": 0.15, "learning_rate": 0.00019895905205533746, "loss": 2.1751, "step": 585 }, { "epoch": 0.15, "learning_rate": 0.00019893961179594482, "loss": 2.282, "step": 590 }, { "epoch": 0.15, "learning_rate": 0.00019891999265090896, "loss": 2.2579, "step": 595 }, { "epoch": 0.15, "learning_rate": 0.00019890019465570189, "loss": 2.2288, "step": 600 }, { "epoch": 0.15, "learning_rate": 0.00019888021784611893, "loss": 2.2074, "step": 605 }, { "epoch": 0.16, "learning_rate": 0.00019886006225827875, "loss": 2.1922, "step": 610 }, { "epoch": 0.16, "learning_rate": 0.00019883972792862322, "loss": 2.2828, "step": 615 }, { "epoch": 0.16, "learning_rate": 0.00019881921489391737, "loss": 2.265, "step": 620 }, { "epoch": 0.16, "learning_rate": 0.0001987985231912494, "loss": 2.1636, "step": 625 }, { "epoch": 0.16, "learning_rate": 0.00019877765285803044, "loss": 2.1897, "step": 630 }, { "epoch": 0.16, "learning_rate": 0.00019875660393199466, "loss": 2.1403, "step": 635 }, { "epoch": 0.16, "learning_rate": 0.00019873537645119914, "loss": 2.0977, "step": 640 }, { "epoch": 0.17, "learning_rate": 0.00019871397045402377, "loss": 2.2344, "step": 645 }, { "epoch": 0.17, "learning_rate": 0.00019869238597917122, "loss": 2.1271, "step": 650 }, { "epoch": 0.17, "learning_rate": 0.0001986706230656668, "loss": 2.1469, "step": 655 }, { "epoch": 0.17, "learning_rate": 0.00019864868175285846, "loss": 2.1112, "step": 660 }, { "epoch": 0.17, "learning_rate": 0.0001986265620804168, "loss": 2.1998, "step": 665 }, { "epoch": 0.17, "learning_rate": 0.00019860426408833476, "loss": 2.1744, "step": 670 }, { "epoch": 0.17, "learning_rate": 0.00019858178781692774, "loss": 2.1173, "step": 675 }, { "epoch": 0.17, "learning_rate": 0.00019855913330683354, "loss": 2.1755, "step": 680 }, { "epoch": 0.18, "learning_rate": 0.0001985363005990121, "loss": 2.1156, "step": 685 }, { "epoch": 0.18, "learning_rate": 0.00019851328973474562, "loss": 2.1461, "step": 690 }, { "epoch": 0.18, "learning_rate": 0.00019849010075563843, "loss": 2.1229, "step": 695 }, { "epoch": 0.18, "learning_rate": 0.00019846673370361683, "loss": 2.1973, "step": 700 }, { "epoch": 0.18, "learning_rate": 0.00019844318862092912, "loss": 2.1457, "step": 705 }, { "epoch": 0.18, "learning_rate": 0.00019841946555014552, "loss": 2.1617, "step": 710 }, { "epoch": 0.18, "learning_rate": 0.00019839556453415797, "loss": 2.1935, "step": 715 }, { "epoch": 0.18, "learning_rate": 0.00019837148561618025, "loss": 2.1807, "step": 720 }, { "epoch": 0.19, "learning_rate": 0.00019834722883974766, "loss": 2.1541, "step": 725 }, { "epoch": 0.19, "learning_rate": 0.00019832279424871719, "loss": 2.1056, "step": 730 }, { "epoch": 0.19, "learning_rate": 0.00019829818188726728, "loss": 2.1242, "step": 735 }, { "epoch": 0.19, "learning_rate": 0.00019827339179989773, "loss": 2.1558, "step": 740 }, { "epoch": 0.19, "learning_rate": 0.00019824842403142983, "loss": 2.1562, "step": 745 }, { "epoch": 0.19, "learning_rate": 0.0001982232786270059, "loss": 2.1095, "step": 750 }, { "epoch": 0.19, "learning_rate": 0.0001981979556320896, "loss": 2.091, "step": 755 }, { "epoch": 0.19, "learning_rate": 0.00019817245509246568, "loss": 2.1305, "step": 760 }, { "epoch": 0.2, "learning_rate": 0.00019814677705423973, "loss": 2.0903, "step": 765 }, { "epoch": 0.2, "learning_rate": 0.00019812092156383849, "loss": 2.1, "step": 770 }, { "epoch": 0.2, "learning_rate": 0.00019809488866800936, "loss": 2.059, "step": 775 }, { "epoch": 0.2, "learning_rate": 0.00019806867841382057, "loss": 2.1434, "step": 780 }, { "epoch": 0.2, "learning_rate": 0.00019804229084866102, "loss": 2.102, "step": 785 }, { "epoch": 0.2, "learning_rate": 0.00019801572602024016, "loss": 2.1808, "step": 790 }, { "epoch": 0.2, "learning_rate": 0.000197988983976588, "loss": 2.0807, "step": 795 }, { "epoch": 0.2, "learning_rate": 0.00019796206476605486, "loss": 2.1151, "step": 800 }, { "epoch": 0.21, "learning_rate": 0.00019793496843731147, "loss": 2.0816, "step": 805 }, { "epoch": 0.21, "learning_rate": 0.00019790769503934882, "loss": 2.1386, "step": 810 }, { "epoch": 0.21, "learning_rate": 0.00019788024462147788, "loss": 2.1359, "step": 815 }, { "epoch": 0.21, "learning_rate": 0.0001978526172333299, "loss": 2.1696, "step": 820 }, { "epoch": 0.21, "learning_rate": 0.00019782481292485588, "loss": 2.102, "step": 825 }, { "epoch": 0.21, "learning_rate": 0.00019779683174632687, "loss": 2.0928, "step": 830 }, { "epoch": 0.21, "learning_rate": 0.00019776867374833365, "loss": 2.1227, "step": 835 }, { "epoch": 0.22, "learning_rate": 0.00019774033898178667, "loss": 2.1159, "step": 840 }, { "epoch": 0.22, "learning_rate": 0.000197711827497916, "loss": 2.0423, "step": 845 }, { "epoch": 0.22, "learning_rate": 0.00019768313934827116, "loss": 2.047, "step": 850 }, { "epoch": 0.22, "learning_rate": 0.00019765427458472124, "loss": 2.0187, "step": 855 }, { "epoch": 0.22, "learning_rate": 0.00019762523325945447, "loss": 2.0837, "step": 860 }, { "epoch": 0.22, "learning_rate": 0.00019759601542497848, "loss": 2.0592, "step": 865 }, { "epoch": 0.22, "learning_rate": 0.00019756662113411984, "loss": 2.0428, "step": 870 }, { "epoch": 0.22, "learning_rate": 0.00019753705044002438, "loss": 2.1073, "step": 875 }, { "epoch": 0.23, "learning_rate": 0.0001975073033961567, "loss": 2.0745, "step": 880 }, { "epoch": 0.23, "learning_rate": 0.0001974773800563003, "loss": 2.0325, "step": 885 }, { "epoch": 0.23, "learning_rate": 0.00019744728047455746, "loss": 2.0986, "step": 890 }, { "epoch": 0.23, "learning_rate": 0.00019741700470534906, "loss": 2.0705, "step": 895 }, { "epoch": 0.23, "learning_rate": 0.00019738655280341456, "loss": 2.0851, "step": 900 }, { "epoch": 0.23, "learning_rate": 0.00019735592482381188, "loss": 2.0603, "step": 905 }, { "epoch": 0.23, "learning_rate": 0.00019732512082191726, "loss": 2.1276, "step": 910 }, { "epoch": 0.23, "learning_rate": 0.00019729414085342526, "loss": 2.087, "step": 915 }, { "epoch": 0.24, "learning_rate": 0.0001972629849743485, "loss": 2.1396, "step": 920 }, { "epoch": 0.24, "learning_rate": 0.0001972316532410177, "loss": 2.0405, "step": 925 }, { "epoch": 0.24, "learning_rate": 0.00019720014571008158, "loss": 2.131, "step": 930 }, { "epoch": 0.24, "learning_rate": 0.00019716846243850663, "loss": 2.0722, "step": 935 }, { "epoch": 0.24, "learning_rate": 0.00019713660348357707, "loss": 2.1107, "step": 940 }, { "epoch": 0.24, "learning_rate": 0.00019710456890289482, "loss": 2.0747, "step": 945 }, { "epoch": 0.24, "learning_rate": 0.00019707235875437933, "loss": 2.0343, "step": 950 }, { "epoch": 0.24, "learning_rate": 0.00019703997309626747, "loss": 2.0575, "step": 955 }, { "epoch": 0.25, "learning_rate": 0.00019700741198711344, "loss": 2.0377, "step": 960 }, { "epoch": 0.25, "learning_rate": 0.00019697467548578863, "loss": 2.0604, "step": 965 }, { "epoch": 0.25, "learning_rate": 0.0001969417636514816, "loss": 2.0612, "step": 970 }, { "epoch": 0.25, "learning_rate": 0.00019690867654369786, "loss": 2.0221, "step": 975 }, { "epoch": 0.25, "learning_rate": 0.00019687541422225985, "loss": 2.0022, "step": 980 }, { "epoch": 0.25, "learning_rate": 0.00019684197674730682, "loss": 2.065, "step": 985 }, { "epoch": 0.25, "learning_rate": 0.00019680836417929463, "loss": 2.0337, "step": 990 }, { "epoch": 0.25, "learning_rate": 0.00019677457657899585, "loss": 2.0171, "step": 995 }, { "epoch": 0.26, "learning_rate": 0.00019674061400749935, "loss": 2.0215, "step": 1000 }, { "epoch": 0.26, "eval_loss": 1.8183355331420898, "eval_runtime": 2.0369, "eval_samples_per_second": 49.095, "eval_steps_per_second": 1.964, "step": 1000 }, { "epoch": 0.26, "learning_rate": 0.00019670647652621041, "loss": 1.9703, "step": 1005 }, { "epoch": 0.26, "learning_rate": 0.00019667216419685067, "loss": 2.0562, "step": 1010 }, { "epoch": 0.26, "learning_rate": 0.0001966376770814577, "loss": 2.0276, "step": 1015 }, { "epoch": 0.26, "learning_rate": 0.00019660301524238524, "loss": 2.0477, "step": 1020 }, { "epoch": 0.26, "learning_rate": 0.00019656817874230288, "loss": 2.0457, "step": 1025 }, { "epoch": 0.26, "learning_rate": 0.00019653316764419598, "loss": 2.0386, "step": 1030 }, { "epoch": 0.26, "learning_rate": 0.00019649798201136566, "loss": 1.988, "step": 1035 }, { "epoch": 0.27, "learning_rate": 0.0001964626219074285, "loss": 2.0161, "step": 1040 }, { "epoch": 0.27, "learning_rate": 0.0001964270873963166, "loss": 2.0728, "step": 1045 }, { "epoch": 0.27, "learning_rate": 0.00019639137854227738, "loss": 1.9877, "step": 1050 }, { "epoch": 0.27, "learning_rate": 0.00019635549540987344, "loss": 1.9707, "step": 1055 }, { "epoch": 0.27, "learning_rate": 0.00019631943806398252, "loss": 2.0202, "step": 1060 }, { "epoch": 0.27, "learning_rate": 0.00019628320656979732, "loss": 2.048, "step": 1065 }, { "epoch": 0.27, "learning_rate": 0.00019624680099282547, "loss": 1.9747, "step": 1070 }, { "epoch": 0.28, "learning_rate": 0.00019621022139888918, "loss": 2.0521, "step": 1075 }, { "epoch": 0.28, "learning_rate": 0.0001961734678541255, "loss": 1.98, "step": 1080 }, { "epoch": 0.28, "learning_rate": 0.00019613654042498587, "loss": 1.9254, "step": 1085 }, { "epoch": 0.28, "learning_rate": 0.00019609943917823616, "loss": 1.9746, "step": 1090 }, { "epoch": 0.28, "learning_rate": 0.0001960621641809564, "loss": 2.0459, "step": 1095 }, { "epoch": 0.28, "learning_rate": 0.00019602471550054091, "loss": 2.0153, "step": 1100 }, { "epoch": 0.28, "learning_rate": 0.00019598709320469798, "loss": 1.9552, "step": 1105 }, { "epoch": 0.28, "learning_rate": 0.00019594929736144976, "loss": 2.049, "step": 1110 }, { "epoch": 0.29, "learning_rate": 0.0001959113280391322, "loss": 2.0373, "step": 1115 }, { "epoch": 0.29, "learning_rate": 0.000195873185306395, "loss": 2.034, "step": 1120 }, { "epoch": 0.29, "learning_rate": 0.00019583486923220123, "loss": 2.0992, "step": 1125 }, { "epoch": 0.29, "learning_rate": 0.0001957963798858275, "loss": 2.0166, "step": 1130 }, { "epoch": 0.29, "learning_rate": 0.00019575771733686362, "loss": 2.0454, "step": 1135 }, { "epoch": 0.29, "learning_rate": 0.00019571888165521254, "loss": 2.0252, "step": 1140 }, { "epoch": 0.29, "learning_rate": 0.00019567987291109037, "loss": 1.9749, "step": 1145 }, { "epoch": 0.29, "learning_rate": 0.000195640691175026, "loss": 1.929, "step": 1150 }, { "epoch": 0.3, "learning_rate": 0.0001956013365178611, "loss": 1.9617, "step": 1155 }, { "epoch": 0.3, "learning_rate": 0.00019556180901075, "loss": 1.977, "step": 1160 }, { "epoch": 0.3, "learning_rate": 0.00019552210872515963, "loss": 1.9455, "step": 1165 }, { "epoch": 0.3, "learning_rate": 0.0001954822357328692, "loss": 2.0271, "step": 1170 }, { "epoch": 0.3, "learning_rate": 0.00019544219010597024, "loss": 1.9803, "step": 1175 }, { "epoch": 0.3, "learning_rate": 0.00019540197191686632, "loss": 1.9875, "step": 1180 }, { "epoch": 0.3, "learning_rate": 0.00019536158123827316, "loss": 1.9826, "step": 1185 }, { "epoch": 0.3, "learning_rate": 0.00019532914455203034, "loss": 1.9978, "step": 1190 }, { "epoch": 0.31, "learning_rate": 0.00019528844357659525, "loss": 2.0476, "step": 1195 }, { "epoch": 0.31, "learning_rate": 0.00019524757031693336, "loss": 2.0412, "step": 1200 }, { "epoch": 0.31, "learning_rate": 0.0001952065248469447, "loss": 2.0222, "step": 1205 }, { "epoch": 0.31, "learning_rate": 0.00019516530724084067, "loss": 1.9951, "step": 1210 }, { "epoch": 0.31, "learning_rate": 0.00019512391757314383, "loss": 1.9622, "step": 1215 }, { "epoch": 0.31, "learning_rate": 0.00019508235591868793, "loss": 1.995, "step": 1220 }, { "epoch": 0.31, "learning_rate": 0.00019504062235261758, "loss": 1.9186, "step": 1225 }, { "epoch": 0.31, "learning_rate": 0.00019499871695038826, "loss": 1.9743, "step": 1230 }, { "epoch": 0.32, "learning_rate": 0.0001949566397877662, "loss": 1.9741, "step": 1235 }, { "epoch": 0.32, "learning_rate": 0.00019491439094082807, "loss": 1.9279, "step": 1240 }, { "epoch": 0.32, "learning_rate": 0.00019487197048596096, "loss": 1.996, "step": 1245 }, { "epoch": 0.32, "learning_rate": 0.0001948293784998623, "loss": 1.9672, "step": 1250 }, { "epoch": 0.32, "learning_rate": 0.0001947866150595396, "loss": 1.9918, "step": 1255 }, { "epoch": 0.32, "learning_rate": 0.00019475228091218712, "loss": 1.9849, "step": 1260 }, { "epoch": 0.32, "learning_rate": 0.00019470920904930954, "loss": 1.9913, "step": 1265 }, { "epoch": 0.33, "learning_rate": 0.00019466596594947775, "loss": 1.9844, "step": 1270 }, { "epoch": 0.33, "learning_rate": 0.00019462255169087648, "loss": 2.0133, "step": 1275 }, { "epoch": 0.33, "learning_rate": 0.00019457896635199996, "loss": 1.9548, "step": 1280 }, { "epoch": 0.33, "learning_rate": 0.00019453521001165173, "loss": 1.9786, "step": 1285 }, { "epoch": 0.33, "learning_rate": 0.0001944912827489445, "loss": 1.9684, "step": 1290 }, { "epoch": 0.33, "learning_rate": 0.0001944471846433, "loss": 1.9318, "step": 1295 }, { "epoch": 0.33, "learning_rate": 0.00019440291577444888, "loss": 2.015, "step": 1300 }, { "epoch": 0.33, "learning_rate": 0.0001943584762224305, "loss": 1.9223, "step": 1305 }, { "epoch": 0.34, "learning_rate": 0.00019431386606759285, "loss": 1.9955, "step": 1310 }, { "epoch": 0.34, "learning_rate": 0.0001942690853905924, "loss": 1.9918, "step": 1315 }, { "epoch": 0.34, "learning_rate": 0.00019422413427239382, "loss": 1.9083, "step": 1320 }, { "epoch": 0.34, "learning_rate": 0.00019417901279427006, "loss": 2.0179, "step": 1325 }, { "epoch": 0.34, "learning_rate": 0.00019413372103780206, "loss": 1.9483, "step": 1330 }, { "epoch": 0.34, "learning_rate": 0.00019408825908487858, "loss": 1.9777, "step": 1335 }, { "epoch": 0.34, "learning_rate": 0.00019404262701769613, "loss": 1.8945, "step": 1340 }, { "epoch": 0.34, "learning_rate": 0.00019399682491875878, "loss": 1.954, "step": 1345 }, { "epoch": 0.35, "learning_rate": 0.00019395085287087809, "loss": 1.9467, "step": 1350 }, { "epoch": 0.35, "learning_rate": 0.00019390471095717272, "loss": 1.9789, "step": 1355 }, { "epoch": 0.35, "learning_rate": 0.0001938583992610687, "loss": 1.9677, "step": 1360 }, { "epoch": 0.35, "learning_rate": 0.00019381191786629884, "loss": 1.9812, "step": 1365 }, { "epoch": 0.35, "learning_rate": 0.0001937652668569028, "loss": 2.0083, "step": 1370 }, { "epoch": 0.35, "learning_rate": 0.00019371844631722697, "loss": 1.9463, "step": 1375 }, { "epoch": 0.35, "learning_rate": 0.00019367145633192423, "loss": 2.0202, "step": 1380 }, { "epoch": 0.35, "learning_rate": 0.00019362429698595378, "loss": 1.867, "step": 1385 }, { "epoch": 0.36, "learning_rate": 0.00019357696836458109, "loss": 1.9782, "step": 1390 }, { "epoch": 0.36, "learning_rate": 0.00019352947055337766, "loss": 1.9559, "step": 1395 }, { "epoch": 0.36, "learning_rate": 0.00019348180363822087, "loss": 1.9721, "step": 1400 }, { "epoch": 0.36, "learning_rate": 0.00019343396770529392, "loss": 1.961, "step": 1405 }, { "epoch": 0.36, "learning_rate": 0.00019338596284108545, "loss": 1.9242, "step": 1410 }, { "epoch": 0.36, "learning_rate": 0.00019333778913238972, "loss": 1.9001, "step": 1415 }, { "epoch": 0.36, "learning_rate": 0.0001932894466663061, "loss": 1.9546, "step": 1420 }, { "epoch": 0.36, "learning_rate": 0.00019324093553023917, "loss": 1.9429, "step": 1425 }, { "epoch": 0.37, "learning_rate": 0.00019319225581189848, "loss": 1.8924, "step": 1430 }, { "epoch": 0.37, "learning_rate": 0.0001931434075992983, "loss": 1.9765, "step": 1435 }, { "epoch": 0.37, "learning_rate": 0.00019309439098075766, "loss": 1.9305, "step": 1440 }, { "epoch": 0.37, "learning_rate": 0.0001930452060448999, "loss": 1.933, "step": 1445 }, { "epoch": 0.37, "learning_rate": 0.0001929958528806529, "loss": 1.9897, "step": 1450 }, { "epoch": 0.37, "learning_rate": 0.00019294633157724853, "loss": 1.9231, "step": 1455 }, { "epoch": 0.37, "learning_rate": 0.00019289664222422274, "loss": 1.9465, "step": 1460 }, { "epoch": 0.38, "learning_rate": 0.00019284678491141529, "loss": 1.8723, "step": 1465 }, { "epoch": 0.38, "learning_rate": 0.0001927967597289696, "loss": 1.9487, "step": 1470 }, { "epoch": 0.38, "learning_rate": 0.0001927465667673327, "loss": 1.9555, "step": 1475 }, { "epoch": 0.38, "learning_rate": 0.00019269620611725481, "loss": 1.9607, "step": 1480 }, { "epoch": 0.38, "learning_rate": 0.0001926456778697895, "loss": 1.9146, "step": 1485 }, { "epoch": 0.38, "learning_rate": 0.00019260513466307967, "loss": 1.9291, "step": 1490 }, { "epoch": 0.38, "learning_rate": 0.00019255430497073888, "loss": 1.9462, "step": 1495 }, { "epoch": 0.38, "learning_rate": 0.00019250330793757196, "loss": 1.8946, "step": 1500 }, { "epoch": 0.39, "learning_rate": 0.00019245214365578298, "loss": 1.916, "step": 1505 }, { "epoch": 0.39, "learning_rate": 0.00019241109187349817, "loss": 1.8882, "step": 1510 }, { "epoch": 0.39, "learning_rate": 0.00019235962677750896, "loss": 1.9685, "step": 1515 }, { "epoch": 0.39, "learning_rate": 0.0001923079946926774, "loss": 1.9601, "step": 1520 }, { "epoch": 0.39, "learning_rate": 0.0001922665688555682, "loss": 1.895, "step": 1525 }, { "epoch": 0.39, "learning_rate": 0.00019221463642627083, "loss": 1.9459, "step": 1530 }, { "epoch": 0.39, "learning_rate": 0.00019216253727027778, "loss": 1.9854, "step": 1535 }, { "epoch": 0.39, "learning_rate": 0.00019211027148178578, "loss": 1.9133, "step": 1540 }, { "epoch": 0.4, "learning_rate": 0.00019205783915529287, "loss": 2.0028, "step": 1545 }, { "epoch": 0.4, "learning_rate": 0.0001920052403855982, "loss": 1.9149, "step": 1550 }, { "epoch": 0.4, "learning_rate": 0.00019195247526780187, "loss": 1.8837, "step": 1555 }, { "epoch": 0.4, "learning_rate": 0.00019189954389730468, "loss": 1.9246, "step": 1560 }, { "epoch": 0.4, "learning_rate": 0.00019184644636980805, "loss": 1.9212, "step": 1565 }, { "epoch": 0.4, "learning_rate": 0.0001917931827813139, "loss": 1.9212, "step": 1570 }, { "epoch": 0.4, "learning_rate": 0.00019173975322812426, "loss": 1.9334, "step": 1575 }, { "epoch": 0.4, "learning_rate": 0.00019168615780684132, "loss": 1.9698, "step": 1580 }, { "epoch": 0.41, "learning_rate": 0.00019163239661436707, "loss": 1.9122, "step": 1585 }, { "epoch": 0.41, "learning_rate": 0.00019157846974790336, "loss": 1.9679, "step": 1590 }, { "epoch": 0.41, "learning_rate": 0.00019152437730495146, "loss": 1.9444, "step": 1595 }, { "epoch": 0.41, "learning_rate": 0.00019147011938331206, "loss": 1.9237, "step": 1600 }, { "epoch": 0.41, "learning_rate": 0.00019141569608108503, "loss": 1.9617, "step": 1605 }, { "epoch": 0.41, "learning_rate": 0.00019136110749666925, "loss": 1.8958, "step": 1610 }, { "epoch": 0.41, "learning_rate": 0.00019130635372876246, "loss": 1.8737, "step": 1615 }, { "epoch": 0.41, "learning_rate": 0.000191251434876361, "loss": 1.9383, "step": 1620 }, { "epoch": 0.42, "learning_rate": 0.00019119635103875972, "loss": 1.8857, "step": 1625 }, { "epoch": 0.42, "learning_rate": 0.00019114110231555182, "loss": 1.8511, "step": 1630 }, { "epoch": 0.42, "learning_rate": 0.0001910856888066286, "loss": 1.911, "step": 1635 }, { "epoch": 0.42, "learning_rate": 0.00019103011061217913, "loss": 1.9076, "step": 1640 }, { "epoch": 0.42, "learning_rate": 0.00019097436783269047, "loss": 1.8789, "step": 1645 }, { "epoch": 0.42, "learning_rate": 0.00019091846056894715, "loss": 1.8864, "step": 1650 }, { "epoch": 0.42, "learning_rate": 0.0001908623889220311, "loss": 1.9381, "step": 1655 }, { "epoch": 0.42, "learning_rate": 0.00019080615299332142, "loss": 1.9556, "step": 1660 }, { "epoch": 0.43, "learning_rate": 0.00019074975288449432, "loss": 1.9341, "step": 1665 }, { "epoch": 0.43, "learning_rate": 0.00019069318869752283, "loss": 1.9761, "step": 1670 }, { "epoch": 0.43, "learning_rate": 0.00019063646053467652, "loss": 1.891, "step": 1675 }, { "epoch": 0.43, "learning_rate": 0.0001905795684985216, "loss": 1.9201, "step": 1680 }, { "epoch": 0.43, "learning_rate": 0.00019052251269192048, "loss": 1.8857, "step": 1685 }, { "epoch": 0.43, "learning_rate": 0.00019046529321803168, "loss": 1.9542, "step": 1690 }, { "epoch": 0.43, "learning_rate": 0.00019040791018030967, "loss": 1.932, "step": 1695 }, { "epoch": 0.44, "learning_rate": 0.0001903503636825046, "loss": 1.9582, "step": 1700 }, { "epoch": 0.44, "learning_rate": 0.00019029265382866214, "loss": 1.9013, "step": 1705 }, { "epoch": 0.44, "learning_rate": 0.00019023478072312342, "loss": 1.9272, "step": 1710 }, { "epoch": 0.44, "learning_rate": 0.00019017674447052463, "loss": 1.8687, "step": 1715 }, { "epoch": 0.44, "learning_rate": 0.000190118545175797, "loss": 1.9503, "step": 1720 }, { "epoch": 0.44, "learning_rate": 0.00019006018294416647, "loss": 1.8982, "step": 1725 }, { "epoch": 0.44, "learning_rate": 0.0001900016578811537, "loss": 1.9493, "step": 1730 }, { "epoch": 0.44, "learning_rate": 0.0001899429700925736, "loss": 1.8548, "step": 1735 }, { "epoch": 0.45, "learning_rate": 0.00018989590277059657, "loss": 1.8518, "step": 1740 }, { "epoch": 0.45, "learning_rate": 0.0001898723300979511, "loss": 1.9131, "step": 1745 }, { "epoch": 0.45, "learning_rate": 0.00018981328468704127, "loss": 1.8887, "step": 1750 }, { "epoch": 0.45, "learning_rate": 0.00018975407689114825, "loss": 1.8728, "step": 1755 }, { "epoch": 0.45, "learning_rate": 0.00018969470681732143, "loss": 1.9128, "step": 1760 }, { "epoch": 0.45, "learning_rate": 0.00018963517457290361, "loss": 1.9224, "step": 1765 }, { "epoch": 0.45, "learning_rate": 0.00018957548026553073, "loss": 1.8755, "step": 1770 }, { "epoch": 0.45, "learning_rate": 0.0001895156240031319, "loss": 1.9126, "step": 1775 }, { "epoch": 0.46, "learning_rate": 0.00018945560589392888, "loss": 1.9315, "step": 1780 }, { "epoch": 0.46, "learning_rate": 0.0001893954260464361, "loss": 1.8778, "step": 1785 }, { "epoch": 0.46, "learning_rate": 0.00018933508456946054, "loss": 1.8613, "step": 1790 }, { "epoch": 0.46, "learning_rate": 0.00018927458157210123, "loss": 1.8284, "step": 1795 }, { "epoch": 0.46, "learning_rate": 0.00018921391716374933, "loss": 1.8996, "step": 1800 }, { "epoch": 0.46, "learning_rate": 0.00018915309145408786, "loss": 1.9148, "step": 1805 }, { "epoch": 0.46, "learning_rate": 0.0001890921045530914, "loss": 1.8774, "step": 1810 }, { "epoch": 0.46, "learning_rate": 0.00018903095657102603, "loss": 1.8948, "step": 1815 }, { "epoch": 0.47, "learning_rate": 0.00018896964761844903, "loss": 1.8544, "step": 1820 }, { "epoch": 0.47, "learning_rate": 0.00018890817780620878, "loss": 1.7928, "step": 1825 }, { "epoch": 0.47, "learning_rate": 0.0001888465472454444, "loss": 1.9044, "step": 1830 }, { "epoch": 0.47, "learning_rate": 0.00018878475604758573, "loss": 1.9089, "step": 1835 }, { "epoch": 0.47, "learning_rate": 0.00018872280432435302, "loss": 1.8255, "step": 1840 }, { "epoch": 0.47, "learning_rate": 0.00018866069218775676, "loss": 1.9163, "step": 1845 }, { "epoch": 0.47, "learning_rate": 0.00018859841975009745, "loss": 1.8495, "step": 1850 }, { "epoch": 0.47, "learning_rate": 0.00018853598712396545, "loss": 1.8004, "step": 1855 }, { "epoch": 0.48, "learning_rate": 0.00018847339442224073, "loss": 1.9387, "step": 1860 }, { "epoch": 0.48, "learning_rate": 0.00018841064175809268, "loss": 1.8566, "step": 1865 }, { "epoch": 0.48, "learning_rate": 0.0001883477292449799, "loss": 1.8849, "step": 1870 }, { "epoch": 0.48, "learning_rate": 0.00018828465699665, "loss": 1.903, "step": 1875 }, { "epoch": 0.48, "learning_rate": 0.00018822142512713944, "loss": 1.8851, "step": 1880 }, { "epoch": 0.48, "learning_rate": 0.0001881580337507732, "loss": 1.8783, "step": 1885 }, { "epoch": 0.48, "learning_rate": 0.0001880944829821648, "loss": 1.8592, "step": 1890 }, { "epoch": 0.49, "learning_rate": 0.00018803077293621568, "loss": 1.8978, "step": 1895 }, { "epoch": 0.49, "learning_rate": 0.00018796690372811556, "loss": 1.8318, "step": 1900 }, { "epoch": 0.49, "learning_rate": 0.00018790287547334176, "loss": 1.8377, "step": 1905 }, { "epoch": 0.49, "learning_rate": 0.00018783868828765914, "loss": 1.8675, "step": 1910 }, { "epoch": 0.49, "learning_rate": 0.00018777434228712002, "loss": 1.8989, "step": 1915 }, { "epoch": 0.49, "learning_rate": 0.00018770983758806378, "loss": 1.8781, "step": 1920 }, { "epoch": 0.49, "learning_rate": 0.00018764517430711676, "loss": 1.8795, "step": 1925 }, { "epoch": 0.49, "learning_rate": 0.00018758035256119204, "loss": 1.8826, "step": 1930 }, { "epoch": 0.5, "learning_rate": 0.00018751537246748917, "loss": 1.9236, "step": 1935 }, { "epoch": 0.5, "learning_rate": 0.000187450234143494, "loss": 1.8728, "step": 1940 }, { "epoch": 0.5, "learning_rate": 0.00018738493770697852, "loss": 1.8633, "step": 1945 }, { "epoch": 0.5, "learning_rate": 0.0001873194832760005, "loss": 1.876, "step": 1950 }, { "epoch": 0.5, "learning_rate": 0.0001872538709689035, "loss": 1.8794, "step": 1955 }, { "epoch": 0.5, "learning_rate": 0.0001871881009043163, "loss": 1.8341, "step": 1960 }, { "epoch": 0.5, "learning_rate": 0.00018712217320115322, "loss": 1.857, "step": 1965 }, { "epoch": 0.5, "learning_rate": 0.00018705608797861333, "loss": 1.9111, "step": 1970 }, { "epoch": 0.51, "learning_rate": 0.00018698984535618065, "loss": 1.8776, "step": 1975 }, { "epoch": 0.51, "learning_rate": 0.0001869234454536237, "loss": 1.909, "step": 1980 }, { "epoch": 0.51, "learning_rate": 0.0001868568883909954, "loss": 1.9096, "step": 1985 }, { "epoch": 0.51, "learning_rate": 0.00018679017428863283, "loss": 1.8574, "step": 1990 }, { "epoch": 0.51, "learning_rate": 0.00018672330326715696, "loss": 1.8551, "step": 1995 }, { "epoch": 0.51, "learning_rate": 0.0001866562754474726, "loss": 1.8565, "step": 2000 }, { "epoch": 0.51, "eval_loss": 1.6405518054962158, "eval_runtime": 2.0249, "eval_samples_per_second": 49.384, "eval_steps_per_second": 1.975, "step": 2000 }, { "epoch": 0.51, "learning_rate": 0.00018658909095076783, "loss": 1.8241, "step": 2005 }, { "epoch": 0.51, "learning_rate": 0.00018652174989851428, "loss": 1.9028, "step": 2010 }, { "epoch": 0.52, "learning_rate": 0.00018645425241246638, "loss": 1.9159, "step": 2015 }, { "epoch": 0.52, "learning_rate": 0.00018638659861466156, "loss": 1.8554, "step": 2020 }, { "epoch": 0.52, "learning_rate": 0.0001863187886274198, "loss": 1.934, "step": 2025 }, { "epoch": 0.52, "learning_rate": 0.00018625082257334353, "loss": 1.9281, "step": 2030 }, { "epoch": 0.52, "learning_rate": 0.00018618270057531728, "loss": 1.8402, "step": 2035 }, { "epoch": 0.52, "learning_rate": 0.00018611442275650756, "loss": 1.8373, "step": 2040 }, { "epoch": 0.52, "learning_rate": 0.00018604598924036267, "loss": 1.8416, "step": 2045 }, { "epoch": 0.52, "learning_rate": 0.0001859774001506123, "loss": 1.7969, "step": 2050 }, { "epoch": 0.53, "learning_rate": 0.00018590865561126757, "loss": 1.8583, "step": 2055 }, { "epoch": 0.53, "learning_rate": 0.00018583975574662046, "loss": 1.9004, "step": 2060 }, { "epoch": 0.53, "learning_rate": 0.0001857707006812439, "loss": 1.8759, "step": 2065 }, { "epoch": 0.53, "learning_rate": 0.00018570149053999152, "loss": 1.8812, "step": 2070 }, { "epoch": 0.53, "learning_rate": 0.00018563212544799718, "loss": 1.8436, "step": 2075 }, { "epoch": 0.53, "learning_rate": 0.0001855626055306749, "loss": 1.876, "step": 2080 }, { "epoch": 0.53, "learning_rate": 0.00018549293091371874, "loss": 1.9006, "step": 2085 }, { "epoch": 0.54, "learning_rate": 0.00018542310172310237, "loss": 1.8599, "step": 2090 }, { "epoch": 0.54, "learning_rate": 0.00018535311808507899, "loss": 1.8957, "step": 2095 }, { "epoch": 0.54, "learning_rate": 0.00018528298012618095, "loss": 1.8305, "step": 2100 }, { "epoch": 0.54, "learning_rate": 0.00018521268797321974, "loss": 1.7519, "step": 2105 }, { "epoch": 0.54, "learning_rate": 0.00018514224175328553, "loss": 1.8212, "step": 2110 }, { "epoch": 0.54, "learning_rate": 0.00018507164159374718, "loss": 1.859, "step": 2115 }, { "epoch": 0.54, "learning_rate": 0.0001850008876222517, "loss": 1.8488, "step": 2120 }, { "epoch": 0.54, "learning_rate": 0.00018492997996672438, "loss": 1.8126, "step": 2125 }, { "epoch": 0.55, "learning_rate": 0.0001848589187553682, "loss": 1.9146, "step": 2130 }, { "epoch": 0.55, "learning_rate": 0.0001847877041166639, "loss": 1.8637, "step": 2135 }, { "epoch": 0.55, "learning_rate": 0.00018471633617936958, "loss": 1.8365, "step": 2140 }, { "epoch": 0.55, "learning_rate": 0.00018464481507252055, "loss": 1.8485, "step": 2145 }, { "epoch": 0.55, "learning_rate": 0.0001845731409254289, "loss": 1.8374, "step": 2150 }, { "epoch": 0.55, "learning_rate": 0.000184530063031755, "loss": 1.8241, "step": 2155 }, { "epoch": 0.55, "learning_rate": 0.00018445814428993525, "loss": 1.8591, "step": 2160 }, { "epoch": 0.55, "learning_rate": 0.00018438607284537907, "loss": 1.8777, "step": 2165 }, { "epoch": 0.56, "learning_rate": 0.00018431384882839364, "loss": 1.8157, "step": 2170 }, { "epoch": 0.56, "learning_rate": 0.0001842414723695621, "loss": 1.8832, "step": 2175 }, { "epoch": 0.56, "learning_rate": 0.00018416894359974312, "loss": 1.826, "step": 2180 }, { "epoch": 0.56, "learning_rate": 0.00018409626265007085, "loss": 1.8825, "step": 2185 }, { "epoch": 0.56, "learning_rate": 0.00018402342965195443, "loss": 1.896, "step": 2190 }, { "epoch": 0.56, "learning_rate": 0.00018395044473707803, "loss": 1.8501, "step": 2195 }, { "epoch": 0.56, "learning_rate": 0.0001838773080374005, "loss": 1.8059, "step": 2200 }, { "epoch": 0.56, "learning_rate": 0.00018380401968515507, "loss": 1.8274, "step": 2205 }, { "epoch": 0.57, "learning_rate": 0.00018373057981284913, "loss": 1.8822, "step": 2210 }, { "epoch": 0.57, "learning_rate": 0.0001836569885532641, "loss": 1.9251, "step": 2215 }, { "epoch": 0.57, "learning_rate": 0.00018358324603945508, "loss": 1.8399, "step": 2220 }, { "epoch": 0.57, "learning_rate": 0.00018350935240475064, "loss": 1.8371, "step": 2225 }, { "epoch": 0.57, "learning_rate": 0.00018343530778275254, "loss": 1.7737, "step": 2230 }, { "epoch": 0.57, "learning_rate": 0.00018336111230733563, "loss": 1.8708, "step": 2235 }, { "epoch": 0.57, "learning_rate": 0.00018328676611264745, "loss": 1.8698, "step": 2240 }, { "epoch": 0.57, "learning_rate": 0.00018322718072934268, "loss": 1.9078, "step": 2245 }, { "epoch": 0.58, "learning_rate": 0.00018315256357888762, "loss": 1.8251, "step": 2250 }, { "epoch": 0.58, "learning_rate": 0.00018307779608622333, "loss": 1.8327, "step": 2255 }, { "epoch": 0.58, "learning_rate": 0.00018300287838653154, "loss": 1.8461, "step": 2260 }, { "epoch": 0.58, "learning_rate": 0.0001829278106152656, "loss": 1.8323, "step": 2265 }, { "epoch": 0.58, "learning_rate": 0.00018285259290815023, "loss": 1.8486, "step": 2270 }, { "epoch": 0.58, "learning_rate": 0.00018277722540118116, "loss": 1.8485, "step": 2275 }, { "epoch": 0.58, "learning_rate": 0.00018270170823062498, "loss": 1.8796, "step": 2280 }, { "epoch": 0.58, "learning_rate": 0.0001826260415330189, "loss": 1.8386, "step": 2285 }, { "epoch": 0.59, "learning_rate": 0.0001825502254451705, "loss": 1.849, "step": 2290 }, { "epoch": 0.59, "learning_rate": 0.00018247426010415736, "loss": 1.7897, "step": 2295 }, { "epoch": 0.59, "learning_rate": 0.00018239814564732706, "loss": 1.8165, "step": 2300 }, { "epoch": 0.59, "learning_rate": 0.00018232188221229664, "loss": 1.925, "step": 2305 }, { "epoch": 0.59, "learning_rate": 0.00018224546993695265, "loss": 1.8233, "step": 2310 }, { "epoch": 0.59, "learning_rate": 0.00018216890895945062, "loss": 1.8417, "step": 2315 }, { "epoch": 0.59, "learning_rate": 0.00018209219941821498, "loss": 1.8409, "step": 2320 }, { "epoch": 0.6, "learning_rate": 0.0001820153414519388, "loss": 1.7978, "step": 2325 }, { "epoch": 0.6, "learning_rate": 0.00018193833519958343, "loss": 1.8343, "step": 2330 }, { "epoch": 0.6, "learning_rate": 0.00018186118080037842, "loss": 1.8589, "step": 2335 }, { "epoch": 0.6, "learning_rate": 0.0001817838783938211, "loss": 1.8368, "step": 2340 }, { "epoch": 0.6, "learning_rate": 0.00018170642811967644, "loss": 1.8111, "step": 2345 }, { "epoch": 0.6, "learning_rate": 0.00018162883011797672, "loss": 1.7967, "step": 2350 }, { "epoch": 0.6, "learning_rate": 0.00018155108452902138, "loss": 1.8485, "step": 2355 }, { "epoch": 0.6, "learning_rate": 0.00018147319149337666, "loss": 1.8424, "step": 2360 }, { "epoch": 0.61, "learning_rate": 0.00018139515115187535, "loss": 1.8031, "step": 2365 }, { "epoch": 0.61, "learning_rate": 0.00018131696364561667, "loss": 1.8247, "step": 2370 }, { "epoch": 0.61, "learning_rate": 0.00018123862911596588, "loss": 1.8358, "step": 2375 }, { "epoch": 0.61, "learning_rate": 0.0001811915578858546, "loss": 1.8729, "step": 2380 }, { "epoch": 0.61, "learning_rate": 0.0001811444338089425, "loss": 1.8388, "step": 2385 }, { "epoch": 0.61, "learning_rate": 0.00018106577632674418, "loss": 1.827, "step": 2390 }, { "epoch": 0.61, "learning_rate": 0.00018098697227530755, "loss": 1.8459, "step": 2395 }, { "epoch": 0.61, "learning_rate": 0.00018090802179711264, "loss": 1.824, "step": 2400 }, { "epoch": 0.62, "learning_rate": 0.0001808289250349041, "loss": 1.8591, "step": 2405 }, { "epoch": 0.62, "learning_rate": 0.00018074968213169115, "loss": 1.8501, "step": 2410 }, { "epoch": 0.62, "learning_rate": 0.00018067029323074724, "loss": 1.8179, "step": 2415 }, { "epoch": 0.62, "learning_rate": 0.00018059075847560974, "loss": 1.8307, "step": 2420 }, { "epoch": 0.62, "learning_rate": 0.0001805110780100798, "loss": 1.8345, "step": 2425 }, { "epoch": 0.62, "learning_rate": 0.00018043125197822192, "loss": 1.8327, "step": 2430 }, { "epoch": 0.62, "learning_rate": 0.0001803512805243639, "loss": 1.8299, "step": 2435 }, { "epoch": 0.62, "learning_rate": 0.00018027116379309638, "loss": 1.8365, "step": 2440 }, { "epoch": 0.63, "learning_rate": 0.00018019090192927272, "loss": 1.8405, "step": 2445 }, { "epoch": 0.63, "learning_rate": 0.00018011049507800863, "loss": 1.7728, "step": 2450 }, { "epoch": 0.63, "learning_rate": 0.000180029943384682, "loss": 1.8139, "step": 2455 }, { "epoch": 0.63, "learning_rate": 0.0001799492469949326, "loss": 1.8555, "step": 2460 }, { "epoch": 0.63, "learning_rate": 0.00017986840605466187, "loss": 1.7641, "step": 2465 }, { "epoch": 0.63, "learning_rate": 0.00017978742071003242, "loss": 1.867, "step": 2470 }, { "epoch": 0.63, "learning_rate": 0.00017970629110746815, "loss": 1.8666, "step": 2475 }, { "epoch": 0.63, "learning_rate": 0.00017962501739365364, "loss": 1.7786, "step": 2480 }, { "epoch": 0.64, "learning_rate": 0.00017954359971553415, "loss": 1.7452, "step": 2485 }, { "epoch": 0.64, "learning_rate": 0.00017946203822031512, "loss": 1.7882, "step": 2490 }, { "epoch": 0.64, "learning_rate": 0.00017938033305546207, "loss": 1.8475, "step": 2495 }, { "epoch": 0.64, "learning_rate": 0.00017929848436870028, "loss": 1.8235, "step": 2500 }, { "epoch": 0.64, "learning_rate": 0.0001792164923080145, "loss": 1.8675, "step": 2505 }, { "epoch": 0.64, "learning_rate": 0.00017913435702164875, "loss": 1.836, "step": 2510 }, { "epoch": 0.64, "learning_rate": 0.00017905207865810593, "loss": 1.7496, "step": 2515 }, { "epoch": 0.65, "learning_rate": 0.0001789696573661477, "loss": 1.8329, "step": 2520 }, { "epoch": 0.65, "learning_rate": 0.0001788870932947941, "loss": 1.7659, "step": 2525 }, { "epoch": 0.65, "learning_rate": 0.00017880438659332332, "loss": 1.8404, "step": 2530 }, { "epoch": 0.65, "learning_rate": 0.00017872153741127145, "loss": 1.7743, "step": 2535 }, { "epoch": 0.65, "learning_rate": 0.00017863854589843215, "loss": 1.8238, "step": 2540 }, { "epoch": 0.65, "learning_rate": 0.00017855541220485648, "loss": 1.8287, "step": 2545 }, { "epoch": 0.65, "learning_rate": 0.00017847213648085255, "loss": 1.7769, "step": 2550 }, { "epoch": 0.65, "learning_rate": 0.00017838871887698518, "loss": 1.8508, "step": 2555 }, { "epoch": 0.66, "learning_rate": 0.0001783051595440758, "loss": 1.8107, "step": 2560 }, { "epoch": 0.66, "learning_rate": 0.00017822145863320214, "loss": 1.8357, "step": 2565 }, { "epoch": 0.66, "learning_rate": 0.00017813761629569772, "loss": 1.8268, "step": 2570 }, { "epoch": 0.66, "learning_rate": 0.00017805363268315197, "loss": 1.8067, "step": 2575 }, { "epoch": 0.66, "learning_rate": 0.00017796950794740965, "loss": 1.8515, "step": 2580 }, { "epoch": 0.66, "learning_rate": 0.00017788524224057067, "loss": 1.7839, "step": 2585 }, { "epoch": 0.66, "learning_rate": 0.0001778008357149898, "loss": 1.864, "step": 2590 }, { "epoch": 0.66, "learning_rate": 0.00017771628852327647, "loss": 1.8346, "step": 2595 }, { "epoch": 0.67, "learning_rate": 0.00017763160081829446, "loss": 1.7779, "step": 2600 }, { "epoch": 0.67, "learning_rate": 0.0001775467727531615, "loss": 1.8834, "step": 2605 }, { "epoch": 0.67, "learning_rate": 0.00017746180448124918, "loss": 1.7606, "step": 2610 }, { "epoch": 0.67, "learning_rate": 0.00017737669615618256, "loss": 1.7948, "step": 2615 }, { "epoch": 0.67, "learning_rate": 0.00017729144793183992, "loss": 1.778, "step": 2620 }, { "epoch": 0.67, "learning_rate": 0.00017720605996235248, "loss": 1.8107, "step": 2625 }, { "epoch": 0.67, "learning_rate": 0.00017712053240210413, "loss": 1.7959, "step": 2630 }, { "epoch": 0.67, "learning_rate": 0.00017703486540573117, "loss": 1.7941, "step": 2635 }, { "epoch": 0.68, "learning_rate": 0.00017694905912812192, "loss": 1.8386, "step": 2640 }, { "epoch": 0.68, "learning_rate": 0.0001768631137244166, "loss": 1.7738, "step": 2645 }, { "epoch": 0.68, "learning_rate": 0.00017679425733507803, "loss": 1.755, "step": 2650 }, { "epoch": 0.68, "learning_rate": 0.00017670806189615585, "loss": 1.8204, "step": 2655 }, { "epoch": 0.68, "learning_rate": 0.0001766217277668675, "loss": 1.8402, "step": 2660 }, { "epoch": 0.68, "learning_rate": 0.00017653525510330755, "loss": 1.8475, "step": 2665 }, { "epoch": 0.68, "learning_rate": 0.00017644864406182102, "loss": 1.8359, "step": 2670 }, { "epoch": 0.68, "learning_rate": 0.00017636189479900316, "loss": 1.7782, "step": 2675 }, { "epoch": 0.69, "learning_rate": 0.00017627500747169908, "loss": 1.7502, "step": 2680 }, { "epoch": 0.69, "learning_rate": 0.00017618798223700363, "loss": 1.776, "step": 2685 }, { "epoch": 0.69, "learning_rate": 0.00017611826286165232, "loss": 1.8608, "step": 2690 }, { "epoch": 0.69, "learning_rate": 0.00017603098979032683, "loss": 1.7792, "step": 2695 }, { "epoch": 0.69, "learning_rate": 0.0001759435792528008, "loss": 1.7932, "step": 2700 }, { "epoch": 0.69, "learning_rate": 0.0001758560314071151, "loss": 1.8069, "step": 2705 }, { "epoch": 0.69, "learning_rate": 0.00017576834641155865, "loss": 1.8374, "step": 2710 }, { "epoch": 0.69, "learning_rate": 0.00017568052442466843, "loss": 1.7983, "step": 2715 }, { "epoch": 0.7, "learning_rate": 0.00017559256560522912, "loss": 1.7921, "step": 2720 }, { "epoch": 0.7, "learning_rate": 0.00017550447011227278, "loss": 1.8499, "step": 2725 }, { "epoch": 0.7, "learning_rate": 0.00017541623810507856, "loss": 1.8161, "step": 2730 }, { "epoch": 0.7, "learning_rate": 0.00017532786974317244, "loss": 1.8425, "step": 2735 }, { "epoch": 0.7, "learning_rate": 0.00017523936518632697, "loss": 1.8023, "step": 2740 }, { "epoch": 0.7, "learning_rate": 0.0001751507245945609, "loss": 1.8118, "step": 2745 }, { "epoch": 0.7, "learning_rate": 0.00017506194812813897, "loss": 1.8096, "step": 2750 }, { "epoch": 0.71, "learning_rate": 0.00017497303594757153, "loss": 1.8208, "step": 2755 }, { "epoch": 0.71, "learning_rate": 0.00017488398821361435, "loss": 1.8271, "step": 2760 }, { "epoch": 0.71, "learning_rate": 0.00017479480508726832, "loss": 1.7719, "step": 2765 }, { "epoch": 0.71, "learning_rate": 0.00017470548672977903, "loss": 1.8406, "step": 2770 }, { "epoch": 0.71, "learning_rate": 0.00017461603330263663, "loss": 1.8597, "step": 2775 }, { "epoch": 0.71, "learning_rate": 0.0001745264449675755, "loss": 1.7879, "step": 2780 }, { "epoch": 0.71, "learning_rate": 0.00017443672188657386, "loss": 1.7856, "step": 2785 }, { "epoch": 0.71, "learning_rate": 0.0001743468642218536, "loss": 1.7796, "step": 2790 }, { "epoch": 0.72, "learning_rate": 0.00017425687213588003, "loss": 1.7924, "step": 2795 }, { "epoch": 0.72, "learning_rate": 0.0001741667457913613, "loss": 1.7707, "step": 2800 }, { "epoch": 0.72, "learning_rate": 0.0001740764853512485, "loss": 1.8271, "step": 2805 }, { "epoch": 0.72, "learning_rate": 0.00017398609097873501, "loss": 1.7561, "step": 2810 }, { "epoch": 0.72, "learning_rate": 0.00017389556283725654, "loss": 1.7428, "step": 2815 }, { "epoch": 0.72, "learning_rate": 0.00017380490109049042, "loss": 1.8217, "step": 2820 }, { "epoch": 0.72, "learning_rate": 0.00017371410590235578, "loss": 1.7255, "step": 2825 }, { "epoch": 0.72, "learning_rate": 0.00017362317743701292, "loss": 1.8414, "step": 2830 }, { "epoch": 0.73, "learning_rate": 0.000173532115858863, "loss": 1.8338, "step": 2835 }, { "epoch": 0.73, "learning_rate": 0.00017344092133254803, "loss": 1.782, "step": 2840 }, { "epoch": 0.73, "learning_rate": 0.0001733495940229503, "loss": 1.839, "step": 2845 }, { "epoch": 0.73, "learning_rate": 0.0001732581340951922, "loss": 1.8006, "step": 2850 }, { "epoch": 0.73, "learning_rate": 0.0001731665417146359, "loss": 1.8208, "step": 2855 }, { "epoch": 0.73, "learning_rate": 0.000173074817046883, "loss": 1.8102, "step": 2860 }, { "epoch": 0.73, "learning_rate": 0.00017298296025777433, "loss": 1.7616, "step": 2865 }, { "epoch": 0.73, "learning_rate": 0.00017289097151338965, "loss": 1.8392, "step": 2870 }, { "epoch": 0.74, "learning_rate": 0.00017279885098004717, "loss": 1.7347, "step": 2875 }, { "epoch": 0.74, "learning_rate": 0.00017270659882430347, "loss": 1.7882, "step": 2880 }, { "epoch": 0.74, "learning_rate": 0.00017261421521295306, "loss": 1.7935, "step": 2885 }, { "epoch": 0.74, "learning_rate": 0.00017252170031302816, "loss": 1.7692, "step": 2890 }, { "epoch": 0.74, "learning_rate": 0.00017242905429179834, "loss": 1.8289, "step": 2895 }, { "epoch": 0.74, "learning_rate": 0.00017233627731677028, "loss": 1.7925, "step": 2900 }, { "epoch": 0.74, "learning_rate": 0.0001722433695556874, "loss": 1.774, "step": 2905 }, { "epoch": 0.74, "learning_rate": 0.00017215033117652954, "loss": 1.8013, "step": 2910 }, { "epoch": 0.75, "learning_rate": 0.0001720571623475128, "loss": 1.8238, "step": 2915 }, { "epoch": 0.75, "learning_rate": 0.000171963863237089, "loss": 1.775, "step": 2920 }, { "epoch": 0.75, "learning_rate": 0.00017187043401394573, "loss": 1.8423, "step": 2925 }, { "epoch": 0.75, "learning_rate": 0.00017179559706778118, "loss": 1.7738, "step": 2930 }, { "epoch": 0.75, "learning_rate": 0.0001717019340675858, "loss": 1.8124, "step": 2935 }, { "epoch": 0.75, "learning_rate": 0.00017160814142824627, "loss": 1.775, "step": 2940 }, { "epoch": 0.75, "learning_rate": 0.00017151421931934242, "loss": 1.7789, "step": 2945 }, { "epoch": 0.76, "learning_rate": 0.00017142016791068802, "loss": 1.7481, "step": 2950 }, { "epoch": 0.76, "learning_rate": 0.0001713259873723308, "loss": 1.7979, "step": 2955 }, { "epoch": 0.76, "learning_rate": 0.00017123167787455175, "loss": 1.7033, "step": 2960 }, { "epoch": 0.76, "learning_rate": 0.00017113723958786525, "loss": 1.8301, "step": 2965 }, { "epoch": 0.76, "learning_rate": 0.00017104267268301836, "loss": 1.7979, "step": 2970 }, { "epoch": 0.76, "learning_rate": 0.00017094797733099073, "loss": 1.7681, "step": 2975 }, { "epoch": 0.76, "learning_rate": 0.00017085315370299426, "loss": 1.8129, "step": 2980 }, { "epoch": 0.76, "learning_rate": 0.0001707582019704728, "loss": 1.7753, "step": 2985 }, { "epoch": 0.77, "learning_rate": 0.00017066312230510183, "loss": 1.7478, "step": 2990 }, { "epoch": 0.77, "learning_rate": 0.00017056791487878804, "loss": 1.7916, "step": 2995 }, { "epoch": 0.77, "learning_rate": 0.00017047257986366917, "loss": 1.7704, "step": 3000 }, { "epoch": 0.77, "eval_loss": 1.5884343385696411, "eval_runtime": 2.03, "eval_samples_per_second": 49.262, "eval_steps_per_second": 1.97, "step": 3000 }, { "epoch": 0.77, "learning_rate": 0.0001703771174321137, "loss": 1.7495, "step": 3005 }, { "epoch": 0.77, "learning_rate": 0.00017028152775672042, "loss": 1.8345, "step": 3010 }, { "epoch": 0.77, "learning_rate": 0.00017018581101031814, "loss": 1.7485, "step": 3015 }, { "epoch": 0.77, "learning_rate": 0.00017008996736596552, "loss": 1.8157, "step": 3020 }, { "epoch": 0.77, "learning_rate": 0.0001699939969969506, "loss": 1.8166, "step": 3025 }, { "epoch": 0.78, "learning_rate": 0.00016989790007679054, "loss": 1.7762, "step": 3030 }, { "epoch": 0.78, "learning_rate": 0.0001698209315405876, "loss": 1.8121, "step": 3035 }, { "epoch": 0.78, "learning_rate": 0.00016974388221190808, "loss": 1.833, "step": 3040 }, { "epoch": 0.78, "learning_rate": 0.00016964745707247203, "loss": 1.7162, "step": 3045 }, { "epoch": 0.78, "learning_rate": 0.00016955090600844492, "loss": 1.7007, "step": 3050 }, { "epoch": 0.78, "learning_rate": 0.00016945422919439383, "loss": 1.736, "step": 3055 }, { "epoch": 0.78, "learning_rate": 0.0001693574268051132, "loss": 1.7525, "step": 3060 }, { "epoch": 0.78, "learning_rate": 0.00016926049901562452, "loss": 1.7361, "step": 3065 }, { "epoch": 0.79, "learning_rate": 0.000169163446001176, "loss": 1.7872, "step": 3070 }, { "epoch": 0.79, "learning_rate": 0.00016906626793724224, "loss": 1.8129, "step": 3075 }, { "epoch": 0.79, "learning_rate": 0.00016896896499952403, "loss": 1.8149, "step": 3080 }, { "epoch": 0.79, "learning_rate": 0.00016887153736394774, "loss": 1.7877, "step": 3085 }, { "epoch": 0.79, "learning_rate": 0.00016877398520666537, "loss": 1.7853, "step": 3090 }, { "epoch": 0.79, "learning_rate": 0.00016867630870405406, "loss": 1.7835, "step": 3095 }, { "epoch": 0.79, "learning_rate": 0.00016857850803271565, "loss": 1.7098, "step": 3100 }, { "epoch": 0.79, "learning_rate": 0.00016848058336947657, "loss": 1.7589, "step": 3105 }, { "epoch": 0.8, "learning_rate": 0.0001683825348913874, "loss": 1.767, "step": 3110 }, { "epoch": 0.8, "learning_rate": 0.0001682843627757226, "loss": 1.8099, "step": 3115 }, { "epoch": 0.8, "learning_rate": 0.00016818606719998014, "loss": 1.7039, "step": 3120 }, { "epoch": 0.8, "learning_rate": 0.00016808764834188122, "loss": 1.7686, "step": 3125 }, { "epoch": 0.8, "learning_rate": 0.00016798910637937, "loss": 1.7712, "step": 3130 }, { "epoch": 0.8, "learning_rate": 0.0001678904414906131, "loss": 1.7679, "step": 3135 }, { "epoch": 0.8, "learning_rate": 0.00016779165385399952, "loss": 1.7529, "step": 3140 }, { "epoch": 0.81, "learning_rate": 0.00016769274364814005, "loss": 1.7715, "step": 3145 }, { "epoch": 0.81, "learning_rate": 0.00016759371105186719, "loss": 1.7047, "step": 3150 }, { "epoch": 0.81, "learning_rate": 0.00016749455624423475, "loss": 1.8295, "step": 3155 }, { "epoch": 0.81, "learning_rate": 0.0001673952794045174, "loss": 1.751, "step": 3160 }, { "epoch": 0.81, "learning_rate": 0.00016729588071221055, "loss": 1.7657, "step": 3165 }, { "epoch": 0.81, "learning_rate": 0.00016719636034702982, "loss": 1.7863, "step": 3170 }, { "epoch": 0.81, "learning_rate": 0.0001670967184889109, "loss": 1.7807, "step": 3175 }, { "epoch": 0.81, "learning_rate": 0.00016699695531800912, "loss": 1.7969, "step": 3180 }, { "epoch": 0.82, "learning_rate": 0.00016691705755728843, "loss": 1.7769, "step": 3185 }, { "epoch": 0.82, "learning_rate": 0.00016681707647806884, "loss": 1.8018, "step": 3190 }, { "epoch": 0.82, "learning_rate": 0.0001667169745916671, "loss": 1.7613, "step": 3195 }, { "epoch": 0.82, "learning_rate": 0.0001666167520790703, "loss": 1.7569, "step": 3200 }, { "epoch": 0.82, "learning_rate": 0.00016651640912148362, "loss": 1.7482, "step": 3205 }, { "epoch": 0.82, "learning_rate": 0.00016641594590032993, "loss": 1.7362, "step": 3210 }, { "epoch": 0.82, "learning_rate": 0.0001663153625972497, "loss": 1.7373, "step": 3215 }, { "epoch": 0.82, "learning_rate": 0.00016621465939410033, "loss": 1.7994, "step": 3220 }, { "epoch": 0.83, "learning_rate": 0.00016611383647295615, "loss": 1.8034, "step": 3225 }, { "epoch": 0.83, "learning_rate": 0.00016601289401610786, "loss": 1.771, "step": 3230 }, { "epoch": 0.83, "learning_rate": 0.00016591183220606226, "loss": 1.7844, "step": 3235 }, { "epoch": 0.83, "learning_rate": 0.00016581065122554204, "loss": 1.7604, "step": 3240 }, { "epoch": 0.83, "learning_rate": 0.0001657093512574853, "loss": 1.7697, "step": 3245 }, { "epoch": 0.83, "learning_rate": 0.00016562822573508532, "loss": 1.7499, "step": 3250 }, { "epoch": 0.83, "learning_rate": 0.00016552671205115335, "loss": 1.7472, "step": 3255 }, { "epoch": 0.83, "learning_rate": 0.00016542507989305492, "loss": 1.7398, "step": 3260 }, { "epoch": 0.84, "learning_rate": 0.00016532332944454395, "loss": 1.7206, "step": 3265 }, { "epoch": 0.84, "learning_rate": 0.00016522146088958817, "loss": 1.8041, "step": 3270 }, { "epoch": 0.84, "learning_rate": 0.00016511947441236877, "loss": 1.7728, "step": 3275 }, { "epoch": 0.84, "learning_rate": 0.00016501737019728027, "loss": 1.7138, "step": 3280 }, { "epoch": 0.84, "learning_rate": 0.00016491514842893, "loss": 1.7569, "step": 3285 }, { "epoch": 0.84, "learning_rate": 0.00016481280929213777, "loss": 1.7633, "step": 3290 }, { "epoch": 0.84, "learning_rate": 0.00016471035297193576, "loss": 1.73, "step": 3295 }, { "epoch": 0.84, "learning_rate": 0.0001646283036681959, "loss": 1.7086, "step": 3300 }, { "epoch": 0.85, "learning_rate": 0.00016452563688481312, "loss": 1.7338, "step": 3305 }, { "epoch": 0.85, "learning_rate": 0.0001644228534372364, "loss": 1.7476, "step": 3310 }, { "epoch": 0.85, "learning_rate": 0.00016431995351130118, "loss": 1.7882, "step": 3315 }, { "epoch": 0.85, "learning_rate": 0.00016421693729305346, "loss": 1.7481, "step": 3320 }, { "epoch": 0.85, "learning_rate": 0.0001641138049687495, "loss": 1.7341, "step": 3325 }, { "epoch": 0.85, "learning_rate": 0.0001640105567248555, "loss": 1.7624, "step": 3330 }, { "epoch": 0.85, "learning_rate": 0.0001639071927480472, "loss": 1.6861, "step": 3335 }, { "epoch": 0.85, "learning_rate": 0.00016380371322520968, "loss": 1.7322, "step": 3340 }, { "epoch": 0.86, "learning_rate": 0.0001637001183434368, "loss": 1.7089, "step": 3345 }, { "epoch": 0.86, "learning_rate": 0.0001635964082900311, "loss": 1.7919, "step": 3350 }, { "epoch": 0.86, "learning_rate": 0.00016349258325250334, "loss": 1.7282, "step": 3355 }, { "epoch": 0.86, "learning_rate": 0.00016338864341857213, "loss": 1.7253, "step": 3360 }, { "epoch": 0.86, "learning_rate": 0.00016328458897616366, "loss": 1.7694, "step": 3365 }, { "epoch": 0.86, "learning_rate": 0.0001631804201134113, "loss": 1.7304, "step": 3370 }, { "epoch": 0.86, "learning_rate": 0.0001630761370186554, "loss": 1.7418, "step": 3375 }, { "epoch": 0.87, "learning_rate": 0.0001629717398804427, "loss": 1.7013, "step": 3380 }, { "epoch": 0.87, "learning_rate": 0.00016286722888752623, "loss": 1.7637, "step": 3385 }, { "epoch": 0.87, "learning_rate": 0.00016276260422886488, "loss": 1.8182, "step": 3390 }, { "epoch": 0.87, "learning_rate": 0.00016265786609362298, "loss": 1.7593, "step": 3395 }, { "epoch": 0.87, "learning_rate": 0.0001625530146711701, "loss": 1.6767, "step": 3400 }, { "epoch": 0.87, "learning_rate": 0.0001624480501510806, "loss": 1.7853, "step": 3405 }, { "epoch": 0.87, "learning_rate": 0.00016234297272313332, "loss": 1.7319, "step": 3410 }, { "epoch": 0.87, "learning_rate": 0.00016223778257731125, "loss": 1.7088, "step": 3415 }, { "epoch": 0.88, "learning_rate": 0.00016213247990380116, "loss": 1.7661, "step": 3420 }, { "epoch": 0.88, "learning_rate": 0.0001620270648929933, "loss": 1.7962, "step": 3425 }, { "epoch": 0.88, "learning_rate": 0.0001619215377354811, "loss": 1.7268, "step": 3430 }, { "epoch": 0.88, "learning_rate": 0.00016181589862206052, "loss": 1.7392, "step": 3435 }, { "epoch": 0.88, "learning_rate": 0.00016171014774373022, "loss": 1.7416, "step": 3440 }, { "epoch": 0.88, "learning_rate": 0.00016160428529169075, "loss": 1.7915, "step": 3445 }, { "epoch": 0.88, "learning_rate": 0.0001615407143447017, "loss": 1.7038, "step": 3450 }, { "epoch": 0.88, "learning_rate": 0.0001614346737729282, "loss": 1.7548, "step": 3455 }, { "epoch": 0.89, "learning_rate": 0.0001613285221255104, "loss": 1.7841, "step": 3460 }, { "epoch": 0.89, "learning_rate": 0.00016122225959437346, "loss": 1.768, "step": 3465 }, { "epoch": 0.89, "learning_rate": 0.000161115886371643, "loss": 1.7726, "step": 3470 }, { "epoch": 0.89, "learning_rate": 0.000161009402649645, "loss": 1.7424, "step": 3475 }, { "epoch": 0.89, "learning_rate": 0.00016090280862090495, "loss": 1.7308, "step": 3480 }, { "epoch": 0.89, "learning_rate": 0.0001607961044781479, "loss": 1.7352, "step": 3485 }, { "epoch": 0.89, "learning_rate": 0.00016068929041429804, "loss": 1.7744, "step": 3490 }, { "epoch": 0.89, "learning_rate": 0.00016058236662247824, "loss": 1.7397, "step": 3495 }, { "epoch": 0.9, "learning_rate": 0.00016047533329600975, "loss": 1.7218, "step": 3500 }, { "epoch": 0.9, "learning_rate": 0.00016036819062841188, "loss": 1.7494, "step": 3505 }, { "epoch": 0.9, "learning_rate": 0.00016026093881340163, "loss": 1.7849, "step": 3510 }, { "epoch": 0.9, "learning_rate": 0.00016015357804489337, "loss": 1.7749, "step": 3515 }, { "epoch": 0.9, "learning_rate": 0.00016004610851699837, "loss": 1.8122, "step": 3520 }, { "epoch": 0.9, "learning_rate": 0.0001599385304240247, "loss": 1.7413, "step": 3525 }, { "epoch": 0.9, "learning_rate": 0.00015983084396047653, "loss": 1.7451, "step": 3530 }, { "epoch": 0.9, "learning_rate": 0.00015972304932105412, "loss": 1.7287, "step": 3535 }, { "epoch": 0.91, "learning_rate": 0.00015961514670065325, "loss": 1.7468, "step": 3540 }, { "epoch": 0.91, "learning_rate": 0.00015950713629436493, "loss": 1.7306, "step": 3545 }, { "epoch": 0.91, "learning_rate": 0.00015939901829747506, "loss": 1.7094, "step": 3550 }, { "epoch": 0.91, "learning_rate": 0.00015929079290546408, "loss": 1.693, "step": 3555 }, { "epoch": 0.91, "learning_rate": 0.00015918246031400654, "loss": 1.7384, "step": 3560 }, { "epoch": 0.91, "learning_rate": 0.00015907402071897088, "loss": 1.7206, "step": 3565 }, { "epoch": 0.91, "learning_rate": 0.00015896547431641902, "loss": 1.7526, "step": 3570 }, { "epoch": 0.92, "learning_rate": 0.00015885682130260592, "loss": 1.7292, "step": 3575 }, { "epoch": 0.92, "learning_rate": 0.00015874806187397936, "loss": 1.6798, "step": 3580 }, { "epoch": 0.92, "learning_rate": 0.00015863919622717947, "loss": 1.7443, "step": 3585 }, { "epoch": 0.92, "learning_rate": 0.00015853022455903845, "loss": 1.7708, "step": 3590 }, { "epoch": 0.92, "learning_rate": 0.00015842114706658017, "loss": 1.7308, "step": 3595 }, { "epoch": 0.92, "learning_rate": 0.00015831196394701987, "loss": 1.725, "step": 3600 }, { "epoch": 0.92, "learning_rate": 0.00015820267539776376, "loss": 1.7344, "step": 3605 }, { "epoch": 0.92, "learning_rate": 0.00015809328161640865, "loss": 1.7279, "step": 3610 }, { "epoch": 0.93, "learning_rate": 0.00015798378280074165, "loss": 1.7488, "step": 3615 }, { "epoch": 0.93, "learning_rate": 0.00015787417914873967, "loss": 1.7, "step": 3620 }, { "epoch": 0.93, "learning_rate": 0.00015776447085856927, "loss": 1.7771, "step": 3625 }, { "epoch": 0.93, "learning_rate": 0.0001576546581285862, "loss": 1.771, "step": 3630 }, { "epoch": 0.93, "learning_rate": 0.00015754474115733502, "loss": 1.693, "step": 3635 }, { "epoch": 0.93, "learning_rate": 0.00015743472014354869, "loss": 1.7527, "step": 3640 }, { "epoch": 0.93, "learning_rate": 0.00015732459528614842, "loss": 1.731, "step": 3645 }, { "epoch": 0.93, "learning_rate": 0.00015721436678424304, "loss": 1.7262, "step": 3650 }, { "epoch": 0.94, "learning_rate": 0.00015710403483712885, "loss": 1.765, "step": 3655 }, { "epoch": 0.94, "learning_rate": 0.00015699359964428916, "loss": 1.7769, "step": 3660 }, { "epoch": 0.94, "learning_rate": 0.00015688306140539395, "loss": 1.8061, "step": 3665 }, { "epoch": 0.94, "learning_rate": 0.00015677242032029952, "loss": 1.7197, "step": 3670 }, { "epoch": 0.94, "learning_rate": 0.0001566616765890481, "loss": 1.746, "step": 3675 }, { "epoch": 0.94, "learning_rate": 0.00015655083041186754, "loss": 1.7236, "step": 3680 }, { "epoch": 0.94, "learning_rate": 0.00015643988198917085, "loss": 1.7476, "step": 3685 }, { "epoch": 0.94, "learning_rate": 0.00015632883152155597, "loss": 1.6906, "step": 3690 }, { "epoch": 0.95, "learning_rate": 0.00015621767920980534, "loss": 1.7522, "step": 3695 }, { "epoch": 0.95, "learning_rate": 0.0001561064252548855, "loss": 1.7211, "step": 3700 }, { "epoch": 0.95, "learning_rate": 0.00015599506985794674, "loss": 1.7704, "step": 3705 }, { "epoch": 0.95, "learning_rate": 0.00015588361322032283, "loss": 1.7486, "step": 3710 }, { "epoch": 0.95, "learning_rate": 0.00015577205554353056, "loss": 1.6846, "step": 3715 }, { "epoch": 0.95, "learning_rate": 0.0001556603970292694, "loss": 1.7253, "step": 3720 }, { "epoch": 0.95, "learning_rate": 0.0001555709977505413, "loss": 1.6592, "step": 3725 }, { "epoch": 0.95, "learning_rate": 0.00015545915823770047, "loss": 1.7007, "step": 3730 }, { "epoch": 0.96, "learning_rate": 0.00015534721845311797, "loss": 1.695, "step": 3735 }, { "epoch": 0.96, "learning_rate": 0.0001552351785991842, "loss": 1.7151, "step": 3740 }, { "epoch": 0.96, "learning_rate": 0.00015512303887847036, "loss": 1.7295, "step": 3745 }, { "epoch": 0.96, "learning_rate": 0.00015501079949372824, "loss": 1.7069, "step": 3750 }, { "epoch": 0.96, "learning_rate": 0.00015489846064788994, "loss": 1.7176, "step": 3755 }, { "epoch": 0.96, "learning_rate": 0.0001547860225440672, "loss": 1.6711, "step": 3760 }, { "epoch": 0.96, "learning_rate": 0.00015467348538555141, "loss": 1.73, "step": 3765 }, { "epoch": 0.97, "learning_rate": 0.0001545608493758129, "loss": 1.7381, "step": 3770 }, { "epoch": 0.97, "learning_rate": 0.00015444811471850085, "loss": 1.7319, "step": 3775 }, { "epoch": 0.97, "learning_rate": 0.00015433528161744274, "loss": 1.7279, "step": 3780 }, { "epoch": 0.97, "learning_rate": 0.00015422235027664393, "loss": 1.7489, "step": 3785 }, { "epoch": 0.97, "learning_rate": 0.00015410932090028764, "loss": 1.7643, "step": 3790 }, { "epoch": 0.97, "learning_rate": 0.00015401882695092537, "loss": 1.7515, "step": 3795 }, { "epoch": 0.97, "learning_rate": 0.00015392827049430752, "loss": 1.7379, "step": 3800 }, { "epoch": 0.97, "learning_rate": 0.00015381498718234938, "loss": 1.6704, "step": 3805 }, { "epoch": 0.98, "learning_rate": 0.00015370160657135782, "loss": 1.7248, "step": 3810 }, { "epoch": 0.98, "learning_rate": 0.0001535881288663282, "loss": 1.7526, "step": 3815 }, { "epoch": 0.98, "learning_rate": 0.00015347455427243146, "loss": 1.7318, "step": 3820 }, { "epoch": 0.98, "learning_rate": 0.00015336088299501373, "loss": 1.7539, "step": 3825 }, { "epoch": 0.98, "learning_rate": 0.00015324711523959588, "loss": 1.7613, "step": 3830 }, { "epoch": 0.98, "learning_rate": 0.0001531332512118733, "loss": 1.7455, "step": 3835 }, { "epoch": 0.98, "learning_rate": 0.00015301929111771542, "loss": 1.7224, "step": 3840 }, { "epoch": 0.98, "learning_rate": 0.00015290523516316528, "loss": 1.818, "step": 3845 }, { "epoch": 0.99, "learning_rate": 0.0001527910835544394, "loss": 1.7811, "step": 3850 }, { "epoch": 0.99, "learning_rate": 0.00015267683649792702, "loss": 1.7031, "step": 3855 }, { "epoch": 0.99, "learning_rate": 0.0001525624942001902, "loss": 1.7497, "step": 3860 }, { "epoch": 0.99, "learning_rate": 0.00015244805686796302, "loss": 1.6636, "step": 3865 }, { "epoch": 0.99, "learning_rate": 0.00015233352470815143, "loss": 1.7377, "step": 3870 }, { "epoch": 0.99, "learning_rate": 0.0001522188979278329, "loss": 1.7465, "step": 3875 }, { "epoch": 0.99, "learning_rate": 0.00015210417673425583, "loss": 1.7301, "step": 3880 }, { "epoch": 0.99, "learning_rate": 0.00015198936133483952, "loss": 1.6354, "step": 3885 }, { "epoch": 1.0, "learning_rate": 0.00015187445193717342, "loss": 1.7327, "step": 3890 }, { "epoch": 1.0, "learning_rate": 0.00015175944874901704, "loss": 1.7322, "step": 3895 }, { "epoch": 1.0, "learning_rate": 0.00015164435197829944, "loss": 1.7583, "step": 3900 }, { "epoch": 1.0, "learning_rate": 0.0001515291618331188, "loss": 1.7074, "step": 3905 }, { "epoch": 1.0, "learning_rate": 0.00015146000301290025, "loss": 1.5754, "step": 3910 }, { "epoch": 1.0, "learning_rate": 0.0001513446639018444, "loss": 1.474, "step": 3915 }, { "epoch": 1.0, "learning_rate": 0.0001512292319581702, "loss": 1.4232, "step": 3920 }, { "epoch": 1.0, "learning_rate": 0.00015111370739058182, "loss": 1.4191, "step": 3925 }, { "epoch": 1.01, "learning_rate": 0.00015099809040795102, "loss": 1.3778, "step": 3930 }, { "epoch": 1.01, "learning_rate": 0.00015088238121931658, "loss": 1.461, "step": 3935 }, { "epoch": 1.01, "learning_rate": 0.000150766580033884, "loss": 1.4005, "step": 3940 }, { "epoch": 1.01, "learning_rate": 0.0001506506870610252, "loss": 1.4257, "step": 3945 }, { "epoch": 1.01, "learning_rate": 0.00015053470251027794, "loss": 1.3933, "step": 3950 }, { "epoch": 1.01, "learning_rate": 0.00015041862659134558, "loss": 1.413, "step": 3955 }, { "epoch": 1.01, "learning_rate": 0.00015030245951409675, "loss": 1.4571, "step": 3960 }, { "epoch": 1.01, "learning_rate": 0.00015018620148856478, "loss": 1.3696, "step": 3965 }, { "epoch": 1.02, "learning_rate": 0.00015006985272494755, "loss": 1.4168, "step": 3970 }, { "epoch": 1.02, "learning_rate": 0.00014995341343360694, "loss": 1.3433, "step": 3975 }, { "epoch": 1.02, "learning_rate": 0.00014983688382506848, "loss": 1.4284, "step": 3980 }, { "epoch": 1.02, "learning_rate": 0.00014972026411002107, "loss": 1.4177, "step": 3985 }, { "epoch": 1.02, "learning_rate": 0.00014960355449931644, "loss": 1.4491, "step": 3990 }, { "epoch": 1.02, "learning_rate": 0.00014948675520396894, "loss": 1.3903, "step": 3995 }, { "epoch": 1.02, "learning_rate": 0.000149369866435155, "loss": 1.3637, "step": 4000 }, { "epoch": 1.02, "eval_loss": 1.5145312547683716, "eval_runtime": 2.0288, "eval_samples_per_second": 49.29, "eval_steps_per_second": 1.972, "step": 4000 }, { "epoch": 1.03, "learning_rate": 0.0001492528884042129, "loss": 1.4221, "step": 4005 }, { "epoch": 1.03, "learning_rate": 0.00014913582132264222, "loss": 1.4648, "step": 4010 }, { "epoch": 1.03, "learning_rate": 0.00014901866540210356, "loss": 1.4487, "step": 4015 }, { "epoch": 1.03, "learning_rate": 0.00014890142085441824, "loss": 1.4623, "step": 4020 }, { "epoch": 1.03, "learning_rate": 0.0001487840878915677, "loss": 1.3967, "step": 4025 }, { "epoch": 1.03, "learning_rate": 0.00014866666672569328, "loss": 1.3644, "step": 4030 }, { "epoch": 1.03, "learning_rate": 0.00014854915756909586, "loss": 1.4131, "step": 4035 }, { "epoch": 1.03, "learning_rate": 0.00014843156063423527, "loss": 1.4199, "step": 4040 }, { "epoch": 1.04, "learning_rate": 0.00014831387613373017, "loss": 1.4172, "step": 4045 }, { "epoch": 1.04, "learning_rate": 0.00014819610428035753, "loss": 1.4314, "step": 4050 }, { "epoch": 1.04, "learning_rate": 0.0001480782452870522, "loss": 1.4041, "step": 4055 }, { "epoch": 1.04, "learning_rate": 0.00014796029936690655, "loss": 1.4303, "step": 4060 }, { "epoch": 1.04, "learning_rate": 0.00014784226673317022, "loss": 1.4184, "step": 4065 }, { "epoch": 1.04, "learning_rate": 0.00014772414759924956, "loss": 1.4041, "step": 4070 }, { "epoch": 1.04, "learning_rate": 0.00014760594217870737, "loss": 1.401, "step": 4075 }, { "epoch": 1.04, "learning_rate": 0.00014751131585951993, "loss": 1.4062, "step": 4080 }, { "epoch": 1.05, "learning_rate": 0.00014739295566173482, "loss": 1.4252, "step": 4085 }, { "epoch": 1.05, "learning_rate": 0.00014727450977613274, "loss": 1.4373, "step": 4090 }, { "epoch": 1.05, "learning_rate": 0.00014715597841686708, "loss": 1.4052, "step": 4095 }, { "epoch": 1.05, "learning_rate": 0.00014703736179824597, "loss": 1.4319, "step": 4100 }, { "epoch": 1.05, "learning_rate": 0.00014691866013473166, "loss": 1.4012, "step": 4105 }, { "epoch": 1.05, "learning_rate": 0.0001467998736409401, "loss": 1.3806, "step": 4110 }, { "epoch": 1.05, "learning_rate": 0.0001466810025316408, "loss": 1.4366, "step": 4115 }, { "epoch": 1.05, "learning_rate": 0.00014656204702175597, "loss": 1.4409, "step": 4120 }, { "epoch": 1.06, "learning_rate": 0.0001464430073263606, "loss": 1.4041, "step": 4125 }, { "epoch": 1.06, "learning_rate": 0.00014632388366068191, "loss": 1.4567, "step": 4130 }, { "epoch": 1.06, "learning_rate": 0.00014620467624009884, "loss": 1.4992, "step": 4135 }, { "epoch": 1.06, "learning_rate": 0.0001460853852801418, "loss": 1.4411, "step": 4140 }, { "epoch": 1.06, "learning_rate": 0.00014596601099649232, "loss": 1.4584, "step": 4145 }, { "epoch": 1.06, "learning_rate": 0.00014587045172154828, "loss": 1.4097, "step": 4150 }, { "epoch": 1.06, "learning_rate": 0.00014575092799925022, "loss": 1.4295, "step": 4155 }, { "epoch": 1.06, "learning_rate": 0.000145631321557968, "loss": 1.4266, "step": 4160 }, { "epoch": 1.07, "learning_rate": 0.0001455116326139535, "loss": 1.3929, "step": 4165 }, { "epoch": 1.07, "learning_rate": 0.00014539186138360768, "loss": 1.4545, "step": 4170 }, { "epoch": 1.07, "learning_rate": 0.0001452720080834804, "loss": 1.3649, "step": 4175 }, { "epoch": 1.07, "learning_rate": 0.00014515207293026985, "loss": 1.4152, "step": 4180 }, { "epoch": 1.07, "learning_rate": 0.0001450320561408222, "loss": 1.4012, "step": 4185 }, { "epoch": 1.07, "learning_rate": 0.0001449119579321312, "loss": 1.3889, "step": 4190 }, { "epoch": 1.07, "learning_rate": 0.00014479177852133788, "loss": 1.4516, "step": 4195 }, { "epoch": 1.08, "learning_rate": 0.00014467151812573006, "loss": 1.3974, "step": 4200 }, { "epoch": 1.08, "learning_rate": 0.0001445511769627419, "loss": 1.4095, "step": 4205 }, { "epoch": 1.08, "learning_rate": 0.00014443075524995372, "loss": 1.4249, "step": 4210 }, { "epoch": 1.08, "learning_rate": 0.00014431025320509142, "loss": 1.418, "step": 4215 }, { "epoch": 1.08, "learning_rate": 0.0001441896710460261, "loss": 1.398, "step": 4220 }, { "epoch": 1.08, "learning_rate": 0.0001440690089907738, "loss": 1.3797, "step": 4225 }, { "epoch": 1.08, "learning_rate": 0.00014394826725749486, "loss": 1.4703, "step": 4230 }, { "epoch": 1.08, "learning_rate": 0.00014382744606449387, "loss": 1.4441, "step": 4235 }, { "epoch": 1.09, "learning_rate": 0.000143706545630219, "loss": 1.4367, "step": 4240 }, { "epoch": 1.09, "learning_rate": 0.00014358556617326162, "loss": 1.415, "step": 4245 }, { "epoch": 1.09, "learning_rate": 0.0001434645079123561, "loss": 1.4756, "step": 4250 }, { "epoch": 1.09, "learning_rate": 0.00014334337106637922, "loss": 1.4432, "step": 4255 }, { "epoch": 1.09, "learning_rate": 0.00014322215585434983, "loss": 1.4235, "step": 4260 }, { "epoch": 1.09, "learning_rate": 0.00014310086249542852, "loss": 1.4393, "step": 4265 }, { "epoch": 1.09, "learning_rate": 0.00014297949120891718, "loss": 1.4237, "step": 4270 }, { "epoch": 1.09, "learning_rate": 0.0001428580422142585, "loss": 1.3877, "step": 4275 }, { "epoch": 1.1, "learning_rate": 0.00014273651573103573, "loss": 1.4009, "step": 4280 }, { "epoch": 1.1, "learning_rate": 0.00014261491197897227, "loss": 1.4401, "step": 4285 }, { "epoch": 1.1, "learning_rate": 0.00014249323117793115, "loss": 1.3907, "step": 4290 }, { "epoch": 1.1, "learning_rate": 0.00014237147354791474, "loss": 1.3885, "step": 4295 }, { "epoch": 1.1, "learning_rate": 0.00014224963930906426, "loss": 1.3705, "step": 4300 }, { "epoch": 1.1, "learning_rate": 0.00014212772868165958, "loss": 1.4345, "step": 4305 }, { "epoch": 1.1, "learning_rate": 0.00014200574188611854, "loss": 1.5228, "step": 4310 }, { "epoch": 1.1, "learning_rate": 0.00014188367914299676, "loss": 1.4026, "step": 4315 }, { "epoch": 1.11, "learning_rate": 0.00014176154067298716, "loss": 1.4184, "step": 4320 }, { "epoch": 1.11, "learning_rate": 0.0001416393266969196, "loss": 1.4628, "step": 4325 }, { "epoch": 1.11, "learning_rate": 0.00014151703743576043, "loss": 1.4257, "step": 4330 }, { "epoch": 1.11, "learning_rate": 0.00014139467311061215, "loss": 1.4134, "step": 4335 }, { "epoch": 1.11, "learning_rate": 0.0001412722339427129, "loss": 1.4334, "step": 4340 }, { "epoch": 1.11, "learning_rate": 0.0001411497201534362, "loss": 1.4083, "step": 4345 }, { "epoch": 1.11, "learning_rate": 0.00014102713196429055, "loss": 1.4254, "step": 4350 }, { "epoch": 1.11, "learning_rate": 0.0001409044695969188, "loss": 1.4232, "step": 4355 }, { "epoch": 1.12, "learning_rate": 0.00014078173327309806, "loss": 1.4508, "step": 4360 }, { "epoch": 1.12, "learning_rate": 0.0001406589232147391, "loss": 1.4353, "step": 4365 }, { "epoch": 1.12, "learning_rate": 0.0001405606222283936, "loss": 1.3598, "step": 4370 }, { "epoch": 1.12, "learning_rate": 0.0001404376800075063, "loss": 1.4732, "step": 4375 }, { "epoch": 1.12, "learning_rate": 0.00014031466467413891, "loss": 1.5183, "step": 4380 }, { "epoch": 1.12, "learning_rate": 0.00014019157645070665, "loss": 1.3976, "step": 4385 }, { "epoch": 1.12, "learning_rate": 0.0001400684155597566, "loss": 1.4479, "step": 4390 }, { "epoch": 1.13, "learning_rate": 0.00013994518222396713, "loss": 1.4417, "step": 4395 }, { "epoch": 1.13, "learning_rate": 0.00013982187666614762, "loss": 1.4082, "step": 4400 }, { "epoch": 1.13, "learning_rate": 0.00013969849910923802, "loss": 1.4277, "step": 4405 }, { "epoch": 1.13, "learning_rate": 0.00013957504977630853, "loss": 1.4101, "step": 4410 }, { "epoch": 1.13, "learning_rate": 0.00013945152889055902, "loss": 1.4257, "step": 4415 }, { "epoch": 1.13, "learning_rate": 0.0001393279366753188, "loss": 1.4512, "step": 4420 }, { "epoch": 1.13, "learning_rate": 0.0001392042733540462, "loss": 1.4347, "step": 4425 }, { "epoch": 1.13, "learning_rate": 0.00013908053915032785, "loss": 1.4204, "step": 4430 }, { "epoch": 1.14, "learning_rate": 0.0001389567342878789, "loss": 1.3492, "step": 4435 }, { "epoch": 1.14, "learning_rate": 0.000138832858990542, "loss": 1.4595, "step": 4440 }, { "epoch": 1.14, "learning_rate": 0.00013870891348228726, "loss": 1.4881, "step": 4445 }, { "epoch": 1.14, "learning_rate": 0.0001385848979872116, "loss": 1.4345, "step": 4450 }, { "epoch": 1.14, "learning_rate": 0.00013846081272953867, "loss": 1.4488, "step": 4455 }, { "epoch": 1.14, "learning_rate": 0.00013833665793361818, "loss": 1.3982, "step": 4460 }, { "epoch": 1.14, "learning_rate": 0.00013821243382392548, "loss": 1.4072, "step": 4465 }, { "epoch": 1.14, "learning_rate": 0.00013811300478118284, "loss": 1.4407, "step": 4470 }, { "epoch": 1.15, "learning_rate": 0.00013798865647277801, "loss": 1.449, "step": 4475 }, { "epoch": 1.15, "learning_rate": 0.0001378891283623639, "loss": 1.3961, "step": 4480 }, { "epoch": 1.15, "learning_rate": 0.0001377646565836807, "loss": 1.4503, "step": 4485 }, { "epoch": 1.15, "learning_rate": 0.00013764011652542003, "loss": 1.4568, "step": 4490 }, { "epoch": 1.15, "learning_rate": 0.00013751550841275393, "loss": 1.4734, "step": 4495 }, { "epoch": 1.15, "learning_rate": 0.00013739083247097747, "loss": 1.4814, "step": 4500 }, { "epoch": 1.15, "learning_rate": 0.00013726608892550822, "loss": 1.4997, "step": 4505 }, { "epoch": 1.15, "learning_rate": 0.00013714127800188613, "loss": 1.4476, "step": 4510 }, { "epoch": 1.16, "learning_rate": 0.00013704138090235902, "loss": 1.4278, "step": 4515 }, { "epoch": 1.16, "learning_rate": 0.0001369164492668109, "loss": 1.409, "step": 4520 }, { "epoch": 1.16, "learning_rate": 0.00013679145088526842, "loss": 1.3598, "step": 4525 }, { "epoch": 1.16, "learning_rate": 0.00013666638598373225, "loss": 1.4, "step": 4530 }, { "epoch": 1.16, "learning_rate": 0.00013654125478832326, "loss": 1.4285, "step": 4535 }, { "epoch": 1.16, "learning_rate": 0.00013641605752528224, "loss": 1.4639, "step": 4540 }, { "epoch": 1.16, "learning_rate": 0.0001362907944209694, "loss": 1.4717, "step": 4545 }, { "epoch": 1.16, "learning_rate": 0.000136165465701864, "loss": 1.407, "step": 4550 }, { "epoch": 1.17, "learning_rate": 0.000136040071594564, "loss": 1.4288, "step": 4555 }, { "epoch": 1.17, "learning_rate": 0.00013591461232578545, "loss": 1.4438, "step": 4560 }, { "epoch": 1.17, "learning_rate": 0.0001357890881223624, "loss": 1.4144, "step": 4565 }, { "epoch": 1.17, "learning_rate": 0.00013566349921124608, "loss": 1.4002, "step": 4570 }, { "epoch": 1.17, "learning_rate": 0.00013553784581950485, "loss": 1.446, "step": 4575 }, { "epoch": 1.17, "learning_rate": 0.0001354121281743237, "loss": 1.4575, "step": 4580 }, { "epoch": 1.17, "learning_rate": 0.00013528634650300365, "loss": 1.401, "step": 4585 }, { "epoch": 1.17, "learning_rate": 0.00013516050103296157, "loss": 1.4169, "step": 4590 }, { "epoch": 1.18, "learning_rate": 0.00013503459199172967, "loss": 1.4195, "step": 4595 }, { "epoch": 1.18, "learning_rate": 0.0001349086196069551, "loss": 1.4326, "step": 4600 }, { "epoch": 1.18, "learning_rate": 0.00013478258410639952, "loss": 1.4582, "step": 4605 }, { "epoch": 1.18, "learning_rate": 0.0001346564857179387, "loss": 1.4867, "step": 4610 }, { "epoch": 1.18, "learning_rate": 0.00013453032466956218, "loss": 1.4168, "step": 4615 }, { "epoch": 1.18, "learning_rate": 0.00013440410118937266, "loss": 1.3966, "step": 4620 }, { "epoch": 1.18, "learning_rate": 0.00013427781550558586, "loss": 1.3888, "step": 4625 }, { "epoch": 1.19, "learning_rate": 0.00013415146784652985, "loss": 1.5074, "step": 4630 }, { "epoch": 1.19, "learning_rate": 0.00013402505844064487, "loss": 1.4581, "step": 4635 }, { "epoch": 1.19, "learning_rate": 0.00013389858751648265, "loss": 1.4293, "step": 4640 }, { "epoch": 1.19, "learning_rate": 0.0001337720553027063, "loss": 1.4107, "step": 4645 }, { "epoch": 1.19, "learning_rate": 0.00013364546202808964, "loss": 1.3778, "step": 4650 }, { "epoch": 1.19, "learning_rate": 0.000133518807921517, "loss": 1.3992, "step": 4655 }, { "epoch": 1.19, "learning_rate": 0.00013339209321198254, "loss": 1.4125, "step": 4660 }, { "epoch": 1.19, "learning_rate": 0.00013326531812859002, "loss": 1.4158, "step": 4665 }, { "epoch": 1.2, "learning_rate": 0.00013313848290055256, "loss": 1.4829, "step": 4670 }, { "epoch": 1.2, "learning_rate": 0.0001330115877571918, "loss": 1.4541, "step": 4675 }, { "epoch": 1.2, "learning_rate": 0.00013288463292793776, "loss": 1.4877, "step": 4680 }, { "epoch": 1.2, "learning_rate": 0.0001327576186423284, "loss": 1.4225, "step": 4685 }, { "epoch": 1.2, "learning_rate": 0.0001326305451300092, "loss": 1.4739, "step": 4690 }, { "epoch": 1.2, "learning_rate": 0.00013250341262073274, "loss": 1.4001, "step": 4695 }, { "epoch": 1.2, "learning_rate": 0.00013237622134435815, "loss": 1.4394, "step": 4700 }, { "epoch": 1.2, "learning_rate": 0.0001322489715308509, "loss": 1.4119, "step": 4705 }, { "epoch": 1.21, "learning_rate": 0.0001321216634102823, "loss": 1.3745, "step": 4710 }, { "epoch": 1.21, "learning_rate": 0.0001319942972128291, "loss": 1.4752, "step": 4715 }, { "epoch": 1.21, "learning_rate": 0.00013186687316877298, "loss": 1.4181, "step": 4720 }, { "epoch": 1.21, "learning_rate": 0.00013173939150850028, "loss": 1.4892, "step": 4725 }, { "epoch": 1.21, "learning_rate": 0.00013161185246250142, "loss": 1.4416, "step": 4730 }, { "epoch": 1.21, "learning_rate": 0.0001314842562613707, "loss": 1.4177, "step": 4735 }, { "epoch": 1.21, "learning_rate": 0.0001313566031358057, "loss": 1.4313, "step": 4740 }, { "epoch": 1.21, "learning_rate": 0.00013122889331660688, "loss": 1.3827, "step": 4745 }, { "epoch": 1.22, "learning_rate": 0.00013110112703467724, "loss": 1.4574, "step": 4750 }, { "epoch": 1.22, "learning_rate": 0.0001309733045210219, "loss": 1.4436, "step": 4755 }, { "epoch": 1.22, "learning_rate": 0.00013084542600674755, "loss": 1.4202, "step": 4760 }, { "epoch": 1.22, "learning_rate": 0.00013071749172306228, "loss": 1.424, "step": 4765 }, { "epoch": 1.22, "learning_rate": 0.00013058950190127487, "loss": 1.4608, "step": 4770 }, { "epoch": 1.22, "learning_rate": 0.0001304614567727946, "loss": 1.3907, "step": 4775 }, { "epoch": 1.22, "learning_rate": 0.00013035898100476238, "loss": 1.4107, "step": 4780 }, { "epoch": 1.22, "learning_rate": 0.00013023083690770517, "loss": 1.4548, "step": 4785 }, { "epoch": 1.23, "learning_rate": 0.00013010263815243164, "loss": 1.4549, "step": 4790 }, { "epoch": 1.23, "learning_rate": 0.00013000003995005462, "loss": 1.4208, "step": 4795 }, { "epoch": 1.23, "learning_rate": 0.0001298717433941611, "loss": 1.4437, "step": 4800 }, { "epoch": 1.23, "learning_rate": 0.0001297433928293026, "loss": 1.4486, "step": 4805 }, { "epoch": 1.23, "learning_rate": 0.00012961498848754058, "loss": 1.407, "step": 4810 }, { "epoch": 1.23, "learning_rate": 0.00012948653060103376, "loss": 1.3718, "step": 4815 }, { "epoch": 1.23, "learning_rate": 0.0001293580194020377, "loss": 1.4805, "step": 4820 }, { "epoch": 1.24, "learning_rate": 0.00012922945512290428, "loss": 1.45, "step": 4825 }, { "epoch": 1.24, "learning_rate": 0.00012910083799608137, "loss": 1.4157, "step": 4830 }, { "epoch": 1.24, "learning_rate": 0.00012897216825411245, "loss": 1.3946, "step": 4835 }, { "epoch": 1.24, "learning_rate": 0.00012884344612963607, "loss": 1.4229, "step": 4840 }, { "epoch": 1.24, "learning_rate": 0.00012871467185538552, "loss": 1.4577, "step": 4845 }, { "epoch": 1.24, "learning_rate": 0.00012858584566418838, "loss": 1.4174, "step": 4850 }, { "epoch": 1.24, "learning_rate": 0.00012845696778896598, "loss": 1.4712, "step": 4855 }, { "epoch": 1.24, "learning_rate": 0.0001283280384627333, "loss": 1.4475, "step": 4860 }, { "epoch": 1.25, "learning_rate": 0.00012819905791859829, "loss": 1.3623, "step": 4865 }, { "epoch": 1.25, "learning_rate": 0.00012807002638976132, "loss": 1.418, "step": 4870 }, { "epoch": 1.25, "learning_rate": 0.00012794094410951515, "loss": 1.3749, "step": 4875 }, { "epoch": 1.25, "learning_rate": 0.0001278118113112442, "loss": 1.4437, "step": 4880 }, { "epoch": 1.25, "learning_rate": 0.0001276826282284243, "loss": 1.4181, "step": 4885 }, { "epoch": 1.25, "learning_rate": 0.00012755339509462214, "loss": 1.384, "step": 4890 }, { "epoch": 1.25, "learning_rate": 0.00012742411214349488, "loss": 1.39, "step": 4895 }, { "epoch": 1.25, "learning_rate": 0.0001273206500711942, "loss": 1.4391, "step": 4900 }, { "epoch": 1.26, "learning_rate": 0.00012719127803798518, "loss": 1.4204, "step": 4905 }, { "epoch": 1.26, "learning_rate": 0.00012706185684216906, "loss": 1.3879, "step": 4910 }, { "epoch": 1.26, "learning_rate": 0.00012693238671774307, "loss": 1.4231, "step": 4915 }, { "epoch": 1.26, "learning_rate": 0.0001268028678987929, "loss": 1.3655, "step": 4920 }, { "epoch": 1.26, "learning_rate": 0.00012667330061949224, "loss": 1.4627, "step": 4925 }, { "epoch": 1.26, "learning_rate": 0.00012654368511410245, "loss": 1.4352, "step": 4930 }, { "epoch": 1.26, "learning_rate": 0.00012641402161697206, "loss": 1.4179, "step": 4935 }, { "epoch": 1.26, "learning_rate": 0.0001262843103625364, "loss": 1.4422, "step": 4940 }, { "epoch": 1.27, "learning_rate": 0.00012618050713132368, "loss": 1.3732, "step": 4945 }, { "epoch": 1.27, "learning_rate": 0.00012605071050479077, "loss": 1.4328, "step": 4950 }, { "epoch": 1.27, "learning_rate": 0.00012592086677782946, "loss": 1.357, "step": 4955 }, { "epoch": 1.27, "learning_rate": 0.00012579097618520087, "loss": 1.4299, "step": 4960 }, { "epoch": 1.27, "learning_rate": 0.0001256610389617509, "loss": 1.4426, "step": 4965 }, { "epoch": 1.27, "learning_rate": 0.00012555705576666986, "loss": 1.4366, "step": 4970 }, { "epoch": 1.27, "learning_rate": 0.00012542703519982198, "loss": 1.4408, "step": 4975 }, { "epoch": 1.27, "learning_rate": 0.0001252969686601684, "loss": 1.4207, "step": 4980 }, { "epoch": 1.28, "learning_rate": 0.00012516685638287319, "loss": 1.3681, "step": 4985 }, { "epoch": 1.28, "learning_rate": 0.000125036698603183, "loss": 1.3886, "step": 4990 }, { "epoch": 1.28, "learning_rate": 0.00012490649555642693, "loss": 1.3916, "step": 4995 }, { "epoch": 1.28, "learning_rate": 0.00012477624747801567, "loss": 1.4379, "step": 5000 }, { "epoch": 1.28, "eval_loss": 1.4501123428344727, "eval_runtime": 2.0229, "eval_samples_per_second": 49.433, "eval_steps_per_second": 1.977, "step": 5000 }, { "epoch": 1.28, "learning_rate": 0.00012464595460344154, "loss": 1.3558, "step": 5005 }, { "epoch": 1.28, "learning_rate": 0.00012451561716827778, "loss": 1.4617, "step": 5010 }, { "epoch": 1.28, "learning_rate": 0.00012438523540817816, "loss": 1.4486, "step": 5015 }, { "epoch": 1.29, "learning_rate": 0.00012425480955887663, "loss": 1.4118, "step": 5020 }, { "epoch": 1.29, "learning_rate": 0.00012412433985618685, "loss": 1.4532, "step": 5025 }, { "epoch": 1.29, "learning_rate": 0.0001239938265360018, "loss": 1.406, "step": 5030 }, { "epoch": 1.29, "learning_rate": 0.00012386326983429327, "loss": 1.4404, "step": 5035 }, { "epoch": 1.29, "learning_rate": 0.0001237326699871115, "loss": 1.4229, "step": 5040 }, { "epoch": 1.29, "learning_rate": 0.00012360202723058473, "loss": 1.3981, "step": 5045 }, { "epoch": 1.29, "learning_rate": 0.0001234713418009188, "loss": 1.4176, "step": 5050 }, { "epoch": 1.29, "learning_rate": 0.00012334061393439675, "loss": 1.4095, "step": 5055 }, { "epoch": 1.3, "learning_rate": 0.00012320984386737823, "loss": 1.4348, "step": 5060 }, { "epoch": 1.3, "learning_rate": 0.00012307903183629928, "loss": 1.4238, "step": 5065 }, { "epoch": 1.3, "learning_rate": 0.00012294817807767175, "loss": 1.3832, "step": 5070 }, { "epoch": 1.3, "learning_rate": 0.00012281728282808306, "loss": 1.4585, "step": 5075 }, { "epoch": 1.3, "learning_rate": 0.0001226863463241955, "loss": 1.4286, "step": 5080 }, { "epoch": 1.3, "learning_rate": 0.00012255536880274596, "loss": 1.4371, "step": 5085 }, { "epoch": 1.3, "learning_rate": 0.00012242435050054563, "loss": 1.3724, "step": 5090 }, { "epoch": 1.3, "learning_rate": 0.0001222932916544793, "loss": 1.389, "step": 5095 }, { "epoch": 1.31, "learning_rate": 0.00012216219250150512, "loss": 1.4051, "step": 5100 }, { "epoch": 1.31, "learning_rate": 0.00012203105327865407, "loss": 1.4805, "step": 5105 }, { "epoch": 1.31, "learning_rate": 0.00012189987422302964, "loss": 1.4422, "step": 5110 }, { "epoch": 1.31, "learning_rate": 0.0001217686555718073, "loss": 1.4036, "step": 5115 }, { "epoch": 1.31, "learning_rate": 0.00012163739756223411, "loss": 1.344, "step": 5120 }, { "epoch": 1.31, "learning_rate": 0.00012150610043162832, "loss": 1.4108, "step": 5125 }, { "epoch": 1.31, "learning_rate": 0.00012137476441737888, "loss": 1.4028, "step": 5130 }, { "epoch": 1.31, "learning_rate": 0.00012124338975694503, "loss": 1.4541, "step": 5135 }, { "epoch": 1.32, "learning_rate": 0.00012111197668785592, "loss": 1.4288, "step": 5140 }, { "epoch": 1.32, "learning_rate": 0.00012098052544771019, "loss": 1.4387, "step": 5145 }, { "epoch": 1.32, "learning_rate": 0.00012084903627417535, "loss": 1.4569, "step": 5150 }, { "epoch": 1.32, "learning_rate": 0.00012071750940498761, "loss": 1.4091, "step": 5155 }, { "epoch": 1.32, "learning_rate": 0.00012058594507795131, "loss": 1.4269, "step": 5160 }, { "epoch": 1.32, "learning_rate": 0.00012045434353093851, "loss": 1.3892, "step": 5165 }, { "epoch": 1.32, "learning_rate": 0.00012032270500188858, "loss": 1.427, "step": 5170 }, { "epoch": 1.32, "learning_rate": 0.00012019102972880774, "loss": 1.4119, "step": 5175 }, { "epoch": 1.33, "learning_rate": 0.00012005931794976859, "loss": 1.4303, "step": 5180 }, { "epoch": 1.33, "learning_rate": 0.00011992756990290984, "loss": 1.4263, "step": 5185 }, { "epoch": 1.33, "learning_rate": 0.00011979578582643569, "loss": 1.4554, "step": 5190 }, { "epoch": 1.33, "learning_rate": 0.00011966396595861555, "loss": 1.4347, "step": 5195 }, { "epoch": 1.33, "learning_rate": 0.00011953211053778351, "loss": 1.4639, "step": 5200 }, { "epoch": 1.33, "learning_rate": 0.00011945298031900956, "loss": 1.4315, "step": 5205 }, { "epoch": 1.33, "learning_rate": 0.0001193210685092536, "loss": 1.3672, "step": 5210 }, { "epoch": 1.33, "learning_rate": 0.0001191891217664541, "loss": 1.383, "step": 5215 }, { "epoch": 1.34, "learning_rate": 0.00011905714032917448, "loss": 1.4325, "step": 5220 }, { "epoch": 1.34, "learning_rate": 0.00011892512443604102, "loss": 1.411, "step": 5225 }, { "epoch": 1.34, "learning_rate": 0.00011879307432574223, "loss": 1.3938, "step": 5230 }, { "epoch": 1.34, "learning_rate": 0.00011866099023702841, "loss": 1.4079, "step": 5235 }, { "epoch": 1.34, "learning_rate": 0.00011852887240871145, "loss": 1.4273, "step": 5240 }, { "epoch": 1.34, "learning_rate": 0.00011839672107966406, "loss": 1.3616, "step": 5245 }, { "epoch": 1.34, "learning_rate": 0.00011826453648881972, "loss": 1.3725, "step": 5250 }, { "epoch": 1.35, "learning_rate": 0.00011813231887517189, "loss": 1.3689, "step": 5255 }, { "epoch": 1.35, "learning_rate": 0.00011800006847777377, "loss": 1.3701, "step": 5260 }, { "epoch": 1.35, "learning_rate": 0.00011786778553573787, "loss": 1.3809, "step": 5265 }, { "epoch": 1.35, "learning_rate": 0.00011773547028823557, "loss": 1.4298, "step": 5270 }, { "epoch": 1.35, "learning_rate": 0.00011760312297449656, "loss": 1.3978, "step": 5275 }, { "epoch": 1.35, "learning_rate": 0.0001174707438338086, "loss": 1.3769, "step": 5280 }, { "epoch": 1.35, "learning_rate": 0.00011733833310551692, "loss": 1.3599, "step": 5285 }, { "epoch": 1.35, "learning_rate": 0.0001172058910290239, "loss": 1.4283, "step": 5290 }, { "epoch": 1.36, "learning_rate": 0.00011707341784378864, "loss": 1.4419, "step": 5295 }, { "epoch": 1.36, "learning_rate": 0.00011694091378932641, "loss": 1.3855, "step": 5300 }, { "epoch": 1.36, "learning_rate": 0.00011680837910520834, "loss": 1.458, "step": 5305 }, { "epoch": 1.36, "learning_rate": 0.00011667581403106089, "loss": 1.3965, "step": 5310 }, { "epoch": 1.36, "learning_rate": 0.00011654321880656552, "loss": 1.4395, "step": 5315 }, { "epoch": 1.36, "learning_rate": 0.00011641059367145819, "loss": 1.4491, "step": 5320 }, { "epoch": 1.36, "learning_rate": 0.00011630447218886882, "loss": 1.4088, "step": 5325 }, { "epoch": 1.36, "learning_rate": 0.00011617179381896688, "loss": 1.43, "step": 5330 }, { "epoch": 1.37, "learning_rate": 0.00011603908620999997, "loss": 1.4521, "step": 5335 }, { "epoch": 1.37, "learning_rate": 0.00011590634960190721, "loss": 1.3744, "step": 5340 }, { "epoch": 1.37, "learning_rate": 0.00011577358423468021, "loss": 1.4356, "step": 5345 }, { "epoch": 1.37, "learning_rate": 0.0001156407903483625, "loss": 1.4306, "step": 5350 }, { "epoch": 1.37, "learning_rate": 0.00011550796818304925, "loss": 1.4369, "step": 5355 }, { "epoch": 1.37, "learning_rate": 0.00011540169025129845, "loss": 1.3815, "step": 5360 }, { "epoch": 1.37, "learning_rate": 0.00011526881778899591, "loss": 1.431, "step": 5365 }, { "epoch": 1.37, "learning_rate": 0.00011513591772023478, "loss": 1.4024, "step": 5370 }, { "epoch": 1.38, "learning_rate": 0.00011500299028530228, "loss": 1.4084, "step": 5375 }, { "epoch": 1.38, "learning_rate": 0.00011487003572453496, "loss": 1.4309, "step": 5380 }, { "epoch": 1.38, "learning_rate": 0.00011473705427831842, "loss": 1.4089, "step": 5385 }, { "epoch": 1.38, "learning_rate": 0.00011460404618708698, "loss": 1.4052, "step": 5390 }, { "epoch": 1.38, "learning_rate": 0.00011447101169132302, "loss": 1.425, "step": 5395 }, { "epoch": 1.38, "learning_rate": 0.00011433795103155676, "loss": 1.3861, "step": 5400 }, { "epoch": 1.38, "learning_rate": 0.00011420486444836562, "loss": 1.4156, "step": 5405 }, { "epoch": 1.38, "learning_rate": 0.00011407175218237394, "loss": 1.4007, "step": 5410 }, { "epoch": 1.39, "learning_rate": 0.00011393861447425252, "loss": 1.3826, "step": 5415 }, { "epoch": 1.39, "learning_rate": 0.00011380545156471815, "loss": 1.4594, "step": 5420 }, { "epoch": 1.39, "learning_rate": 0.00011367226369453316, "loss": 1.3712, "step": 5425 }, { "epoch": 1.39, "learning_rate": 0.00011353905110450503, "loss": 1.4281, "step": 5430 }, { "epoch": 1.39, "learning_rate": 0.00011340581403548589, "loss": 1.4149, "step": 5435 }, { "epoch": 1.39, "learning_rate": 0.00011327255272837221, "loss": 1.3817, "step": 5440 }, { "epoch": 1.39, "learning_rate": 0.00011313926742410421, "loss": 1.4171, "step": 5445 }, { "epoch": 1.4, "learning_rate": 0.00011300595836366555, "loss": 1.4425, "step": 5450 }, { "epoch": 1.4, "learning_rate": 0.0001128726257880828, "loss": 1.3551, "step": 5455 }, { "epoch": 1.4, "learning_rate": 0.00011273926993842505, "loss": 1.3763, "step": 5460 }, { "epoch": 1.4, "learning_rate": 0.00011260589105580352, "loss": 1.3562, "step": 5465 }, { "epoch": 1.4, "learning_rate": 0.00011247248938137103, "loss": 1.3517, "step": 5470 }, { "epoch": 1.4, "learning_rate": 0.00011233906515632158, "loss": 1.4338, "step": 5475 }, { "epoch": 1.4, "learning_rate": 0.00011223230970194626, "loss": 1.4113, "step": 5480 }, { "epoch": 1.4, "learning_rate": 0.0001120988454937252, "loss": 1.4015, "step": 5485 }, { "epoch": 1.41, "learning_rate": 0.00011196535941044599, "loss": 1.4032, "step": 5490 }, { "epoch": 1.41, "learning_rate": 0.00011183185169345534, "loss": 1.4157, "step": 5495 }, { "epoch": 1.41, "learning_rate": 0.00011169832258413897, "loss": 1.4636, "step": 5500 }, { "epoch": 1.41, "learning_rate": 0.00011156477232392133, "loss": 1.3721, "step": 5505 }, { "epoch": 1.41, "learning_rate": 0.00011145791704936, "loss": 1.4165, "step": 5510 }, { "epoch": 1.41, "learning_rate": 0.00011132432932603147, "loss": 1.4159, "step": 5515 }, { "epoch": 1.41, "learning_rate": 0.00011119072112799218, "loss": 1.3732, "step": 5520 }, { "epoch": 1.41, "learning_rate": 0.0001110570926968096, "loss": 1.3909, "step": 5525 }, { "epoch": 1.42, "learning_rate": 0.00011092344427408767, "loss": 1.4006, "step": 5530 }, { "epoch": 1.42, "learning_rate": 0.00011078977610146661, "loss": 1.3661, "step": 5535 }, { "epoch": 1.42, "learning_rate": 0.00011065608842062226, "loss": 1.4046, "step": 5540 }, { "epoch": 1.42, "learning_rate": 0.00011052238147326575, "loss": 1.4093, "step": 5545 }, { "epoch": 1.42, "learning_rate": 0.00011038865550114314, "loss": 1.4184, "step": 5550 }, { "epoch": 1.42, "learning_rate": 0.00011025491074603473, "loss": 1.4129, "step": 5555 }, { "epoch": 1.42, "learning_rate": 0.00011012114744975488, "loss": 1.442, "step": 5560 }, { "epoch": 1.42, "learning_rate": 0.00010998736585415143, "loss": 1.4422, "step": 5565 }, { "epoch": 1.43, "learning_rate": 0.00010985356620110534, "loss": 1.3884, "step": 5570 }, { "epoch": 1.43, "learning_rate": 0.00010971974873253024, "loss": 1.4395, "step": 5575 }, { "epoch": 1.43, "learning_rate": 0.0001095859136903719, "loss": 1.4436, "step": 5580 }, { "epoch": 1.43, "learning_rate": 0.00010945206131660786, "loss": 1.4015, "step": 5585 }, { "epoch": 1.43, "learning_rate": 0.0001093181918532471, "loss": 1.4198, "step": 5590 }, { "epoch": 1.43, "learning_rate": 0.0001091843055423294, "loss": 1.4142, "step": 5595 }, { "epoch": 1.43, "learning_rate": 0.00010905040262592501, "loss": 1.5011, "step": 5600 }, { "epoch": 1.43, "learning_rate": 0.00010891648334613427, "loss": 1.3874, "step": 5605 }, { "epoch": 1.44, "learning_rate": 0.00010878254794508704, "loss": 1.3769, "step": 5610 }, { "epoch": 1.44, "learning_rate": 0.0001086753881796745, "loss": 1.458, "step": 5615 }, { "epoch": 1.44, "learning_rate": 0.00010854142437062567, "loss": 1.4166, "step": 5620 }, { "epoch": 1.44, "learning_rate": 0.00010840744511843768, "loss": 1.3609, "step": 5625 }, { "epoch": 1.44, "learning_rate": 0.00010827345066534887, "loss": 1.4003, "step": 5630 }, { "epoch": 1.44, "learning_rate": 0.0001081662443210309, "loss": 1.4183, "step": 5635 }, { "epoch": 1.44, "learning_rate": 0.00010803222311684858, "loss": 1.4088, "step": 5640 }, { "epoch": 1.45, "learning_rate": 0.00010792499568567884, "loss": 1.4285, "step": 5645 }, { "epoch": 1.45, "learning_rate": 0.00010779094851550066, "loss": 1.4227, "step": 5650 }, { "epoch": 1.45, "learning_rate": 0.00010765688725906506, "loss": 1.3918, "step": 5655 }, { "epoch": 1.45, "learning_rate": 0.00010752281215875871, "loss": 1.4503, "step": 5660 }, { "epoch": 1.45, "learning_rate": 0.00010738872345699313, "loss": 1.4094, "step": 5665 }, { "epoch": 1.45, "learning_rate": 0.00010725462139620459, "loss": 1.3559, "step": 5670 }, { "epoch": 1.45, "learning_rate": 0.00010712050621885343, "loss": 1.3442, "step": 5675 }, { "epoch": 1.45, "learning_rate": 0.0001069863781674237, "loss": 1.3906, "step": 5680 }, { "epoch": 1.46, "learning_rate": 0.00010685223748442275, "loss": 1.3976, "step": 5685 }, { "epoch": 1.46, "learning_rate": 0.00010671808441238078, "loss": 1.3859, "step": 5690 }, { "epoch": 1.46, "learning_rate": 0.00010658391919385035, "loss": 1.4548, "step": 5695 }, { "epoch": 1.46, "learning_rate": 0.00010644974207140608, "loss": 1.4269, "step": 5700 }, { "epoch": 1.46, "learning_rate": 0.00010631555328764393, "loss": 1.4029, "step": 5705 }, { "epoch": 1.46, "learning_rate": 0.00010618135308518111, "loss": 1.4477, "step": 5710 }, { "epoch": 1.46, "learning_rate": 0.00010604714170665544, "loss": 1.3715, "step": 5715 }, { "epoch": 1.46, "learning_rate": 0.00010591291939472487, "loss": 1.4253, "step": 5720 }, { "epoch": 1.47, "learning_rate": 0.00010577868639206722, "loss": 1.3838, "step": 5725 }, { "epoch": 1.47, "learning_rate": 0.00010564444294137956, "loss": 1.3944, "step": 5730 }, { "epoch": 1.47, "learning_rate": 0.0001055101892853779, "loss": 1.3913, "step": 5735 }, { "epoch": 1.47, "learning_rate": 0.00010537592566679669, "loss": 1.3901, "step": 5740 }, { "epoch": 1.47, "learning_rate": 0.00010524165232838836, "loss": 1.4328, "step": 5745 }, { "epoch": 1.47, "learning_rate": 0.00010510736951292295, "loss": 1.3933, "step": 5750 }, { "epoch": 1.47, "learning_rate": 0.00010497307746318762, "loss": 1.4321, "step": 5755 }, { "epoch": 1.47, "learning_rate": 0.0001048656373378887, "loss": 1.4344, "step": 5760 }, { "epoch": 1.48, "learning_rate": 0.0001047313292783442, "loss": 1.4141, "step": 5765 }, { "epoch": 1.48, "learning_rate": 0.00010459701266442126, "loss": 1.3427, "step": 5770 }, { "epoch": 1.48, "learning_rate": 0.00010446268773896817, "loss": 1.3734, "step": 5775 }, { "epoch": 1.48, "learning_rate": 0.00010432835474484825, "loss": 1.4039, "step": 5780 }, { "epoch": 1.48, "learning_rate": 0.00010419401392493935, "loss": 1.3891, "step": 5785 }, { "epoch": 1.48, "learning_rate": 0.00010405966552213351, "loss": 1.4056, "step": 5790 }, { "epoch": 1.48, "learning_rate": 0.00010392530977933648, "loss": 1.3927, "step": 5795 }, { "epoch": 1.48, "learning_rate": 0.00010379094693946732, "loss": 1.3752, "step": 5800 }, { "epoch": 1.49, "learning_rate": 0.00010368345172092979, "loss": 1.3876, "step": 5805 }, { "epoch": 1.49, "learning_rate": 0.00010357595224012906, "loss": 1.3996, "step": 5810 }, { "epoch": 1.49, "learning_rate": 0.0001034415720848241, "loss": 1.3784, "step": 5815 }, { "epoch": 1.49, "learning_rate": 0.00010330718570705853, "loss": 1.374, "step": 5820 }, { "epoch": 1.49, "learning_rate": 0.00010317279334980678, "loss": 1.3702, "step": 5825 }, { "epoch": 1.49, "learning_rate": 0.00010303839525605405, "loss": 1.3943, "step": 5830 }, { "epoch": 1.49, "learning_rate": 0.00010290399166879594, "loss": 1.4236, "step": 5835 }, { "epoch": 1.49, "learning_rate": 0.00010276958283103797, "loss": 1.3874, "step": 5840 }, { "epoch": 1.5, "learning_rate": 0.00010263516898579517, "loss": 1.3695, "step": 5845 }, { "epoch": 1.5, "learning_rate": 0.00010250075037609161, "loss": 1.3526, "step": 5850 }, { "epoch": 1.5, "learning_rate": 0.00010236632724495993, "loss": 1.3377, "step": 5855 }, { "epoch": 1.5, "learning_rate": 0.00010223189983544104, "loss": 1.3969, "step": 5860 }, { "epoch": 1.5, "learning_rate": 0.00010209746839058354, "loss": 1.3599, "step": 5865 }, { "epoch": 1.5, "learning_rate": 0.00010196303315344326, "loss": 1.4172, "step": 5870 }, { "epoch": 1.5, "learning_rate": 0.00010182859436708303, "loss": 1.4567, "step": 5875 }, { "epoch": 1.51, "learning_rate": 0.00010169415227457196, "loss": 1.414, "step": 5880 }, { "epoch": 1.51, "learning_rate": 0.00010155970711898519, "loss": 1.3855, "step": 5885 }, { "epoch": 1.51, "learning_rate": 0.00010142525914340344, "loss": 1.3983, "step": 5890 }, { "epoch": 1.51, "learning_rate": 0.00010129080859091242, "loss": 1.3987, "step": 5895 }, { "epoch": 1.51, "learning_rate": 0.00010115635570460262, "loss": 1.4195, "step": 5900 }, { "epoch": 1.51, "learning_rate": 0.00010102190072756872, "loss": 1.4462, "step": 5905 }, { "epoch": 1.51, "learning_rate": 0.0001008874439029091, "loss": 1.4056, "step": 5910 }, { "epoch": 1.51, "learning_rate": 0.0001007529854737256, "loss": 1.4045, "step": 5915 }, { "epoch": 1.52, "learning_rate": 0.00010061852568312282, "loss": 1.378, "step": 5920 }, { "epoch": 1.52, "learning_rate": 0.00010048406477420795, "loss": 1.4047, "step": 5925 }, { "epoch": 1.52, "learning_rate": 0.00010034960299009018, "loss": 1.4246, "step": 5930 }, { "epoch": 1.52, "learning_rate": 0.00010021514057388019, "loss": 1.4401, "step": 5935 }, { "epoch": 1.52, "learning_rate": 0.0001000806777686899, "loss": 1.4515, "step": 5940 }, { "epoch": 1.52, "learning_rate": 9.994621481763191e-05, "loss": 1.3925, "step": 5945 }, { "epoch": 1.52, "learning_rate": 9.981175196381905e-05, "loss": 1.4343, "step": 5950 }, { "epoch": 1.52, "learning_rate": 9.967728945036407e-05, "loss": 1.4574, "step": 5955 }, { "epoch": 1.53, "learning_rate": 9.954282752037893e-05, "loss": 1.3839, "step": 5960 }, { "epoch": 1.53, "learning_rate": 9.940836641697465e-05, "loss": 1.4161, "step": 5965 }, { "epoch": 1.53, "learning_rate": 9.927390638326081e-05, "loss": 1.3791, "step": 5970 }, { "epoch": 1.53, "learning_rate": 9.913944766234502e-05, "loss": 1.4105, "step": 5975 }, { "epoch": 1.53, "learning_rate": 9.900499049733238e-05, "loss": 1.419, "step": 5980 }, { "epoch": 1.53, "learning_rate": 9.887053513132533e-05, "loss": 1.4007, "step": 5985 }, { "epoch": 1.53, "learning_rate": 9.873608180742302e-05, "loss": 1.4211, "step": 5990 }, { "epoch": 1.53, "learning_rate": 9.860163076872089e-05, "loss": 1.3887, "step": 5995 }, { "epoch": 1.54, "learning_rate": 9.846718225831024e-05, "loss": 1.3633, "step": 6000 }, { "epoch": 1.54, "eval_loss": 1.4240084886550903, "eval_runtime": 2.0288, "eval_samples_per_second": 49.289, "eval_steps_per_second": 1.972, "step": 6000 }, { "epoch": 1.54, "learning_rate": 9.833273651927786e-05, "loss": 1.4191, "step": 6005 }, { "epoch": 1.54, "learning_rate": 9.819829379470545e-05, "loss": 1.3797, "step": 6010 }, { "epoch": 1.54, "learning_rate": 9.80638543276693e-05, "loss": 1.4165, "step": 6015 }, { "epoch": 1.54, "learning_rate": 9.792941836123985e-05, "loss": 1.4234, "step": 6020 }, { "epoch": 1.54, "learning_rate": 9.779498613848109e-05, "loss": 1.4159, "step": 6025 }, { "epoch": 1.54, "learning_rate": 9.766055790245034e-05, "loss": 1.3976, "step": 6030 }, { "epoch": 1.54, "learning_rate": 9.752613389619772e-05, "loss": 1.3828, "step": 6035 }, { "epoch": 1.55, "learning_rate": 9.739171436276564e-05, "loss": 1.376, "step": 6040 }, { "epoch": 1.55, "learning_rate": 9.725729954518847e-05, "loss": 1.4632, "step": 6045 }, { "epoch": 1.55, "learning_rate": 9.712288968649202e-05, "loss": 1.3897, "step": 6050 }, { "epoch": 1.55, "learning_rate": 9.69884850296932e-05, "loss": 1.3601, "step": 6055 }, { "epoch": 1.55, "learning_rate": 9.685408581779942e-05, "loss": 1.3986, "step": 6060 }, { "epoch": 1.55, "learning_rate": 9.671969229380829e-05, "loss": 1.3543, "step": 6065 }, { "epoch": 1.55, "learning_rate": 9.658530470070722e-05, "loss": 1.4197, "step": 6070 }, { "epoch": 1.56, "learning_rate": 9.64509232814727e-05, "loss": 1.4355, "step": 6075 }, { "epoch": 1.56, "learning_rate": 9.631654827907021e-05, "loss": 1.3688, "step": 6080 }, { "epoch": 1.56, "learning_rate": 9.618217993645367e-05, "loss": 1.3824, "step": 6085 }, { "epoch": 1.56, "learning_rate": 9.604781849656477e-05, "loss": 1.388, "step": 6090 }, { "epoch": 1.56, "learning_rate": 9.591346420233286e-05, "loss": 1.4368, "step": 6095 }, { "epoch": 1.56, "learning_rate": 9.57791172966744e-05, "loss": 1.4085, "step": 6100 }, { "epoch": 1.56, "learning_rate": 9.564477802249236e-05, "loss": 1.3496, "step": 6105 }, { "epoch": 1.56, "learning_rate": 9.551044662267602e-05, "loss": 1.3607, "step": 6110 }, { "epoch": 1.57, "learning_rate": 9.53761233401004e-05, "loss": 1.4359, "step": 6115 }, { "epoch": 1.57, "learning_rate": 9.526867072165581e-05, "loss": 1.3941, "step": 6120 }, { "epoch": 1.57, "learning_rate": 9.513436266211135e-05, "loss": 1.3779, "step": 6125 }, { "epoch": 1.57, "learning_rate": 9.500006339977808e-05, "loss": 1.3884, "step": 6130 }, { "epoch": 1.57, "learning_rate": 9.489263048707706e-05, "loss": 1.345, "step": 6135 }, { "epoch": 1.57, "learning_rate": 9.475834767161165e-05, "loss": 1.3983, "step": 6140 }, { "epoch": 1.57, "learning_rate": 9.462407433320335e-05, "loss": 1.3893, "step": 6145 }, { "epoch": 1.57, "learning_rate": 9.448981071462211e-05, "loss": 1.4511, "step": 6150 }, { "epoch": 1.58, "learning_rate": 9.438240698116291e-05, "loss": 1.3767, "step": 6155 }, { "epoch": 1.58, "learning_rate": 9.424816146999457e-05, "loss": 1.3907, "step": 6160 }, { "epoch": 1.58, "learning_rate": 9.411392635831452e-05, "loss": 1.4223, "step": 6165 }, { "epoch": 1.58, "learning_rate": 9.397970188882373e-05, "loss": 1.3985, "step": 6170 }, { "epoch": 1.58, "learning_rate": 9.384548830420376e-05, "loss": 1.4028, "step": 6175 }, { "epoch": 1.58, "learning_rate": 9.371128584711669e-05, "loss": 1.4084, "step": 6180 }, { "epoch": 1.58, "learning_rate": 9.357709476020442e-05, "loss": 1.4113, "step": 6185 }, { "epoch": 1.58, "learning_rate": 9.344291528608818e-05, "loss": 1.3479, "step": 6190 }, { "epoch": 1.59, "learning_rate": 9.330874766736832e-05, "loss": 1.3876, "step": 6195 }, { "epoch": 1.59, "learning_rate": 9.317459214662374e-05, "loss": 1.3931, "step": 6200 }, { "epoch": 1.59, "learning_rate": 9.304044896641147e-05, "loss": 1.3901, "step": 6205 }, { "epoch": 1.59, "learning_rate": 9.290631836926615e-05, "loss": 1.4116, "step": 6210 }, { "epoch": 1.59, "learning_rate": 9.277220059769979e-05, "loss": 1.3667, "step": 6215 }, { "epoch": 1.59, "learning_rate": 9.263809589420114e-05, "loss": 1.3717, "step": 6220 }, { "epoch": 1.59, "learning_rate": 9.25040045012353e-05, "loss": 1.386, "step": 6225 }, { "epoch": 1.59, "learning_rate": 9.236992666124335e-05, "loss": 1.3505, "step": 6230 }, { "epoch": 1.6, "learning_rate": 9.223586261664192e-05, "loss": 1.3537, "step": 6235 }, { "epoch": 1.6, "learning_rate": 9.210181260982252e-05, "loss": 1.3243, "step": 6240 }, { "epoch": 1.6, "learning_rate": 9.196777688315145e-05, "loss": 1.3467, "step": 6245 }, { "epoch": 1.6, "learning_rate": 9.183375567896912e-05, "loss": 1.3835, "step": 6250 }, { "epoch": 1.6, "learning_rate": 9.16997492395897e-05, "loss": 1.3783, "step": 6255 }, { "epoch": 1.6, "learning_rate": 9.156575780730064e-05, "loss": 1.3511, "step": 6260 }, { "epoch": 1.6, "learning_rate": 9.143178162436232e-05, "loss": 1.3788, "step": 6265 }, { "epoch": 1.61, "learning_rate": 9.13246118203255e-05, "loss": 1.425, "step": 6270 }, { "epoch": 1.61, "learning_rate": 9.119066369662623e-05, "loss": 1.3933, "step": 6275 }, { "epoch": 1.61, "learning_rate": 9.10567315004585e-05, "loss": 1.3429, "step": 6280 }, { "epoch": 1.61, "learning_rate": 9.092281547397559e-05, "loss": 1.3718, "step": 6285 }, { "epoch": 1.61, "learning_rate": 9.07889158593015e-05, "loss": 1.4122, "step": 6290 }, { "epoch": 1.61, "learning_rate": 9.06550328985305e-05, "loss": 1.3429, "step": 6295 }, { "epoch": 1.61, "learning_rate": 9.052116683372688e-05, "loss": 1.3735, "step": 6300 }, { "epoch": 1.61, "learning_rate": 9.038731790692428e-05, "loss": 1.3651, "step": 6305 }, { "epoch": 1.62, "learning_rate": 9.025348636012536e-05, "loss": 1.3558, "step": 6310 }, { "epoch": 1.62, "learning_rate": 9.01196724353014e-05, "loss": 1.3307, "step": 6315 }, { "epoch": 1.62, "learning_rate": 8.998587637439185e-05, "loss": 1.3343, "step": 6320 }, { "epoch": 1.62, "learning_rate": 8.985209841930369e-05, "loss": 1.3435, "step": 6325 }, { "epoch": 1.62, "learning_rate": 8.971833881191134e-05, "loss": 1.362, "step": 6330 }, { "epoch": 1.62, "learning_rate": 8.958459779405601e-05, "loss": 1.3608, "step": 6335 }, { "epoch": 1.62, "learning_rate": 8.94508756075453e-05, "loss": 1.3742, "step": 6340 }, { "epoch": 1.62, "learning_rate": 8.931717249415267e-05, "loss": 1.3826, "step": 6345 }, { "epoch": 1.63, "learning_rate": 8.918348869561721e-05, "loss": 1.3985, "step": 6350 }, { "epoch": 1.63, "learning_rate": 8.904982445364303e-05, "loss": 1.3799, "step": 6355 }, { "epoch": 1.63, "learning_rate": 8.891618000989891e-05, "loss": 1.3598, "step": 6360 }, { "epoch": 1.63, "learning_rate": 8.878255560601781e-05, "loss": 1.3633, "step": 6365 }, { "epoch": 1.63, "learning_rate": 8.864895148359647e-05, "loss": 1.3742, "step": 6370 }, { "epoch": 1.63, "learning_rate": 8.851536788419495e-05, "loss": 1.3781, "step": 6375 }, { "epoch": 1.63, "learning_rate": 8.840851594355273e-05, "loss": 1.3448, "step": 6380 }, { "epoch": 1.63, "learning_rate": 8.827496989419954e-05, "loss": 1.3512, "step": 6385 }, { "epoch": 1.64, "learning_rate": 8.814144504403558e-05, "loss": 1.3487, "step": 6390 }, { "epoch": 1.64, "learning_rate": 8.800794163447763e-05, "loss": 1.3791, "step": 6395 }, { "epoch": 1.64, "learning_rate": 8.787445990690355e-05, "loss": 1.3459, "step": 6400 }, { "epoch": 1.64, "learning_rate": 8.776769029805376e-05, "loss": 1.4147, "step": 6405 }, { "epoch": 1.64, "learning_rate": 8.763424820619789e-05, "loss": 1.4235, "step": 6410 }, { "epoch": 1.64, "learning_rate": 8.750082847197436e-05, "loss": 1.3645, "step": 6415 }, { "epoch": 1.64, "learning_rate": 8.736743133660983e-05, "loss": 1.3669, "step": 6420 }, { "epoch": 1.64, "learning_rate": 8.723405704129015e-05, "loss": 1.3283, "step": 6425 }, { "epoch": 1.65, "learning_rate": 8.710070582715987e-05, "loss": 1.3286, "step": 6430 }, { "epoch": 1.65, "learning_rate": 8.696737793532175e-05, "loss": 1.3463, "step": 6435 }, { "epoch": 1.65, "learning_rate": 8.683407360683644e-05, "loss": 1.4076, "step": 6440 }, { "epoch": 1.65, "learning_rate": 8.670079308272199e-05, "loss": 1.3792, "step": 6445 }, { "epoch": 1.65, "learning_rate": 8.656753660395344e-05, "loss": 1.3589, "step": 6450 }, { "epoch": 1.65, "learning_rate": 8.64343044114622e-05, "loss": 1.3516, "step": 6455 }, { "epoch": 1.65, "learning_rate": 8.63010967461359e-05, "loss": 1.3236, "step": 6460 }, { "epoch": 1.65, "learning_rate": 8.616791384881791e-05, "loss": 1.3945, "step": 6465 }, { "epoch": 1.66, "learning_rate": 8.603475596030658e-05, "loss": 1.4158, "step": 6470 }, { "epoch": 1.66, "learning_rate": 8.592824781762609e-05, "loss": 1.3717, "step": 6475 }, { "epoch": 1.66, "learning_rate": 8.579513555163441e-05, "loss": 1.4276, "step": 6480 }, { "epoch": 1.66, "learning_rate": 8.566204896844325e-05, "loss": 1.3594, "step": 6485 }, { "epoch": 1.66, "learning_rate": 8.5528988308677e-05, "loss": 1.3455, "step": 6490 }, { "epoch": 1.66, "learning_rate": 8.539595381291304e-05, "loss": 1.3707, "step": 6495 }, { "epoch": 1.66, "learning_rate": 8.526294572168156e-05, "loss": 1.3732, "step": 6500 }, { "epoch": 1.67, "learning_rate": 8.512996427546509e-05, "loss": 1.3353, "step": 6505 }, { "epoch": 1.67, "learning_rate": 8.499700971469774e-05, "loss": 1.3812, "step": 6510 }, { "epoch": 1.67, "learning_rate": 8.486408227976521e-05, "loss": 1.379, "step": 6515 }, { "epoch": 1.67, "learning_rate": 8.473118221100414e-05, "loss": 1.3487, "step": 6520 }, { "epoch": 1.67, "learning_rate": 8.459830974870156e-05, "loss": 1.3838, "step": 6525 }, { "epoch": 1.67, "learning_rate": 8.446546513309474e-05, "loss": 1.3404, "step": 6530 }, { "epoch": 1.67, "learning_rate": 8.433264860437056e-05, "loss": 1.3956, "step": 6535 }, { "epoch": 1.67, "learning_rate": 8.4199860402665e-05, "loss": 1.3947, "step": 6540 }, { "epoch": 1.68, "learning_rate": 8.409365039809281e-05, "loss": 1.4088, "step": 6545 }, { "epoch": 1.68, "learning_rate": 8.396091379000005e-05, "loss": 1.3934, "step": 6550 }, { "epoch": 1.68, "learning_rate": 8.382820618103313e-05, "loss": 1.3734, "step": 6555 }, { "epoch": 1.68, "learning_rate": 8.369552781113122e-05, "loss": 1.3308, "step": 6560 }, { "epoch": 1.68, "learning_rate": 8.356287892018054e-05, "loss": 1.328, "step": 6565 }, { "epoch": 1.68, "learning_rate": 8.343025974801412e-05, "loss": 1.3877, "step": 6570 }, { "epoch": 1.68, "learning_rate": 8.329767053441124e-05, "loss": 1.4135, "step": 6575 }, { "epoch": 1.68, "learning_rate": 8.316511151909689e-05, "loss": 1.3353, "step": 6580 }, { "epoch": 1.69, "learning_rate": 8.30325829417416e-05, "loss": 1.4085, "step": 6585 }, { "epoch": 1.69, "learning_rate": 8.290008504196088e-05, "loss": 1.4019, "step": 6590 }, { "epoch": 1.69, "learning_rate": 8.276761805931463e-05, "loss": 1.3734, "step": 6595 }, { "epoch": 1.69, "learning_rate": 8.263518223330697e-05, "loss": 1.386, "step": 6600 }, { "epoch": 1.69, "learning_rate": 8.25027778033857e-05, "loss": 1.3633, "step": 6605 }, { "epoch": 1.69, "learning_rate": 8.237040500894171e-05, "loss": 1.359, "step": 6610 }, { "epoch": 1.69, "learning_rate": 8.223806408930885e-05, "loss": 1.3589, "step": 6615 }, { "epoch": 1.69, "learning_rate": 8.21057552837633e-05, "loss": 1.3916, "step": 6620 }, { "epoch": 1.7, "learning_rate": 8.197347883152315e-05, "loss": 1.3352, "step": 6625 }, { "epoch": 1.7, "learning_rate": 8.184123497174798e-05, "loss": 1.3992, "step": 6630 }, { "epoch": 1.7, "learning_rate": 8.170902394353848e-05, "loss": 1.3857, "step": 6635 }, { "epoch": 1.7, "learning_rate": 8.157684598593597e-05, "loss": 1.3681, "step": 6640 }, { "epoch": 1.7, "learning_rate": 8.144470133792194e-05, "loss": 1.3834, "step": 6645 }, { "epoch": 1.7, "learning_rate": 8.131259023841772e-05, "loss": 1.3725, "step": 6650 }, { "epoch": 1.7, "learning_rate": 8.118051292628394e-05, "loss": 1.379, "step": 6655 }, { "epoch": 1.7, "learning_rate": 8.104846964032015e-05, "loss": 1.3235, "step": 6660 }, { "epoch": 1.71, "learning_rate": 8.091646061926437e-05, "loss": 1.3643, "step": 6665 }, { "epoch": 1.71, "learning_rate": 8.07844861017927e-05, "loss": 1.3295, "step": 6670 }, { "epoch": 1.71, "learning_rate": 8.065254632651881e-05, "loss": 1.3032, "step": 6675 }, { "epoch": 1.71, "learning_rate": 8.05206415319936e-05, "loss": 1.3784, "step": 6680 }, { "epoch": 1.71, "learning_rate": 8.038877195670475e-05, "loss": 1.413, "step": 6685 }, { "epoch": 1.71, "learning_rate": 8.025693783907613e-05, "loss": 1.3758, "step": 6690 }, { "epoch": 1.71, "learning_rate": 8.012513941746761e-05, "loss": 1.3375, "step": 6695 }, { "epoch": 1.72, "learning_rate": 7.999337693017452e-05, "loss": 1.3502, "step": 6700 }, { "epoch": 1.72, "learning_rate": 7.986165061542727e-05, "loss": 1.4262, "step": 6705 }, { "epoch": 1.72, "learning_rate": 7.972996071139064e-05, "loss": 1.3947, "step": 6710 }, { "epoch": 1.72, "learning_rate": 7.959830745616386e-05, "loss": 1.4095, "step": 6715 }, { "epoch": 1.72, "learning_rate": 7.946669108777975e-05, "loss": 1.3522, "step": 6720 }, { "epoch": 1.72, "learning_rate": 7.933511184420443e-05, "loss": 1.3396, "step": 6725 }, { "epoch": 1.72, "learning_rate": 7.920356996333694e-05, "loss": 1.3092, "step": 6730 }, { "epoch": 1.72, "learning_rate": 7.907206568300879e-05, "loss": 1.3781, "step": 6735 }, { "epoch": 1.73, "learning_rate": 7.894059924098343e-05, "loss": 1.3828, "step": 6740 }, { "epoch": 1.73, "learning_rate": 7.880917087495595e-05, "loss": 1.3521, "step": 6745 }, { "epoch": 1.73, "learning_rate": 7.867778082255264e-05, "loss": 1.3787, "step": 6750 }, { "epoch": 1.73, "learning_rate": 7.85464293213304e-05, "loss": 1.4029, "step": 6755 }, { "epoch": 1.73, "learning_rate": 7.841511660877651e-05, "loss": 1.3751, "step": 6760 }, { "epoch": 1.73, "learning_rate": 7.828384292230813e-05, "loss": 1.3629, "step": 6765 }, { "epoch": 1.73, "learning_rate": 7.815260849927181e-05, "loss": 1.3513, "step": 6770 }, { "epoch": 1.73, "learning_rate": 7.802141357694312e-05, "loss": 1.3583, "step": 6775 }, { "epoch": 1.74, "learning_rate": 7.789025839252626e-05, "loss": 1.3875, "step": 6780 }, { "epoch": 1.74, "learning_rate": 7.775914318315356e-05, "loss": 1.3683, "step": 6785 }, { "epoch": 1.74, "learning_rate": 7.762806818588502e-05, "loss": 1.3166, "step": 6790 }, { "epoch": 1.74, "learning_rate": 7.749703363770797e-05, "loss": 1.3417, "step": 6795 }, { "epoch": 1.74, "learning_rate": 7.73660397755367e-05, "loss": 1.3855, "step": 6800 }, { "epoch": 1.74, "learning_rate": 7.723508683621173e-05, "loss": 1.3646, "step": 6805 }, { "epoch": 1.74, "learning_rate": 7.71041750564998e-05, "loss": 1.3429, "step": 6810 }, { "epoch": 1.74, "learning_rate": 7.697330467309315e-05, "loss": 1.3614, "step": 6815 }, { "epoch": 1.75, "learning_rate": 7.684247592260911e-05, "loss": 1.322, "step": 6820 }, { "epoch": 1.75, "learning_rate": 7.671168904158982e-05, "loss": 1.3567, "step": 6825 }, { "epoch": 1.75, "learning_rate": 7.658094426650173e-05, "loss": 1.3313, "step": 6830 }, { "epoch": 1.75, "learning_rate": 7.645024183373509e-05, "loss": 1.2917, "step": 6835 }, { "epoch": 1.75, "learning_rate": 7.631958197960363e-05, "loss": 1.3379, "step": 6840 }, { "epoch": 1.75, "learning_rate": 7.618896494034412e-05, "loss": 1.3761, "step": 6845 }, { "epoch": 1.75, "learning_rate": 7.605839095211586e-05, "loss": 1.393, "step": 6850 }, { "epoch": 1.75, "learning_rate": 7.592786025100039e-05, "loss": 1.3642, "step": 6855 }, { "epoch": 1.76, "learning_rate": 7.579737307300092e-05, "loss": 1.3784, "step": 6860 }, { "epoch": 1.76, "learning_rate": 7.566692965404205e-05, "loss": 1.3717, "step": 6865 }, { "epoch": 1.76, "learning_rate": 7.55626065838749e-05, "loss": 1.3332, "step": 6870 }, { "epoch": 1.76, "learning_rate": 7.543224252546602e-05, "loss": 1.3377, "step": 6875 }, { "epoch": 1.76, "learning_rate": 7.532798324925699e-05, "loss": 1.3931, "step": 6880 }, { "epoch": 1.76, "learning_rate": 7.519769931507215e-05, "loss": 1.3665, "step": 6885 }, { "epoch": 1.76, "learning_rate": 7.506746022415514e-05, "loss": 1.3628, "step": 6890 }, { "epoch": 1.77, "learning_rate": 7.493726621198199e-05, "loss": 1.3418, "step": 6895 }, { "epoch": 1.77, "learning_rate": 7.480711751394712e-05, "loss": 1.3913, "step": 6900 }, { "epoch": 1.77, "learning_rate": 7.467701436536312e-05, "loss": 1.3581, "step": 6905 }, { "epoch": 1.77, "learning_rate": 7.454695700146024e-05, "loss": 1.3304, "step": 6910 }, { "epoch": 1.77, "learning_rate": 7.441694565738587e-05, "loss": 1.3737, "step": 6915 }, { "epoch": 1.77, "learning_rate": 7.42869805682042e-05, "loss": 1.3514, "step": 6920 }, { "epoch": 1.77, "learning_rate": 7.415706196889592e-05, "loss": 1.3778, "step": 6925 }, { "epoch": 1.77, "learning_rate": 7.402719009435745e-05, "loss": 1.348, "step": 6930 }, { "epoch": 1.78, "learning_rate": 7.389736517940089e-05, "loss": 1.3409, "step": 6935 }, { "epoch": 1.78, "learning_rate": 7.376758745875346e-05, "loss": 1.3892, "step": 6940 }, { "epoch": 1.78, "learning_rate": 7.363785716705688e-05, "loss": 1.3793, "step": 6945 }, { "epoch": 1.78, "learning_rate": 7.350817453886729e-05, "loss": 1.3446, "step": 6950 }, { "epoch": 1.78, "learning_rate": 7.337853980865459e-05, "loss": 1.3599, "step": 6955 }, { "epoch": 1.78, "learning_rate": 7.324895321080201e-05, "loss": 1.3812, "step": 6960 }, { "epoch": 1.78, "learning_rate": 7.311941497960586e-05, "loss": 1.3655, "step": 6965 }, { "epoch": 1.78, "learning_rate": 7.298992534927498e-05, "loss": 1.2942, "step": 6970 }, { "epoch": 1.79, "learning_rate": 7.28604845539303e-05, "loss": 1.3597, "step": 6975 }, { "epoch": 1.79, "learning_rate": 7.273109282760447e-05, "loss": 1.3486, "step": 6980 }, { "epoch": 1.79, "learning_rate": 7.26017504042414e-05, "loss": 1.3881, "step": 6985 }, { "epoch": 1.79, "learning_rate": 7.247245751769596e-05, "loss": 1.408, "step": 6990 }, { "epoch": 1.79, "learning_rate": 7.23432144017333e-05, "loss": 1.312, "step": 6995 }, { "epoch": 1.79, "learning_rate": 7.22398559008156e-05, "loss": 1.368, "step": 7000 }, { "epoch": 1.79, "eval_loss": 1.3932809829711914, "eval_runtime": 2.0256, "eval_samples_per_second": 49.369, "eval_steps_per_second": 1.975, "step": 7000 }, { "epoch": 1.79, "learning_rate": 7.211070296070288e-05, "loss": 1.3496, "step": 7005 }, { "epoch": 1.79, "learning_rate": 7.200741690315187e-05, "loss": 1.3066, "step": 7010 }, { "epoch": 1.8, "learning_rate": 7.187835489546838e-05, "loss": 1.3885, "step": 7015 }, { "epoch": 1.8, "learning_rate": 7.174934373252222e-05, "loss": 1.3138, "step": 7020 }, { "epoch": 1.8, "learning_rate": 7.162038364756922e-05, "loss": 1.3625, "step": 7025 }, { "epoch": 1.8, "learning_rate": 7.151725251245094e-05, "loss": 1.3357, "step": 7030 }, { "epoch": 1.8, "learning_rate": 7.138838495539538e-05, "loss": 1.3793, "step": 7035 }, { "epoch": 1.8, "learning_rate": 7.125956912895679e-05, "loss": 1.3629, "step": 7040 }, { "epoch": 1.8, "learning_rate": 7.113080526603792e-05, "loss": 1.3643, "step": 7045 }, { "epoch": 1.8, "learning_rate": 7.100209359944739e-05, "loss": 1.3606, "step": 7050 }, { "epoch": 1.81, "learning_rate": 7.087343436189962e-05, "loss": 1.3523, "step": 7055 }, { "epoch": 1.81, "learning_rate": 7.074482778601421e-05, "loss": 1.3189, "step": 7060 }, { "epoch": 1.81, "learning_rate": 7.061627410431549e-05, "loss": 1.3774, "step": 7065 }, { "epoch": 1.81, "learning_rate": 7.048777354923217e-05, "loss": 1.3047, "step": 7070 }, { "epoch": 1.81, "learning_rate": 7.0359326353097e-05, "loss": 1.3835, "step": 7075 }, { "epoch": 1.81, "learning_rate": 7.023093274814608e-05, "loss": 1.3136, "step": 7080 }, { "epoch": 1.81, "learning_rate": 7.010259296651876e-05, "loss": 1.3299, "step": 7085 }, { "epoch": 1.81, "learning_rate": 6.997430724025706e-05, "loss": 1.3577, "step": 7090 }, { "epoch": 1.82, "learning_rate": 6.984607580130518e-05, "loss": 1.3134, "step": 7095 }, { "epoch": 1.82, "learning_rate": 6.971789888150926e-05, "loss": 1.3005, "step": 7100 }, { "epoch": 1.82, "learning_rate": 6.958977671261683e-05, "loss": 1.3304, "step": 7105 }, { "epoch": 1.82, "learning_rate": 6.94617095262764e-05, "loss": 1.4018, "step": 7110 }, { "epoch": 1.82, "learning_rate": 6.933369755403711e-05, "loss": 1.285, "step": 7115 }, { "epoch": 1.82, "learning_rate": 6.920574102734827e-05, "loss": 1.3071, "step": 7120 }, { "epoch": 1.82, "learning_rate": 6.907784017755896e-05, "loss": 1.3059, "step": 7125 }, { "epoch": 1.83, "learning_rate": 6.894999523591752e-05, "loss": 1.3412, "step": 7130 }, { "epoch": 1.83, "learning_rate": 6.882220643357129e-05, "loss": 1.3371, "step": 7135 }, { "epoch": 1.83, "learning_rate": 6.869447400156608e-05, "loss": 1.3368, "step": 7140 }, { "epoch": 1.83, "learning_rate": 6.856679817084571e-05, "loss": 1.3812, "step": 7145 }, { "epoch": 1.83, "learning_rate": 6.843917917225178e-05, "loss": 1.3641, "step": 7150 }, { "epoch": 1.83, "learning_rate": 6.831161723652313e-05, "loss": 1.3325, "step": 7155 }, { "epoch": 1.83, "learning_rate": 6.818411259429529e-05, "loss": 1.3737, "step": 7160 }, { "epoch": 1.83, "learning_rate": 6.80566654761003e-05, "loss": 1.3421, "step": 7165 }, { "epoch": 1.84, "learning_rate": 6.792927611236628e-05, "loss": 1.3544, "step": 7170 }, { "epoch": 1.84, "learning_rate": 6.780194473341675e-05, "loss": 1.3373, "step": 7175 }, { "epoch": 1.84, "learning_rate": 6.767467156947049e-05, "loss": 1.3546, "step": 7180 }, { "epoch": 1.84, "learning_rate": 6.754745685064105e-05, "loss": 1.342, "step": 7185 }, { "epoch": 1.84, "learning_rate": 6.742030080693625e-05, "loss": 1.3306, "step": 7190 }, { "epoch": 1.84, "learning_rate": 6.729320366825784e-05, "loss": 1.3223, "step": 7195 }, { "epoch": 1.84, "learning_rate": 6.716616566440108e-05, "loss": 1.3691, "step": 7200 }, { "epoch": 1.84, "learning_rate": 6.703918702505439e-05, "loss": 1.3439, "step": 7205 }, { "epoch": 1.85, "learning_rate": 6.691226797979864e-05, "loss": 1.3378, "step": 7210 }, { "epoch": 1.85, "learning_rate": 6.678540875810712e-05, "loss": 1.3042, "step": 7215 }, { "epoch": 1.85, "learning_rate": 6.665860958934504e-05, "loss": 1.32, "step": 7220 }, { "epoch": 1.85, "learning_rate": 6.653187070276877e-05, "loss": 1.3102, "step": 7225 }, { "epoch": 1.85, "learning_rate": 6.64051923275259e-05, "loss": 1.305, "step": 7230 }, { "epoch": 1.85, "learning_rate": 6.62785746926546e-05, "loss": 1.3073, "step": 7235 }, { "epoch": 1.85, "learning_rate": 6.615201802708307e-05, "loss": 1.3606, "step": 7240 }, { "epoch": 1.85, "learning_rate": 6.602552255962942e-05, "loss": 1.3425, "step": 7245 }, { "epoch": 1.86, "learning_rate": 6.589908851900112e-05, "loss": 1.3135, "step": 7250 }, { "epoch": 1.86, "learning_rate": 6.577271613379442e-05, "loss": 1.3483, "step": 7255 }, { "epoch": 1.86, "learning_rate": 6.564640563249426e-05, "loss": 1.3177, "step": 7260 }, { "epoch": 1.86, "learning_rate": 6.552015724347366e-05, "loss": 1.3023, "step": 7265 }, { "epoch": 1.86, "learning_rate": 6.539397119499326e-05, "loss": 1.2986, "step": 7270 }, { "epoch": 1.86, "learning_rate": 6.526784771520107e-05, "loss": 1.357, "step": 7275 }, { "epoch": 1.86, "learning_rate": 6.514178703213192e-05, "loss": 1.3706, "step": 7280 }, { "epoch": 1.86, "learning_rate": 6.501578937370716e-05, "loss": 1.3431, "step": 7285 }, { "epoch": 1.87, "learning_rate": 6.48898549677341e-05, "loss": 1.3846, "step": 7290 }, { "epoch": 1.87, "learning_rate": 6.476398404190578e-05, "loss": 1.3383, "step": 7295 }, { "epoch": 1.87, "learning_rate": 6.463817682380045e-05, "loss": 1.3783, "step": 7300 }, { "epoch": 1.87, "learning_rate": 6.451243354088107e-05, "loss": 1.3338, "step": 7305 }, { "epoch": 1.87, "learning_rate": 6.438675442049518e-05, "loss": 1.321, "step": 7310 }, { "epoch": 1.87, "learning_rate": 6.42611396898742e-05, "loss": 1.3351, "step": 7315 }, { "epoch": 1.87, "learning_rate": 6.413558957613308e-05, "loss": 1.331, "step": 7320 }, { "epoch": 1.88, "learning_rate": 6.401010430627006e-05, "loss": 1.3132, "step": 7325 }, { "epoch": 1.88, "learning_rate": 6.388468410716613e-05, "loss": 1.3398, "step": 7330 }, { "epoch": 1.88, "learning_rate": 6.380946331600652e-05, "loss": 1.2932, "step": 7335 }, { "epoch": 1.88, "learning_rate": 6.370920557903063e-05, "loss": 1.3546, "step": 7340 }, { "epoch": 1.88, "learning_rate": 6.358394247471778e-05, "loss": 1.326, "step": 7345 }, { "epoch": 1.88, "learning_rate": 6.345874521167675e-05, "loss": 1.3047, "step": 7350 }, { "epoch": 1.88, "learning_rate": 6.333361401626776e-05, "loss": 1.3499, "step": 7355 }, { "epoch": 1.88, "learning_rate": 6.320854911473157e-05, "loss": 1.3802, "step": 7360 }, { "epoch": 1.89, "learning_rate": 6.308355073318911e-05, "loss": 1.3391, "step": 7365 }, { "epoch": 1.89, "learning_rate": 6.295861909764105e-05, "loss": 1.3309, "step": 7370 }, { "epoch": 1.89, "learning_rate": 6.283375443396726e-05, "loss": 1.3428, "step": 7375 }, { "epoch": 1.89, "learning_rate": 6.270895696792669e-05, "loss": 1.3402, "step": 7380 }, { "epoch": 1.89, "learning_rate": 6.258422692515673e-05, "loss": 1.3278, "step": 7385 }, { "epoch": 1.89, "learning_rate": 6.245956453117282e-05, "loss": 1.3833, "step": 7390 }, { "epoch": 1.89, "learning_rate": 6.233497001136815e-05, "loss": 1.3758, "step": 7395 }, { "epoch": 1.89, "learning_rate": 6.221044359101317e-05, "loss": 1.3332, "step": 7400 }, { "epoch": 1.9, "learning_rate": 6.208598549525518e-05, "loss": 1.367, "step": 7405 }, { "epoch": 1.9, "learning_rate": 6.196159594911799e-05, "loss": 1.3435, "step": 7410 }, { "epoch": 1.9, "learning_rate": 6.18372751775015e-05, "loss": 1.3399, "step": 7415 }, { "epoch": 1.9, "learning_rate": 6.171302340518111e-05, "loss": 1.3217, "step": 7420 }, { "epoch": 1.9, "learning_rate": 6.158884085680762e-05, "loss": 1.3208, "step": 7425 }, { "epoch": 1.9, "learning_rate": 6.146472775690665e-05, "loss": 1.3457, "step": 7430 }, { "epoch": 1.9, "learning_rate": 6.134068432987825e-05, "loss": 1.3435, "step": 7435 }, { "epoch": 1.9, "learning_rate": 6.121671079999638e-05, "loss": 1.2834, "step": 7440 }, { "epoch": 1.91, "learning_rate": 6.10928073914088e-05, "loss": 1.3118, "step": 7445 }, { "epoch": 1.91, "learning_rate": 6.09689743281364e-05, "loss": 1.2927, "step": 7450 }, { "epoch": 1.91, "learning_rate": 6.084521183407286e-05, "loss": 1.3626, "step": 7455 }, { "epoch": 1.91, "learning_rate": 6.0721520132984335e-05, "loss": 1.3948, "step": 7460 }, { "epoch": 1.91, "learning_rate": 6.0597899448508955e-05, "loss": 1.299, "step": 7465 }, { "epoch": 1.91, "learning_rate": 6.047435000415642e-05, "loss": 1.3198, "step": 7470 }, { "epoch": 1.91, "learning_rate": 6.035087202330767e-05, "loss": 1.3511, "step": 7475 }, { "epoch": 1.91, "learning_rate": 6.022746572921447e-05, "loss": 1.3859, "step": 7480 }, { "epoch": 1.92, "learning_rate": 6.0104131344998814e-05, "loss": 1.3216, "step": 7485 }, { "epoch": 1.92, "learning_rate": 6.003016532398079e-05, "loss": 1.3332, "step": 7490 }, { "epoch": 1.92, "learning_rate": 5.9906946459338656e-05, "loss": 1.307, "step": 7495 }, { "epoch": 1.92, "learning_rate": 5.978380008408209e-05, "loss": 1.3368, "step": 7500 }, { "epoch": 1.92, "learning_rate": 5.9660726420863255e-05, "loss": 1.3286, "step": 7505 }, { "epoch": 1.92, "learning_rate": 5.9537725692202906e-05, "loss": 1.3934, "step": 7510 }, { "epoch": 1.92, "learning_rate": 5.941479812048982e-05, "loss": 1.3065, "step": 7515 }, { "epoch": 1.92, "learning_rate": 5.9291943927980584e-05, "loss": 1.2723, "step": 7520 }, { "epoch": 1.93, "learning_rate": 5.9169163336799085e-05, "loss": 1.3582, "step": 7525 }, { "epoch": 1.93, "learning_rate": 5.904645656893615e-05, "loss": 1.3274, "step": 7530 }, { "epoch": 1.93, "learning_rate": 5.892382384624907e-05, "loss": 1.3052, "step": 7535 }, { "epoch": 1.93, "learning_rate": 5.880126539046136e-05, "loss": 1.359, "step": 7540 }, { "epoch": 1.93, "learning_rate": 5.867878142316221e-05, "loss": 1.2711, "step": 7545 }, { "epoch": 1.93, "learning_rate": 5.855637216580605e-05, "loss": 1.3067, "step": 7550 }, { "epoch": 1.93, "learning_rate": 5.84340378397124e-05, "loss": 1.3473, "step": 7555 }, { "epoch": 1.94, "learning_rate": 5.831177866606523e-05, "loss": 1.3167, "step": 7560 }, { "epoch": 1.94, "learning_rate": 5.81895948659125e-05, "loss": 1.2768, "step": 7565 }, { "epoch": 1.94, "learning_rate": 5.806748666016612e-05, "loss": 1.3302, "step": 7570 }, { "epoch": 1.94, "learning_rate": 5.7945454269601185e-05, "loss": 1.298, "step": 7575 }, { "epoch": 1.94, "learning_rate": 5.782349791485573e-05, "loss": 1.2749, "step": 7580 }, { "epoch": 1.94, "learning_rate": 5.7725987725030394e-05, "loss": 1.296, "step": 7585 }, { "epoch": 1.94, "learning_rate": 5.760416879032877e-05, "loss": 1.3993, "step": 7590 }, { "epoch": 1.94, "learning_rate": 5.748242650850055e-05, "loss": 1.3955, "step": 7595 }, { "epoch": 1.95, "learning_rate": 5.7360761099659235e-05, "loss": 1.3511, "step": 7600 }, { "epoch": 1.95, "learning_rate": 5.723917278377931e-05, "loss": 1.3161, "step": 7605 }, { "epoch": 1.95, "learning_rate": 5.711766178069597e-05, "loss": 1.3537, "step": 7610 }, { "epoch": 1.95, "learning_rate": 5.69962283101046e-05, "loss": 1.2801, "step": 7615 }, { "epoch": 1.95, "learning_rate": 5.6874872591560346e-05, "loss": 1.3439, "step": 7620 }, { "epoch": 1.95, "learning_rate": 5.675359484447776e-05, "loss": 1.2978, "step": 7625 }, { "epoch": 1.95, "learning_rate": 5.663239528813058e-05, "loss": 1.2987, "step": 7630 }, { "epoch": 1.95, "learning_rate": 5.6511274141651005e-05, "loss": 1.3354, "step": 7635 }, { "epoch": 1.96, "learning_rate": 5.63902316240295e-05, "loss": 1.2766, "step": 7640 }, { "epoch": 1.96, "learning_rate": 5.626926795411447e-05, "loss": 1.3342, "step": 7645 }, { "epoch": 1.96, "learning_rate": 5.6148383350611636e-05, "loss": 1.3392, "step": 7650 }, { "epoch": 1.96, "learning_rate": 5.602757803208377e-05, "loss": 1.2999, "step": 7655 }, { "epoch": 1.96, "learning_rate": 5.5906852216950434e-05, "loss": 1.3485, "step": 7660 }, { "epoch": 1.96, "learning_rate": 5.5786206123487295e-05, "loss": 1.3972, "step": 7665 }, { "epoch": 1.96, "learning_rate": 5.56656399698259e-05, "loss": 1.3637, "step": 7670 }, { "epoch": 1.96, "learning_rate": 5.5545153973953377e-05, "loss": 1.312, "step": 7675 }, { "epoch": 1.97, "learning_rate": 5.542474835371181e-05, "loss": 1.3149, "step": 7680 }, { "epoch": 1.97, "learning_rate": 5.530442332679799e-05, "loss": 1.2972, "step": 7685 }, { "epoch": 1.97, "learning_rate": 5.518417911076297e-05, "loss": 1.2729, "step": 7690 }, { "epoch": 1.97, "learning_rate": 5.5064015923011804e-05, "loss": 1.3449, "step": 7695 }, { "epoch": 1.97, "learning_rate": 5.494393398080292e-05, "loss": 1.3416, "step": 7700 }, { "epoch": 1.97, "learning_rate": 5.482393350124786e-05, "loss": 1.3982, "step": 7705 }, { "epoch": 1.97, "learning_rate": 5.4704014701311013e-05, "loss": 1.3364, "step": 7710 }, { "epoch": 1.97, "learning_rate": 5.458417779780893e-05, "loss": 1.2942, "step": 7715 }, { "epoch": 1.98, "learning_rate": 5.4464423007410125e-05, "loss": 1.3555, "step": 7720 }, { "epoch": 1.98, "learning_rate": 5.4344750546634805e-05, "loss": 1.3627, "step": 7725 }, { "epoch": 1.98, "learning_rate": 5.422516063185403e-05, "loss": 1.2841, "step": 7730 }, { "epoch": 1.98, "learning_rate": 5.41056534792899e-05, "loss": 1.3398, "step": 7735 }, { "epoch": 1.98, "learning_rate": 5.398622930501467e-05, "loss": 1.3023, "step": 7740 }, { "epoch": 1.98, "learning_rate": 5.386688832495076e-05, "loss": 1.3116, "step": 7745 }, { "epoch": 1.98, "learning_rate": 5.374763075487002e-05, "loss": 1.336, "step": 7750 }, { "epoch": 1.99, "learning_rate": 5.3628456810393476e-05, "loss": 1.3207, "step": 7755 }, { "epoch": 1.99, "learning_rate": 5.350936670699114e-05, "loss": 1.3143, "step": 7760 }, { "epoch": 1.99, "learning_rate": 5.3390360659981266e-05, "loss": 1.3169, "step": 7765 }, { "epoch": 1.99, "learning_rate": 5.327143888453013e-05, "loss": 1.3261, "step": 7770 }, { "epoch": 1.99, "learning_rate": 5.3152601595651843e-05, "loss": 1.3181, "step": 7775 }, { "epoch": 1.99, "learning_rate": 5.303384900820744e-05, "loss": 1.3681, "step": 7780 }, { "epoch": 1.99, "learning_rate": 5.291518133690507e-05, "loss": 1.2813, "step": 7785 }, { "epoch": 1.99, "learning_rate": 5.2796598796299356e-05, "loss": 1.3234, "step": 7790 }, { "epoch": 2.0, "learning_rate": 5.267810160079076e-05, "loss": 1.3349, "step": 7795 }, { "epoch": 2.0, "learning_rate": 5.255968996462571e-05, "loss": 1.331, "step": 7800 }, { "epoch": 2.0, "learning_rate": 5.24413641018958e-05, "loss": 1.3721, "step": 7805 }, { "epoch": 2.0, "learning_rate": 5.232312422653749e-05, "loss": 1.3645, "step": 7810 }, { "epoch": 2.0, "learning_rate": 5.2228594380824624e-05, "loss": 1.1685, "step": 7815 }, { "epoch": 2.0, "learning_rate": 5.21105098213589e-05, "loss": 0.9281, "step": 7820 }, { "epoch": 2.0, "learning_rate": 5.199251184745883e-05, "loss": 0.8604, "step": 7825 }, { "epoch": 2.0, "learning_rate": 5.189817595331834e-05, "loss": 0.7993, "step": 7830 }, { "epoch": 2.01, "learning_rate": 5.1780334370957174e-05, "loss": 0.8365, "step": 7835 }, { "epoch": 2.01, "learning_rate": 5.166257997112825e-05, "loss": 0.8685, "step": 7840 }, { "epoch": 2.01, "learning_rate": 5.1544912966734994e-05, "loss": 0.8127, "step": 7845 }, { "epoch": 2.01, "learning_rate": 5.1427333570522604e-05, "loss": 0.8241, "step": 7850 }, { "epoch": 2.01, "learning_rate": 5.130984199507811e-05, "loss": 0.8348, "step": 7855 }, { "epoch": 2.01, "learning_rate": 5.1192438452829484e-05, "loss": 0.8307, "step": 7860 }, { "epoch": 2.01, "learning_rate": 5.10751231560458e-05, "loss": 0.8332, "step": 7865 }, { "epoch": 2.01, "learning_rate": 5.098133459789646e-05, "loss": 0.7993, "step": 7870 }, { "epoch": 2.02, "learning_rate": 5.086417867735782e-05, "loss": 0.8072, "step": 7875 }, { "epoch": 2.02, "learning_rate": 5.074711159578711e-05, "loss": 0.8111, "step": 7880 }, { "epoch": 2.02, "learning_rate": 5.0630133564845004e-05, "loss": 0.8453, "step": 7885 }, { "epoch": 2.02, "learning_rate": 5.051324479603106e-05, "loss": 0.8498, "step": 7890 }, { "epoch": 2.02, "learning_rate": 5.039644550068361e-05, "loss": 0.82, "step": 7895 }, { "epoch": 2.02, "learning_rate": 5.027973588997896e-05, "loss": 0.8221, "step": 7900 }, { "epoch": 2.02, "learning_rate": 5.0163116174931546e-05, "loss": 0.8248, "step": 7905 }, { "epoch": 2.02, "learning_rate": 5.004658656639311e-05, "loss": 0.7862, "step": 7910 }, { "epoch": 2.03, "learning_rate": 4.9930147275052455e-05, "loss": 0.7912, "step": 7915 }, { "epoch": 2.03, "learning_rate": 4.9813798511435236e-05, "loss": 0.826, "step": 7920 }, { "epoch": 2.03, "learning_rate": 4.9697540485903295e-05, "loss": 0.8136, "step": 7925 }, { "epoch": 2.03, "learning_rate": 4.958137340865442e-05, "loss": 0.8145, "step": 7930 }, { "epoch": 2.03, "learning_rate": 4.946529748972209e-05, "loss": 0.8301, "step": 7935 }, { "epoch": 2.03, "learning_rate": 4.9349312938974835e-05, "loss": 0.8612, "step": 7940 }, { "epoch": 2.03, "learning_rate": 4.9233419966116036e-05, "loss": 0.8155, "step": 7945 }, { "epoch": 2.04, "learning_rate": 4.911761878068345e-05, "loss": 0.8126, "step": 7950 }, { "epoch": 2.04, "learning_rate": 4.900190959204902e-05, "loss": 0.8259, "step": 7955 }, { "epoch": 2.04, "learning_rate": 4.888629260941823e-05, "loss": 0.837, "step": 7960 }, { "epoch": 2.04, "learning_rate": 4.877076804182982e-05, "loss": 0.8169, "step": 7965 }, { "epoch": 2.04, "learning_rate": 4.86553360981556e-05, "loss": 0.8136, "step": 7970 }, { "epoch": 2.04, "learning_rate": 4.853999698709978e-05, "loss": 0.8307, "step": 7975 }, { "epoch": 2.04, "learning_rate": 4.8424750917198725e-05, "loss": 0.8249, "step": 7980 }, { "epoch": 2.04, "learning_rate": 4.830959809682071e-05, "loss": 0.8184, "step": 7985 }, { "epoch": 2.05, "learning_rate": 4.819453873416526e-05, "loss": 0.8315, "step": 7990 }, { "epoch": 2.05, "learning_rate": 4.807957303726297e-05, "loss": 0.7969, "step": 7995 }, { "epoch": 2.05, "learning_rate": 4.7964701213975174e-05, "loss": 0.8141, "step": 8000 }, { "epoch": 2.05, "eval_loss": 1.541500210762024, "eval_runtime": 2.0224, "eval_samples_per_second": 49.447, "eval_steps_per_second": 1.978, "step": 8000 }, { "epoch": 2.05, "learning_rate": 4.784992347199336e-05, "loss": 0.7866, "step": 8005 }, { "epoch": 2.05, "learning_rate": 4.773524001883898e-05, "loss": 0.8183, "step": 8010 }, { "epoch": 2.05, "learning_rate": 4.7620651061862945e-05, "loss": 0.8114, "step": 8015 }, { "epoch": 2.05, "learning_rate": 4.750615680824543e-05, "loss": 0.8187, "step": 8020 }, { "epoch": 2.05, "learning_rate": 4.739175746499528e-05, "loss": 0.8116, "step": 8025 }, { "epoch": 2.06, "learning_rate": 4.727745323894976e-05, "loss": 0.8192, "step": 8030 }, { "epoch": 2.06, "learning_rate": 4.716324433677422e-05, "loss": 0.7999, "step": 8035 }, { "epoch": 2.06, "learning_rate": 4.7049130964961575e-05, "loss": 0.822, "step": 8040 }, { "epoch": 2.06, "learning_rate": 4.693511332983205e-05, "loss": 0.8005, "step": 8045 }, { "epoch": 2.06, "learning_rate": 4.682119163753288e-05, "loss": 0.8102, "step": 8050 }, { "epoch": 2.06, "learning_rate": 4.67073660940376e-05, "loss": 0.8282, "step": 8055 }, { "epoch": 2.06, "learning_rate": 4.659363690514613e-05, "loss": 0.8053, "step": 8060 }, { "epoch": 2.06, "learning_rate": 4.648000427648405e-05, "loss": 0.7925, "step": 8065 }, { "epoch": 2.07, "learning_rate": 4.636646841350244e-05, "loss": 0.7961, "step": 8070 }, { "epoch": 2.07, "learning_rate": 4.6253029521477364e-05, "loss": 0.8349, "step": 8075 }, { "epoch": 2.07, "learning_rate": 4.616234836477959e-05, "loss": 0.791, "step": 8080 }, { "epoch": 2.07, "learning_rate": 4.6049084537212075e-05, "loss": 0.7877, "step": 8085 }, { "epoch": 2.07, "learning_rate": 4.593591825444028e-05, "loss": 0.8458, "step": 8090 }, { "epoch": 2.07, "learning_rate": 4.5822849721071916e-05, "loss": 0.8374, "step": 8095 }, { "epoch": 2.07, "learning_rate": 4.570987914153823e-05, "loss": 0.8018, "step": 8100 }, { "epoch": 2.07, "learning_rate": 4.5597006720093195e-05, "loss": 0.7831, "step": 8105 }, { "epoch": 2.08, "learning_rate": 4.548423266081344e-05, "loss": 0.8071, "step": 8110 }, { "epoch": 2.08, "learning_rate": 4.537155716759763e-05, "loss": 0.8404, "step": 8115 }, { "epoch": 2.08, "learning_rate": 4.5258980444166276e-05, "loss": 0.7918, "step": 8120 }, { "epoch": 2.08, "learning_rate": 4.514650269406134e-05, "loss": 0.7938, "step": 8125 }, { "epoch": 2.08, "learning_rate": 4.503412412064579e-05, "loss": 0.8247, "step": 8130 }, { "epoch": 2.08, "learning_rate": 4.4921844927103264e-05, "loss": 0.8304, "step": 8135 }, { "epoch": 2.08, "learning_rate": 4.483209326220262e-05, "loss": 0.8135, "step": 8140 }, { "epoch": 2.08, "learning_rate": 4.47199934638807e-05, "loss": 0.7624, "step": 8145 }, { "epoch": 2.09, "learning_rate": 4.4607993613388976e-05, "loss": 0.8426, "step": 8150 }, { "epoch": 2.09, "learning_rate": 4.4496093913226475e-05, "loss": 0.8086, "step": 8155 }, { "epoch": 2.09, "learning_rate": 4.438429456571097e-05, "loss": 0.8011, "step": 8160 }, { "epoch": 2.09, "learning_rate": 4.4272595772978944e-05, "loss": 0.7947, "step": 8165 }, { "epoch": 2.09, "learning_rate": 4.416099773698497e-05, "loss": 0.7717, "step": 8170 }, { "epoch": 2.09, "learning_rate": 4.4049500659501444e-05, "loss": 0.789, "step": 8175 }, { "epoch": 2.09, "learning_rate": 4.393810474211837e-05, "loss": 0.7948, "step": 8180 }, { "epoch": 2.1, "learning_rate": 4.3826810186242615e-05, "loss": 0.827, "step": 8185 }, { "epoch": 2.1, "learning_rate": 4.371561719309798e-05, "loss": 0.8249, "step": 8190 }, { "epoch": 2.1, "learning_rate": 4.360452596372464e-05, "loss": 0.7775, "step": 8195 }, { "epoch": 2.1, "learning_rate": 4.3493536698978566e-05, "loss": 0.7961, "step": 8200 }, { "epoch": 2.1, "learning_rate": 4.3382649599531636e-05, "loss": 0.8309, "step": 8205 }, { "epoch": 2.1, "learning_rate": 4.327186486587085e-05, "loss": 0.8117, "step": 8210 }, { "epoch": 2.1, "learning_rate": 4.316118269829812e-05, "loss": 0.8262, "step": 8215 }, { "epoch": 2.1, "learning_rate": 4.307271094630797e-05, "loss": 0.8273, "step": 8220 }, { "epoch": 2.11, "learning_rate": 4.296221390186261e-05, "loss": 0.8161, "step": 8225 }, { "epoch": 2.11, "learning_rate": 4.2851819983363185e-05, "loss": 0.7935, "step": 8230 }, { "epoch": 2.11, "learning_rate": 4.2741529390405e-05, "loss": 0.7901, "step": 8235 }, { "epoch": 2.11, "learning_rate": 4.263134232239657e-05, "loss": 0.8162, "step": 8240 }, { "epoch": 2.11, "learning_rate": 4.252125897855932e-05, "loss": 0.7729, "step": 8245 }, { "epoch": 2.11, "learning_rate": 4.2411279557927064e-05, "loss": 0.8068, "step": 8250 }, { "epoch": 2.11, "learning_rate": 4.2301404259345665e-05, "loss": 0.8164, "step": 8255 }, { "epoch": 2.11, "learning_rate": 4.2191633281472905e-05, "loss": 0.7805, "step": 8260 }, { "epoch": 2.12, "learning_rate": 4.208196682277781e-05, "loss": 0.8267, "step": 8265 }, { "epoch": 2.12, "learning_rate": 4.1972405081540445e-05, "loss": 0.7728, "step": 8270 }, { "epoch": 2.12, "learning_rate": 4.186294825585154e-05, "loss": 0.789, "step": 8275 }, { "epoch": 2.12, "learning_rate": 4.1753596543612236e-05, "loss": 0.8235, "step": 8280 }, { "epoch": 2.12, "learning_rate": 4.1644350142533514e-05, "loss": 0.8251, "step": 8285 }, { "epoch": 2.12, "learning_rate": 4.1535209250135964e-05, "loss": 0.7859, "step": 8290 }, { "epoch": 2.12, "learning_rate": 4.142617406374948e-05, "loss": 0.7764, "step": 8295 }, { "epoch": 2.12, "learning_rate": 4.131724478051278e-05, "loss": 0.7557, "step": 8300 }, { "epoch": 2.13, "learning_rate": 4.120842159737307e-05, "loss": 0.817, "step": 8305 }, { "epoch": 2.13, "learning_rate": 4.109970471108585e-05, "loss": 0.8062, "step": 8310 }, { "epoch": 2.13, "learning_rate": 4.0991094318214305e-05, "loss": 0.7775, "step": 8315 }, { "epoch": 2.13, "learning_rate": 4.088259061512912e-05, "loss": 0.8345, "step": 8320 }, { "epoch": 2.13, "learning_rate": 4.077419379800813e-05, "loss": 0.78, "step": 8325 }, { "epoch": 2.13, "learning_rate": 4.0665904062835856e-05, "loss": 0.79, "step": 8330 }, { "epoch": 2.13, "learning_rate": 4.055772160540323e-05, "loss": 0.8021, "step": 8335 }, { "epoch": 2.13, "learning_rate": 4.044964662130719e-05, "loss": 0.8264, "step": 8340 }, { "epoch": 2.14, "learning_rate": 4.036326414614985e-05, "loss": 0.8133, "step": 8345 }, { "epoch": 2.14, "learning_rate": 4.0255383106343646e-05, "loss": 0.7864, "step": 8350 }, { "epoch": 2.14, "learning_rate": 4.014761008651069e-05, "loss": 0.7778, "step": 8355 }, { "epoch": 2.14, "learning_rate": 4.0039945281507665e-05, "loss": 0.844, "step": 8360 }, { "epoch": 2.14, "learning_rate": 3.9932388885995654e-05, "loss": 0.7857, "step": 8365 }, { "epoch": 2.14, "learning_rate": 3.9824941094439685e-05, "loss": 0.7739, "step": 8370 }, { "epoch": 2.14, "learning_rate": 3.971760210110841e-05, "loss": 0.7892, "step": 8375 }, { "epoch": 2.15, "learning_rate": 3.961037210007378e-05, "loss": 0.8123, "step": 8380 }, { "epoch": 2.15, "learning_rate": 3.950325128521075e-05, "loss": 0.7909, "step": 8385 }, { "epoch": 2.15, "learning_rate": 3.939623985019679e-05, "loss": 0.7824, "step": 8390 }, { "epoch": 2.15, "learning_rate": 3.9289337988511584e-05, "loss": 0.788, "step": 8395 }, { "epoch": 2.15, "learning_rate": 3.918254589343683e-05, "loss": 0.8426, "step": 8400 }, { "epoch": 2.15, "learning_rate": 3.9075863758055655e-05, "loss": 0.7851, "step": 8405 }, { "epoch": 2.15, "learning_rate": 3.896929177525235e-05, "loss": 0.7971, "step": 8410 }, { "epoch": 2.15, "learning_rate": 3.886283013771223e-05, "loss": 0.7831, "step": 8415 }, { "epoch": 2.16, "learning_rate": 3.8756479037920815e-05, "loss": 0.7948, "step": 8420 }, { "epoch": 2.16, "learning_rate": 3.865023866816405e-05, "loss": 0.7827, "step": 8425 }, { "epoch": 2.16, "learning_rate": 3.854410922052748e-05, "loss": 0.7868, "step": 8430 }, { "epoch": 2.16, "learning_rate": 3.843809088689626e-05, "loss": 0.8085, "step": 8435 }, { "epoch": 2.16, "learning_rate": 3.833218385895451e-05, "loss": 0.7999, "step": 8440 }, { "epoch": 2.16, "learning_rate": 3.8226388328185136e-05, "loss": 0.7836, "step": 8445 }, { "epoch": 2.16, "learning_rate": 3.812070448586955e-05, "loss": 0.8044, "step": 8450 }, { "epoch": 2.16, "learning_rate": 3.8015132523087116e-05, "loss": 0.8189, "step": 8455 }, { "epoch": 2.17, "learning_rate": 3.790967263071492e-05, "loss": 0.796, "step": 8460 }, { "epoch": 2.17, "learning_rate": 3.7825385535652703e-05, "loss": 0.7954, "step": 8465 }, { "epoch": 2.17, "learning_rate": 3.772012785038236e-05, "loss": 0.7662, "step": 8470 }, { "epoch": 2.17, "learning_rate": 3.76149827688992e-05, "loss": 0.7668, "step": 8475 }, { "epoch": 2.17, "learning_rate": 3.7509950481308496e-05, "loss": 0.8148, "step": 8480 }, { "epoch": 2.17, "learning_rate": 3.7405031177511594e-05, "loss": 0.8042, "step": 8485 }, { "epoch": 2.17, "learning_rate": 3.730022504720566e-05, "loss": 0.7851, "step": 8490 }, { "epoch": 2.17, "learning_rate": 3.721646175522023e-05, "loss": 0.8073, "step": 8495 }, { "epoch": 2.18, "learning_rate": 3.711185981458125e-05, "loss": 0.7768, "step": 8500 }, { "epoch": 2.18, "learning_rate": 3.700737157749543e-05, "loss": 0.7647, "step": 8505 }, { "epoch": 2.18, "learning_rate": 3.6902997232880454e-05, "loss": 0.7943, "step": 8510 }, { "epoch": 2.18, "learning_rate": 3.679873696944809e-05, "loss": 0.8003, "step": 8515 }, { "epoch": 2.18, "learning_rate": 3.671541102383639e-05, "loss": 0.7862, "step": 8520 }, { "epoch": 2.18, "learning_rate": 3.6611356581427884e-05, "loss": 0.7788, "step": 8525 }, { "epoch": 2.18, "learning_rate": 3.6507416747496634e-05, "loss": 0.8285, "step": 8530 }, { "epoch": 2.18, "learning_rate": 3.640359170996892e-05, "loss": 0.8049, "step": 8535 }, { "epoch": 2.19, "learning_rate": 3.6299881656563206e-05, "loss": 0.8017, "step": 8540 }, { "epoch": 2.19, "learning_rate": 3.6196286774790354e-05, "loss": 0.8083, "step": 8545 }, { "epoch": 2.19, "learning_rate": 3.6092807251952824e-05, "loss": 0.8056, "step": 8550 }, { "epoch": 2.19, "learning_rate": 3.598944327514452e-05, "loss": 0.7709, "step": 8555 }, { "epoch": 2.19, "learning_rate": 3.588619503125052e-05, "loss": 0.8217, "step": 8560 }, { "epoch": 2.19, "learning_rate": 3.578306270694658e-05, "loss": 0.7695, "step": 8565 }, { "epoch": 2.19, "learning_rate": 3.568004648869884e-05, "loss": 0.7713, "step": 8570 }, { "epoch": 2.2, "learning_rate": 3.557714656276363e-05, "loss": 0.7982, "step": 8575 }, { "epoch": 2.2, "learning_rate": 3.5474363115186905e-05, "loss": 0.7633, "step": 8580 }, { "epoch": 2.2, "learning_rate": 3.537169633180416e-05, "loss": 0.799, "step": 8585 }, { "epoch": 2.2, "learning_rate": 3.5269146398239726e-05, "loss": 0.8049, "step": 8590 }, { "epoch": 2.2, "learning_rate": 3.516671349990693e-05, "loss": 0.8212, "step": 8595 }, { "epoch": 2.2, "learning_rate": 3.506439782200735e-05, "loss": 0.778, "step": 8600 }, { "epoch": 2.2, "learning_rate": 3.496219954953061e-05, "loss": 0.8048, "step": 8605 }, { "epoch": 2.2, "learning_rate": 3.486011886725414e-05, "loss": 0.7691, "step": 8610 }, { "epoch": 2.21, "learning_rate": 3.4758155959742755e-05, "loss": 0.8099, "step": 8615 }, { "epoch": 2.21, "learning_rate": 3.465631101134822e-05, "loss": 0.8097, "step": 8620 }, { "epoch": 2.21, "learning_rate": 3.455458420620921e-05, "loss": 0.8042, "step": 8625 }, { "epoch": 2.21, "learning_rate": 3.4452975728250626e-05, "loss": 0.7527, "step": 8630 }, { "epoch": 2.21, "learning_rate": 3.435148576118353e-05, "loss": 0.763, "step": 8635 }, { "epoch": 2.21, "learning_rate": 3.425011448850463e-05, "loss": 0.8004, "step": 8640 }, { "epoch": 2.21, "learning_rate": 3.414886209349615e-05, "loss": 0.7536, "step": 8645 }, { "epoch": 2.21, "learning_rate": 3.404772875922528e-05, "loss": 0.7591, "step": 8650 }, { "epoch": 2.22, "learning_rate": 3.3946714668543945e-05, "loss": 0.7937, "step": 8655 }, { "epoch": 2.22, "learning_rate": 3.384582000408857e-05, "loss": 0.8083, "step": 8660 }, { "epoch": 2.22, "learning_rate": 3.3745044948279544e-05, "loss": 0.7987, "step": 8665 }, { "epoch": 2.22, "learning_rate": 3.3644389683321044e-05, "loss": 0.787, "step": 8670 }, { "epoch": 2.22, "learning_rate": 3.354385439120069e-05, "loss": 0.7325, "step": 8675 }, { "epoch": 2.22, "learning_rate": 3.344343925368916e-05, "loss": 0.802, "step": 8680 }, { "epoch": 2.22, "learning_rate": 3.3343144452339836e-05, "loss": 0.7558, "step": 8685 }, { "epoch": 2.22, "learning_rate": 3.3242970168488654e-05, "loss": 0.7981, "step": 8690 }, { "epoch": 2.23, "learning_rate": 3.3142916583253546e-05, "loss": 0.7896, "step": 8695 }, { "epoch": 2.23, "learning_rate": 3.304298387753426e-05, "loss": 0.7875, "step": 8700 }, { "epoch": 2.23, "learning_rate": 3.294317223201194e-05, "loss": 0.7928, "step": 8705 }, { "epoch": 2.23, "learning_rate": 3.284348182714896e-05, "loss": 0.815, "step": 8710 }, { "epoch": 2.23, "learning_rate": 3.274391284318838e-05, "loss": 0.784, "step": 8715 }, { "epoch": 2.23, "learning_rate": 3.264446546015375e-05, "loss": 0.8006, "step": 8720 }, { "epoch": 2.23, "learning_rate": 3.2545139857848814e-05, "loss": 0.7632, "step": 8725 }, { "epoch": 2.23, "learning_rate": 3.2445936215857096e-05, "loss": 0.7857, "step": 8730 }, { "epoch": 2.24, "learning_rate": 3.234685471354155e-05, "loss": 0.7726, "step": 8735 }, { "epoch": 2.24, "learning_rate": 3.224789553004446e-05, "loss": 0.7899, "step": 8740 }, { "epoch": 2.24, "learning_rate": 3.21490588442868e-05, "loss": 0.7997, "step": 8745 }, { "epoch": 2.24, "learning_rate": 3.2070077814146595e-05, "loss": 0.8176, "step": 8750 }, { "epoch": 2.24, "learning_rate": 3.199117538832358e-05, "loss": 0.8142, "step": 8755 }, { "epoch": 2.24, "learning_rate": 3.18926580324486e-05, "loss": 0.7462, "step": 8760 }, { "epoch": 2.24, "learning_rate": 3.179426381659256e-05, "loss": 0.7717, "step": 8765 }, { "epoch": 2.24, "learning_rate": 3.169599291865509e-05, "loss": 0.8044, "step": 8770 }, { "epoch": 2.25, "learning_rate": 3.159784551631273e-05, "loss": 0.7837, "step": 8775 }, { "epoch": 2.25, "learning_rate": 3.149982178701879e-05, "loss": 0.7939, "step": 8780 }, { "epoch": 2.25, "learning_rate": 3.140192190800294e-05, "loss": 0.7561, "step": 8785 }, { "epoch": 2.25, "learning_rate": 3.1304146056271024e-05, "loss": 0.801, "step": 8790 }, { "epoch": 2.25, "learning_rate": 3.120649440860454e-05, "loss": 0.7872, "step": 8795 }, { "epoch": 2.25, "learning_rate": 3.110896714156042e-05, "loss": 0.7874, "step": 8800 }, { "epoch": 2.25, "learning_rate": 3.101156443147082e-05, "loss": 0.7947, "step": 8805 }, { "epoch": 2.26, "learning_rate": 3.091428645444257e-05, "loss": 0.7554, "step": 8810 }, { "epoch": 2.26, "learning_rate": 3.081713338635702e-05, "loss": 0.8199, "step": 8815 }, { "epoch": 2.26, "learning_rate": 3.072010540286978e-05, "loss": 0.8038, "step": 8820 }, { "epoch": 2.26, "learning_rate": 3.062320267941008e-05, "loss": 0.7471, "step": 8825 }, { "epoch": 2.26, "learning_rate": 3.0526425391180925e-05, "loss": 0.7925, "step": 8830 }, { "epoch": 2.26, "learning_rate": 3.0429773713158327e-05, "loss": 0.7586, "step": 8835 }, { "epoch": 2.26, "learning_rate": 3.0333247820091348e-05, "loss": 0.8077, "step": 8840 }, { "epoch": 2.26, "learning_rate": 3.0236847886501542e-05, "loss": 0.7621, "step": 8845 }, { "epoch": 2.27, "learning_rate": 3.0140574086682705e-05, "loss": 0.7724, "step": 8850 }, { "epoch": 2.27, "learning_rate": 3.004442659470067e-05, "loss": 0.762, "step": 8855 }, { "epoch": 2.27, "learning_rate": 2.9967599659583968e-05, "loss": 0.7707, "step": 8860 }, { "epoch": 2.27, "learning_rate": 2.987167995962471e-05, "loss": 0.7806, "step": 8865 }, { "epoch": 2.27, "learning_rate": 2.9775887053670513e-05, "loss": 0.7871, "step": 8870 }, { "epoch": 2.27, "learning_rate": 2.9699344136986406e-05, "loss": 0.8071, "step": 8875 }, { "epoch": 2.27, "learning_rate": 2.9603779896541707e-05, "loss": 0.7619, "step": 8880 }, { "epoch": 2.27, "learning_rate": 2.9508342934473054e-05, "loss": 0.7615, "step": 8885 }, { "epoch": 2.28, "learning_rate": 2.9413033423333092e-05, "loss": 0.8218, "step": 8890 }, { "epoch": 2.28, "learning_rate": 2.931785153544421e-05, "loss": 0.7685, "step": 8895 }, { "epoch": 2.28, "learning_rate": 2.922279744289802e-05, "loss": 0.7681, "step": 8900 }, { "epoch": 2.28, "learning_rate": 2.9127871317554922e-05, "loss": 0.77, "step": 8905 }, { "epoch": 2.28, "learning_rate": 2.903307333104416e-05, "loss": 0.763, "step": 8910 }, { "epoch": 2.28, "learning_rate": 2.893840365476316e-05, "loss": 0.7474, "step": 8915 }, { "epoch": 2.28, "learning_rate": 2.8843862459877368e-05, "loss": 0.7853, "step": 8920 }, { "epoch": 2.28, "learning_rate": 2.8749449917320036e-05, "loss": 0.8067, "step": 8925 }, { "epoch": 2.29, "learning_rate": 2.865516619779166e-05, "loss": 0.8306, "step": 8930 }, { "epoch": 2.29, "learning_rate": 2.856101147175998e-05, "loss": 0.7622, "step": 8935 }, { "epoch": 2.29, "learning_rate": 2.8466985909459376e-05, "loss": 0.7536, "step": 8940 }, { "epoch": 2.29, "learning_rate": 2.8373089680890743e-05, "loss": 0.7628, "step": 8945 }, { "epoch": 2.29, "learning_rate": 2.827932295582121e-05, "loss": 0.764, "step": 8950 }, { "epoch": 2.29, "learning_rate": 2.8185685903783578e-05, "loss": 0.801, "step": 8955 }, { "epoch": 2.29, "learning_rate": 2.809217869407641e-05, "loss": 0.7734, "step": 8960 }, { "epoch": 2.29, "learning_rate": 2.7998801495763383e-05, "loss": 0.8021, "step": 8965 }, { "epoch": 2.3, "learning_rate": 2.790555447767309e-05, "loss": 0.7698, "step": 8970 }, { "epoch": 2.3, "learning_rate": 2.7812437808398883e-05, "loss": 0.7555, "step": 8975 }, { "epoch": 2.3, "learning_rate": 2.7719451656298313e-05, "loss": 0.7925, "step": 8980 }, { "epoch": 2.3, "learning_rate": 2.7626596189492983e-05, "loss": 0.7821, "step": 8985 }, { "epoch": 2.3, "learning_rate": 2.7533871575868276e-05, "loss": 0.7577, "step": 8990 }, { "epoch": 2.3, "learning_rate": 2.744127798307291e-05, "loss": 0.7862, "step": 8995 }, { "epoch": 2.3, "learning_rate": 2.7348815578518762e-05, "loss": 0.7488, "step": 9000 }, { "epoch": 2.3, "eval_loss": 1.5792468786239624, "eval_runtime": 2.0258, "eval_samples_per_second": 49.363, "eval_steps_per_second": 1.975, "step": 9000 }, { "epoch": 2.31, "learning_rate": 2.7256484529380442e-05, "loss": 0.7489, "step": 9005 }, { "epoch": 2.31, "learning_rate": 2.7164285002595213e-05, "loss": 0.7806, "step": 9010 }, { "epoch": 2.31, "learning_rate": 2.70722171648624e-05, "loss": 0.7767, "step": 9015 }, { "epoch": 2.31, "learning_rate": 2.6980281182643252e-05, "loss": 0.7674, "step": 9020 }, { "epoch": 2.31, "learning_rate": 2.6888477222160735e-05, "loss": 0.7475, "step": 9025 }, { "epoch": 2.31, "learning_rate": 2.6796805449398976e-05, "loss": 0.7891, "step": 9030 }, { "epoch": 2.31, "learning_rate": 2.6705266030103128e-05, "loss": 0.779, "step": 9035 }, { "epoch": 2.31, "learning_rate": 2.663212990038998e-05, "loss": 0.7869, "step": 9040 }, { "epoch": 2.32, "learning_rate": 2.654082913424668e-05, "loss": 0.7436, "step": 9045 }, { "epoch": 2.32, "learning_rate": 2.6449661184381836e-05, "loss": 0.8064, "step": 9050 }, { "epoch": 2.32, "learning_rate": 2.635862621562969e-05, "loss": 0.8043, "step": 9055 }, { "epoch": 2.32, "learning_rate": 2.6267724392584014e-05, "loss": 0.806, "step": 9060 }, { "epoch": 2.32, "learning_rate": 2.6176955879597987e-05, "loss": 0.7529, "step": 9065 }, { "epoch": 2.32, "learning_rate": 2.608632084078363e-05, "loss": 0.795, "step": 9070 }, { "epoch": 2.32, "learning_rate": 2.599581944001165e-05, "loss": 0.7276, "step": 9075 }, { "epoch": 2.32, "learning_rate": 2.590545184091122e-05, "loss": 0.7709, "step": 9080 }, { "epoch": 2.33, "learning_rate": 2.5815218206869517e-05, "loss": 0.792, "step": 9085 }, { "epoch": 2.33, "learning_rate": 2.5725118701031492e-05, "loss": 0.7417, "step": 9090 }, { "epoch": 2.33, "learning_rate": 2.563515348629969e-05, "loss": 0.7593, "step": 9095 }, { "epoch": 2.33, "learning_rate": 2.5545322725333755e-05, "loss": 0.7706, "step": 9100 }, { "epoch": 2.33, "learning_rate": 2.545562658055026e-05, "loss": 0.8017, "step": 9105 }, { "epoch": 2.33, "learning_rate": 2.5366065214122392e-05, "loss": 0.7551, "step": 9110 }, { "epoch": 2.33, "learning_rate": 2.527663878797968e-05, "loss": 0.7845, "step": 9115 }, { "epoch": 2.33, "learning_rate": 2.5187347463807653e-05, "loss": 0.775, "step": 9120 }, { "epoch": 2.34, "learning_rate": 2.5098191403047545e-05, "loss": 0.7423, "step": 9125 }, { "epoch": 2.34, "learning_rate": 2.5009170766896108e-05, "loss": 0.7769, "step": 9130 }, { "epoch": 2.34, "learning_rate": 2.4938051871861045e-05, "loss": 0.7546, "step": 9135 }, { "epoch": 2.34, "learning_rate": 2.484927540543911e-05, "loss": 0.7554, "step": 9140 }, { "epoch": 2.34, "learning_rate": 2.4760634813673057e-05, "loss": 0.7791, "step": 9145 }, { "epoch": 2.34, "learning_rate": 2.4672130256827565e-05, "loss": 0.7771, "step": 9150 }, { "epoch": 2.34, "learning_rate": 2.4583761894921463e-05, "loss": 0.7658, "step": 9155 }, { "epoch": 2.34, "learning_rate": 2.4495529887727255e-05, "loss": 0.7632, "step": 9160 }, { "epoch": 2.35, "learning_rate": 2.4407434394770878e-05, "loss": 0.8075, "step": 9165 }, { "epoch": 2.35, "learning_rate": 2.4319475575331586e-05, "loss": 0.7777, "step": 9170 }, { "epoch": 2.35, "learning_rate": 2.4231653588441383e-05, "loss": 0.7945, "step": 9175 }, { "epoch": 2.35, "learning_rate": 2.4143968592884903e-05, "loss": 0.7736, "step": 9180 }, { "epoch": 2.35, "learning_rate": 2.4056420747199215e-05, "loss": 0.7724, "step": 9185 }, { "epoch": 2.35, "learning_rate": 2.3969010209673215e-05, "loss": 0.7701, "step": 9190 }, { "epoch": 2.35, "learning_rate": 2.3881737138347728e-05, "loss": 0.7586, "step": 9195 }, { "epoch": 2.36, "learning_rate": 2.3794601691014896e-05, "loss": 0.7825, "step": 9200 }, { "epoch": 2.36, "learning_rate": 2.3707604025218154e-05, "loss": 0.7607, "step": 9205 }, { "epoch": 2.36, "learning_rate": 2.3620744298251752e-05, "loss": 0.7596, "step": 9210 }, { "epoch": 2.36, "learning_rate": 2.3534022667160516e-05, "loss": 0.7408, "step": 9215 }, { "epoch": 2.36, "learning_rate": 2.3447439288739693e-05, "loss": 0.7688, "step": 9220 }, { "epoch": 2.36, "learning_rate": 2.336099431953448e-05, "loss": 0.7484, "step": 9225 }, { "epoch": 2.36, "learning_rate": 2.32746879158398e-05, "loss": 0.7823, "step": 9230 }, { "epoch": 2.36, "learning_rate": 2.3205742664922004e-05, "loss": 0.7849, "step": 9235 }, { "epoch": 2.37, "learning_rate": 2.3119686072209857e-05, "loss": 0.7696, "step": 9240 }, { "epoch": 2.37, "learning_rate": 2.3033768481300443e-05, "loss": 0.7686, "step": 9245 }, { "epoch": 2.37, "learning_rate": 2.2947990047535307e-05, "loss": 0.7633, "step": 9250 }, { "epoch": 2.37, "learning_rate": 2.2862350926004227e-05, "loss": 0.7678, "step": 9255 }, { "epoch": 2.37, "learning_rate": 2.2776851271545252e-05, "loss": 0.7738, "step": 9260 }, { "epoch": 2.37, "learning_rate": 2.2708552068160115e-05, "loss": 0.7717, "step": 9265 }, { "epoch": 2.37, "learning_rate": 2.2623303843817457e-05, "loss": 0.7829, "step": 9270 }, { "epoch": 2.37, "learning_rate": 2.2538195518750848e-05, "loss": 0.7788, "step": 9275 }, { "epoch": 2.38, "learning_rate": 2.2453227246838538e-05, "loss": 0.8016, "step": 9280 }, { "epoch": 2.38, "learning_rate": 2.2368399181705567e-05, "loss": 0.835, "step": 9285 }, { "epoch": 2.38, "learning_rate": 2.2283711476723544e-05, "loss": 0.729, "step": 9290 }, { "epoch": 2.38, "learning_rate": 2.219916428501021e-05, "loss": 0.7551, "step": 9295 }, { "epoch": 2.38, "learning_rate": 2.211475775942936e-05, "loss": 0.7486, "step": 9300 }, { "epoch": 2.38, "learning_rate": 2.2030492052590368e-05, "loss": 0.7457, "step": 9305 }, { "epoch": 2.38, "learning_rate": 2.1946367316848016e-05, "loss": 0.7443, "step": 9310 }, { "epoch": 2.38, "learning_rate": 2.18623837043023e-05, "loss": 0.7558, "step": 9315 }, { "epoch": 2.39, "learning_rate": 2.1778541366797888e-05, "loss": 0.7406, "step": 9320 }, { "epoch": 2.39, "learning_rate": 2.1694840455924202e-05, "loss": 0.7999, "step": 9325 }, { "epoch": 2.39, "learning_rate": 2.1611281123014858e-05, "loss": 0.7633, "step": 9330 }, { "epoch": 2.39, "learning_rate": 2.1527863519147472e-05, "loss": 0.79, "step": 9335 }, { "epoch": 2.39, "learning_rate": 2.1444587795143535e-05, "loss": 0.7535, "step": 9340 }, { "epoch": 2.39, "learning_rate": 2.136145410156787e-05, "loss": 0.7685, "step": 9345 }, { "epoch": 2.39, "learning_rate": 2.1278462588728575e-05, "loss": 0.7441, "step": 9350 }, { "epoch": 2.39, "learning_rate": 2.1195613406676706e-05, "loss": 0.7542, "step": 9355 }, { "epoch": 2.4, "learning_rate": 2.1112906705205936e-05, "loss": 0.755, "step": 9360 }, { "epoch": 2.4, "learning_rate": 2.1030342633852307e-05, "loss": 0.7443, "step": 9365 }, { "epoch": 2.4, "learning_rate": 2.0947921341894082e-05, "loss": 0.7795, "step": 9370 }, { "epoch": 2.4, "learning_rate": 2.088208720964231e-05, "loss": 0.7806, "step": 9375 }, { "epoch": 2.4, "learning_rate": 2.08163446277606e-05, "loss": 0.7737, "step": 9380 }, { "epoch": 2.4, "learning_rate": 2.0734295258999025e-05, "loss": 0.7439, "step": 9385 }, { "epoch": 2.4, "learning_rate": 2.065238920489496e-05, "loss": 0.7622, "step": 9390 }, { "epoch": 2.4, "learning_rate": 2.0570626613536847e-05, "loss": 0.7953, "step": 9395 }, { "epoch": 2.41, "learning_rate": 2.0489007632753787e-05, "loss": 0.7815, "step": 9400 }, { "epoch": 2.41, "learning_rate": 2.040753241011525e-05, "loss": 0.7419, "step": 9405 }, { "epoch": 2.41, "learning_rate": 2.0326201092930764e-05, "loss": 0.7403, "step": 9410 }, { "epoch": 2.41, "learning_rate": 2.0245013828249625e-05, "loss": 0.7915, "step": 9415 }, { "epoch": 2.41, "learning_rate": 2.0163970762860783e-05, "loss": 0.7418, "step": 9420 }, { "epoch": 2.41, "learning_rate": 2.0083072043292407e-05, "loss": 0.7744, "step": 9425 }, { "epoch": 2.41, "learning_rate": 2.0002317815811645e-05, "loss": 0.7976, "step": 9430 }, { "epoch": 2.42, "learning_rate": 1.9921708226424495e-05, "loss": 0.7541, "step": 9435 }, { "epoch": 2.42, "learning_rate": 1.9841243420875398e-05, "loss": 0.7966, "step": 9440 }, { "epoch": 2.42, "learning_rate": 1.976092354464699e-05, "loss": 0.7762, "step": 9445 }, { "epoch": 2.42, "learning_rate": 1.9680748742959887e-05, "loss": 0.7457, "step": 9450 }, { "epoch": 2.42, "learning_rate": 1.9600719160772473e-05, "loss": 0.7275, "step": 9455 }, { "epoch": 2.42, "learning_rate": 1.9520834942780495e-05, "loss": 0.7617, "step": 9460 }, { "epoch": 2.42, "learning_rate": 1.9457032327675385e-05, "loss": 0.7532, "step": 9465 }, { "epoch": 2.42, "learning_rate": 1.9377410129029406e-05, "loss": 0.7461, "step": 9470 }, { "epoch": 2.43, "learning_rate": 1.9297933698327976e-05, "loss": 0.7516, "step": 9475 }, { "epoch": 2.43, "learning_rate": 1.921860317926675e-05, "loss": 0.7863, "step": 9480 }, { "epoch": 2.43, "learning_rate": 1.9139418715277546e-05, "loss": 0.7553, "step": 9485 }, { "epoch": 2.43, "learning_rate": 1.9060380449528137e-05, "loss": 0.7239, "step": 9490 }, { "epoch": 2.43, "learning_rate": 1.8981488524921996e-05, "loss": 0.7781, "step": 9495 }, { "epoch": 2.43, "learning_rate": 1.8902743084097963e-05, "loss": 0.7677, "step": 9500 }, { "epoch": 2.43, "learning_rate": 1.8824144269429998e-05, "loss": 0.7648, "step": 9505 }, { "epoch": 2.43, "learning_rate": 1.874569222302708e-05, "loss": 0.7279, "step": 9510 }, { "epoch": 2.44, "learning_rate": 1.8667387086732712e-05, "loss": 0.7506, "step": 9515 }, { "epoch": 2.44, "learning_rate": 1.8589229002124786e-05, "loss": 0.7782, "step": 9520 }, { "epoch": 2.44, "learning_rate": 1.8511218110515427e-05, "loss": 0.735, "step": 9525 }, { "epoch": 2.44, "learning_rate": 1.8433354552950456e-05, "loss": 0.7552, "step": 9530 }, { "epoch": 2.44, "learning_rate": 1.835563847020949e-05, "loss": 0.7474, "step": 9535 }, { "epoch": 2.44, "learning_rate": 1.827807000280537e-05, "loss": 0.7508, "step": 9540 }, { "epoch": 2.44, "learning_rate": 1.820064929098414e-05, "loss": 0.7394, "step": 9545 }, { "epoch": 2.44, "learning_rate": 1.812337647472465e-05, "loss": 0.7584, "step": 9550 }, { "epoch": 2.45, "learning_rate": 1.8046251693738327e-05, "loss": 0.7764, "step": 9555 }, { "epoch": 2.45, "learning_rate": 1.796927508746903e-05, "loss": 0.7447, "step": 9560 }, { "epoch": 2.45, "learning_rate": 1.789244679509262e-05, "loss": 0.7588, "step": 9565 }, { "epoch": 2.45, "learning_rate": 1.7815766955516834e-05, "loss": 0.7661, "step": 9570 }, { "epoch": 2.45, "learning_rate": 1.7739235707381064e-05, "loss": 0.7799, "step": 9575 }, { "epoch": 2.45, "learning_rate": 1.766285318905595e-05, "loss": 0.7121, "step": 9580 }, { "epoch": 2.45, "learning_rate": 1.7586619538643258e-05, "loss": 0.7543, "step": 9585 }, { "epoch": 2.45, "learning_rate": 1.7510534893975673e-05, "loss": 0.7647, "step": 9590 }, { "epoch": 2.46, "learning_rate": 1.7434599392616313e-05, "loss": 0.7636, "step": 9595 }, { "epoch": 2.46, "learning_rate": 1.735881317185879e-05, "loss": 0.7493, "step": 9600 }, { "epoch": 2.46, "learning_rate": 1.728317636872674e-05, "loss": 0.7475, "step": 9605 }, { "epoch": 2.46, "learning_rate": 1.7207689119973646e-05, "loss": 0.7704, "step": 9610 }, { "epoch": 2.46, "learning_rate": 1.71474070918498e-05, "loss": 0.7619, "step": 9615 }, { "epoch": 2.46, "learning_rate": 1.7072189384734395e-05, "loss": 0.7394, "step": 9620 }, { "epoch": 2.46, "learning_rate": 1.6997121613468458e-05, "loss": 0.7506, "step": 9625 }, { "epoch": 2.47, "learning_rate": 1.6922203913776714e-05, "loss": 0.7359, "step": 9630 }, { "epoch": 2.47, "learning_rate": 1.6847436421112393e-05, "loss": 0.7857, "step": 9635 }, { "epoch": 2.47, "learning_rate": 1.677281927065735e-05, "loss": 0.7879, "step": 9640 }, { "epoch": 2.47, "learning_rate": 1.6698352597321476e-05, "loss": 0.7077, "step": 9645 }, { "epoch": 2.47, "learning_rate": 1.6624036535742627e-05, "loss": 0.7556, "step": 9650 }, { "epoch": 2.47, "learning_rate": 1.6549871220286418e-05, "loss": 0.7445, "step": 9655 }, { "epoch": 2.47, "learning_rate": 1.6475856785045794e-05, "loss": 0.748, "step": 9660 }, { "epoch": 2.47, "learning_rate": 1.6401993363841038e-05, "loss": 0.727, "step": 9665 }, { "epoch": 2.48, "learning_rate": 1.6328281090219277e-05, "loss": 0.7457, "step": 9670 }, { "epoch": 2.48, "learning_rate": 1.62547200974544e-05, "loss": 0.7393, "step": 9675 }, { "epoch": 2.48, "learning_rate": 1.6181310518546856e-05, "loss": 0.7839, "step": 9680 }, { "epoch": 2.48, "learning_rate": 1.6108052486223147e-05, "loss": 0.7547, "step": 9685 }, { "epoch": 2.48, "learning_rate": 1.6034946132935946e-05, "loss": 0.7612, "step": 9690 }, { "epoch": 2.48, "learning_rate": 1.5961991590863678e-05, "loss": 0.732, "step": 9695 }, { "epoch": 2.48, "learning_rate": 1.588918899191013e-05, "loss": 0.7343, "step": 9700 }, { "epoch": 2.48, "learning_rate": 1.5816538467704567e-05, "loss": 0.7509, "step": 9705 }, { "epoch": 2.49, "learning_rate": 1.574404014960117e-05, "loss": 0.8046, "step": 9710 }, { "epoch": 2.49, "learning_rate": 1.5671694168678962e-05, "loss": 0.7523, "step": 9715 }, { "epoch": 2.49, "learning_rate": 1.561392715462098e-05, "loss": 0.757, "step": 9720 }, { "epoch": 2.49, "learning_rate": 1.5541855710064756e-05, "loss": 0.7218, "step": 9725 }, { "epoch": 2.49, "learning_rate": 1.5469936968245013e-05, "loss": 0.7728, "step": 9730 }, { "epoch": 2.49, "learning_rate": 1.5398171059192856e-05, "loss": 0.7402, "step": 9735 }, { "epoch": 2.49, "learning_rate": 1.532655811266307e-05, "loss": 0.722, "step": 9740 }, { "epoch": 2.49, "learning_rate": 1.5255098258133948e-05, "loss": 0.7417, "step": 9745 }, { "epoch": 2.5, "learning_rate": 1.5198040687583803e-05, "loss": 0.7544, "step": 9750 }, { "epoch": 2.5, "learning_rate": 1.512685672405737e-05, "loss": 0.7361, "step": 9755 }, { "epoch": 2.5, "learning_rate": 1.5055826213597391e-05, "loss": 0.7386, "step": 9760 }, { "epoch": 2.5, "learning_rate": 1.4984949284629046e-05, "loss": 0.7634, "step": 9765 }, { "epoch": 2.5, "learning_rate": 1.4914226065299886e-05, "loss": 0.7345, "step": 9770 }, { "epoch": 2.5, "learning_rate": 1.48436566834795e-05, "loss": 0.7916, "step": 9775 }, { "epoch": 2.5, "learning_rate": 1.4773241266759296e-05, "loss": 0.7199, "step": 9780 }, { "epoch": 2.5, "learning_rate": 1.4702979942452432e-05, "loss": 0.7367, "step": 9785 }, { "epoch": 2.51, "learning_rate": 1.4632872837593348e-05, "loss": 0.7473, "step": 9790 }, { "epoch": 2.51, "learning_rate": 1.4562920078937669e-05, "loss": 0.7712, "step": 9795 }, { "epoch": 2.51, "learning_rate": 1.4493121792962027e-05, "loss": 0.7526, "step": 9800 }, { "epoch": 2.51, "learning_rate": 1.4423478105863707e-05, "loss": 0.7523, "step": 9805 }, { "epoch": 2.51, "learning_rate": 1.4353989143560475e-05, "loss": 0.7577, "step": 9810 }, { "epoch": 2.51, "learning_rate": 1.428465503169033e-05, "loss": 0.7695, "step": 9815 }, { "epoch": 2.51, "learning_rate": 1.4215475895611374e-05, "loss": 0.7761, "step": 9820 }, { "epoch": 2.52, "learning_rate": 1.4146451860401445e-05, "loss": 0.7433, "step": 9825 }, { "epoch": 2.52, "learning_rate": 1.4077583050857934e-05, "loss": 0.7675, "step": 9830 }, { "epoch": 2.52, "learning_rate": 1.4008869591497665e-05, "loss": 0.7452, "step": 9835 }, { "epoch": 2.52, "learning_rate": 1.3940311606556522e-05, "loss": 0.7225, "step": 9840 }, { "epoch": 2.52, "learning_rate": 1.3871909219989266e-05, "loss": 0.7435, "step": 9845 }, { "epoch": 2.52, "learning_rate": 1.3803662555469422e-05, "loss": 0.7199, "step": 9850 }, { "epoch": 2.52, "learning_rate": 1.3749177426656478e-05, "loss": 0.7484, "step": 9855 }, { "epoch": 2.52, "learning_rate": 1.3681211372580205e-05, "loss": 0.7479, "step": 9860 }, { "epoch": 2.53, "learning_rate": 1.3613401385338464e-05, "loss": 0.7475, "step": 9865 }, { "epoch": 2.53, "learning_rate": 1.354574758753363e-05, "loss": 0.727, "step": 9870 }, { "epoch": 2.53, "learning_rate": 1.3478250101485746e-05, "loss": 0.7324, "step": 9875 }, { "epoch": 2.53, "learning_rate": 1.3410909049232167e-05, "loss": 0.6984, "step": 9880 }, { "epoch": 2.53, "learning_rate": 1.3343724552527414e-05, "loss": 0.7242, "step": 9885 }, { "epoch": 2.53, "learning_rate": 1.327669673284303e-05, "loss": 0.7516, "step": 9890 }, { "epoch": 2.53, "learning_rate": 1.3209825711367197e-05, "loss": 0.7372, "step": 9895 }, { "epoch": 2.53, "learning_rate": 1.3156441870154191e-05, "loss": 0.7671, "step": 9900 }, { "epoch": 2.54, "learning_rate": 1.3089853389942986e-05, "loss": 0.7611, "step": 9905 }, { "epoch": 2.54, "learning_rate": 1.3023422045758415e-05, "loss": 0.7429, "step": 9910 }, { "epoch": 2.54, "learning_rate": 1.2957147957710292e-05, "loss": 0.792, "step": 9915 }, { "epoch": 2.54, "learning_rate": 1.2891031245624019e-05, "loss": 0.7879, "step": 9920 }, { "epoch": 2.54, "learning_rate": 1.2825072029040496e-05, "loss": 0.7011, "step": 9925 }, { "epoch": 2.54, "learning_rate": 1.2759270427215942e-05, "loss": 0.7574, "step": 9930 }, { "epoch": 2.54, "learning_rate": 1.2693626559121418e-05, "loss": 0.74, "step": 9935 }, { "epoch": 2.54, "learning_rate": 1.2628140543443001e-05, "loss": 0.7096, "step": 9940 }, { "epoch": 2.55, "learning_rate": 1.2562812498581212e-05, "loss": 0.7617, "step": 9945 }, { "epoch": 2.55, "learning_rate": 1.2510663881062656e-05, "loss": 0.7322, "step": 9950 }, { "epoch": 2.55, "learning_rate": 1.2445620481128573e-05, "loss": 0.7158, "step": 9955 }, { "epoch": 2.55, "learning_rate": 1.2380735382012576e-05, "loss": 0.761, "step": 9960 }, { "epoch": 2.55, "learning_rate": 1.2316008701028847e-05, "loss": 0.7614, "step": 9965 }, { "epoch": 2.55, "learning_rate": 1.2251440555205106e-05, "loss": 0.7601, "step": 9970 }, { "epoch": 2.55, "learning_rate": 1.2187031061282327e-05, "loss": 0.7738, "step": 9975 }, { "epoch": 2.55, "learning_rate": 1.2122780335714757e-05, "loss": 0.7127, "step": 9980 }, { "epoch": 2.56, "learning_rate": 1.2058688494669546e-05, "loss": 0.7816, "step": 9985 }, { "epoch": 2.56, "learning_rate": 1.1994755654026535e-05, "loss": 0.7538, "step": 9990 }, { "epoch": 2.56, "learning_rate": 1.1930981929378171e-05, "loss": 0.7696, "step": 9995 }, { "epoch": 2.56, "learning_rate": 1.1867367436029131e-05, "loss": 0.7597, "step": 10000 }, { "epoch": 2.56, "eval_loss": 1.5886551141738892, "eval_runtime": 2.0259, "eval_samples_per_second": 49.36, "eval_steps_per_second": 1.974, "step": 10000 }, { "epoch": 2.56, "learning_rate": 1.1803912288996266e-05, "loss": 0.7156, "step": 10005 }, { "epoch": 2.56, "learning_rate": 1.1740616603008293e-05, "loss": 0.7169, "step": 10010 }, { "epoch": 2.56, "learning_rate": 1.1677480492505588e-05, "loss": 0.718, "step": 10015 }, { "epoch": 2.56, "learning_rate": 1.1614504071640098e-05, "loss": 0.7268, "step": 10020 }, { "epoch": 2.57, "learning_rate": 1.1551687454274906e-05, "loss": 0.7453, "step": 10025 }, { "epoch": 2.57, "learning_rate": 1.148903075398431e-05, "loss": 0.8059, "step": 10030 }, { "epoch": 2.57, "learning_rate": 1.1426534084053442e-05, "loss": 0.7176, "step": 10035 }, { "epoch": 2.57, "learning_rate": 1.1364197557477974e-05, "loss": 0.7775, "step": 10040 }, { "epoch": 2.57, "learning_rate": 1.1302021286964204e-05, "loss": 0.7312, "step": 10045 }, { "epoch": 2.57, "learning_rate": 1.1240005384928576e-05, "loss": 0.7386, "step": 10050 }, { "epoch": 2.57, "learning_rate": 1.1178149963497587e-05, "loss": 0.7418, "step": 10055 }, { "epoch": 2.58, "learning_rate": 1.1116455134507664e-05, "loss": 0.7339, "step": 10060 }, { "epoch": 2.58, "learning_rate": 1.105492100950477e-05, "loss": 0.7518, "step": 10065 }, { "epoch": 2.58, "learning_rate": 1.0993547699744366e-05, "loss": 0.743, "step": 10070 }, { "epoch": 2.58, "learning_rate": 1.0932335316191178e-05, "loss": 0.7235, "step": 10075 }, { "epoch": 2.58, "learning_rate": 1.0871283969518908e-05, "loss": 0.7435, "step": 10080 }, { "epoch": 2.58, "learning_rate": 1.0810393770110184e-05, "loss": 0.7282, "step": 10085 }, { "epoch": 2.58, "learning_rate": 1.074966482805615e-05, "loss": 0.6981, "step": 10090 }, { "epoch": 2.58, "learning_rate": 1.0689097253156532e-05, "loss": 0.754, "step": 10095 }, { "epoch": 2.59, "learning_rate": 1.0628691154919213e-05, "loss": 0.7308, "step": 10100 }, { "epoch": 2.59, "learning_rate": 1.0568446642560115e-05, "loss": 0.726, "step": 10105 }, { "epoch": 2.59, "learning_rate": 1.0508363825003087e-05, "loss": 0.7712, "step": 10110 }, { "epoch": 2.59, "learning_rate": 1.0448442810879555e-05, "loss": 0.7413, "step": 10115 }, { "epoch": 2.59, "learning_rate": 1.0388683708528413e-05, "loss": 0.7368, "step": 10120 }, { "epoch": 2.59, "learning_rate": 1.0329086625995843e-05, "loss": 0.7391, "step": 10125 }, { "epoch": 2.59, "learning_rate": 1.0269651671035063e-05, "loss": 0.7216, "step": 10130 }, { "epoch": 2.59, "learning_rate": 1.021037895110617e-05, "loss": 0.7444, "step": 10135 }, { "epoch": 2.6, "learning_rate": 1.0151268573375916e-05, "loss": 0.7729, "step": 10140 }, { "epoch": 2.6, "learning_rate": 1.009232064471759e-05, "loss": 0.7482, "step": 10145 }, { "epoch": 2.6, "learning_rate": 1.0033535271710714e-05, "loss": 0.7665, "step": 10150 }, { "epoch": 2.6, "learning_rate": 9.974912560640892e-06, "loss": 0.7547, "step": 10155 }, { "epoch": 2.6, "learning_rate": 9.92813157961594e-06, "loss": 0.7654, "step": 10160 }, { "epoch": 2.6, "learning_rate": 9.869801926933786e-06, "loss": 0.7369, "step": 10165 }, { "epoch": 2.6, "learning_rate": 9.811635232223281e-06, "loss": 0.7335, "step": 10170 }, { "epoch": 2.6, "learning_rate": 9.753631600651458e-06, "loss": 0.723, "step": 10175 }, { "epoch": 2.61, "learning_rate": 9.695791137090526e-06, "loss": 0.7534, "step": 10180 }, { "epoch": 2.61, "learning_rate": 9.638113946117722e-06, "loss": 0.7334, "step": 10185 }, { "epoch": 2.61, "learning_rate": 9.580600132015061e-06, "loss": 0.7401, "step": 10190 }, { "epoch": 2.61, "learning_rate": 9.534706781968339e-06, "loss": 0.7238, "step": 10195 }, { "epoch": 2.61, "learning_rate": 9.477487308079525e-06, "loss": 0.7591, "step": 10200 }, { "epoch": 2.61, "learning_rate": 9.420431501478411e-06, "loss": 0.7494, "step": 10205 }, { "epoch": 2.61, "learning_rate": 9.3635394653235e-06, "loss": 0.7431, "step": 10210 }, { "epoch": 2.61, "learning_rate": 9.318143820254332e-06, "loss": 0.7849, "step": 10215 }, { "epoch": 2.62, "learning_rate": 9.26154682991679e-06, "loss": 0.7534, "step": 10220 }, { "epoch": 2.62, "learning_rate": 9.20511389729345e-06, "loss": 0.7657, "step": 10225 }, { "epoch": 2.62, "learning_rate": 9.148845124416717e-06, "loss": 0.7238, "step": 10230 }, { "epoch": 2.62, "learning_rate": 9.092740613022099e-06, "loss": 0.7601, "step": 10235 }, { "epoch": 2.62, "learning_rate": 9.036800464548157e-06, "loss": 0.7155, "step": 10240 }, { "epoch": 2.62, "learning_rate": 8.981024780136304e-06, "loss": 0.7555, "step": 10245 }, { "epoch": 2.62, "learning_rate": 8.925413660630543e-06, "loss": 0.7489, "step": 10250 }, { "epoch": 2.63, "learning_rate": 8.869967206577378e-06, "loss": 0.7233, "step": 10255 }, { "epoch": 2.63, "learning_rate": 8.814685518225551e-06, "loss": 0.7043, "step": 10260 }, { "epoch": 2.63, "learning_rate": 8.759568695525988e-06, "loss": 0.7395, "step": 10265 }, { "epoch": 2.63, "learning_rate": 8.70461683813144e-06, "loss": 0.7285, "step": 10270 }, { "epoch": 2.63, "learning_rate": 8.649830045396435e-06, "loss": 0.7548, "step": 10275 }, { "epoch": 2.63, "learning_rate": 8.595208416377076e-06, "loss": 0.7466, "step": 10280 }, { "epoch": 2.63, "learning_rate": 8.540752049830825e-06, "loss": 0.7975, "step": 10285 }, { "epoch": 2.63, "learning_rate": 8.48646104421632e-06, "loss": 0.7699, "step": 10290 }, { "epoch": 2.64, "learning_rate": 8.4323354976933e-06, "loss": 0.7496, "step": 10295 }, { "epoch": 2.64, "learning_rate": 8.378375508122249e-06, "loss": 0.7521, "step": 10300 }, { "epoch": 2.64, "learning_rate": 8.324581173064394e-06, "loss": 0.7499, "step": 10305 }, { "epoch": 2.64, "learning_rate": 8.27095258978141e-06, "loss": 0.7918, "step": 10310 }, { "epoch": 2.64, "learning_rate": 8.217489855235339e-06, "loss": 0.7617, "step": 10315 }, { "epoch": 2.64, "learning_rate": 8.16419306608832e-06, "loss": 0.7163, "step": 10320 }, { "epoch": 2.64, "learning_rate": 8.111062318702433e-06, "loss": 0.7593, "step": 10325 }, { "epoch": 2.64, "learning_rate": 8.058097709139644e-06, "loss": 0.7391, "step": 10330 }, { "epoch": 2.65, "learning_rate": 8.005299333161453e-06, "loss": 0.7214, "step": 10335 }, { "epoch": 2.65, "learning_rate": 7.95266728622881e-06, "loss": 0.7695, "step": 10340 }, { "epoch": 2.65, "learning_rate": 7.900201663502016e-06, "loss": 0.7539, "step": 10345 }, { "epoch": 2.65, "learning_rate": 7.847902559840337e-06, "loss": 0.7683, "step": 10350 }, { "epoch": 2.65, "learning_rate": 7.795770069802088e-06, "loss": 0.7519, "step": 10355 }, { "epoch": 2.65, "learning_rate": 7.74380428764433e-06, "loss": 0.7332, "step": 10360 }, { "epoch": 2.65, "learning_rate": 7.692005307322626e-06, "loss": 0.7603, "step": 10365 }, { "epoch": 2.65, "learning_rate": 7.640373222491037e-06, "loss": 0.7235, "step": 10370 }, { "epoch": 2.66, "learning_rate": 7.588908126501859e-06, "loss": 0.7551, "step": 10375 }, { "epoch": 2.66, "learning_rate": 7.5376101124054136e-06, "loss": 0.702, "step": 10380 }, { "epoch": 2.66, "learning_rate": 7.48647927295002e-06, "loss": 0.7501, "step": 10385 }, { "epoch": 2.66, "learning_rate": 7.4355157005816634e-06, "loss": 0.7299, "step": 10390 }, { "epoch": 2.66, "learning_rate": 7.38471948744397e-06, "loss": 0.7368, "step": 10395 }, { "epoch": 2.66, "learning_rate": 7.334090725377918e-06, "loss": 0.75, "step": 10400 }, { "epoch": 2.66, "learning_rate": 7.283629505921752e-06, "loss": 0.7416, "step": 10405 }, { "epoch": 2.66, "learning_rate": 7.233335920310835e-06, "loss": 0.695, "step": 10410 }, { "epoch": 2.67, "learning_rate": 7.1832100594773525e-06, "loss": 0.6971, "step": 10415 }, { "epoch": 2.67, "learning_rate": 7.133252014050318e-06, "loss": 0.7392, "step": 10420 }, { "epoch": 2.67, "learning_rate": 7.083461874355335e-06, "loss": 0.7318, "step": 10425 }, { "epoch": 2.67, "learning_rate": 7.033839730414337e-06, "loss": 0.7247, "step": 10430 }, { "epoch": 2.67, "learning_rate": 6.984385671945626e-06, "loss": 0.701, "step": 10435 }, { "epoch": 2.67, "learning_rate": 6.935099788363531e-06, "loss": 0.7493, "step": 10440 }, { "epoch": 2.67, "learning_rate": 6.895792227308528e-06, "loss": 0.7523, "step": 10445 }, { "epoch": 2.68, "learning_rate": 6.8565924007017e-06, "loss": 0.7656, "step": 10450 }, { "epoch": 2.68, "learning_rate": 6.807744188101539e-06, "loss": 0.7349, "step": 10455 }, { "epoch": 2.68, "learning_rate": 6.759064469760823e-06, "loss": 0.7774, "step": 10460 }, { "epoch": 2.68, "learning_rate": 6.710553333693903e-06, "loss": 0.7456, "step": 10465 }, { "epoch": 2.68, "learning_rate": 6.662210867610297e-06, "loss": 0.7106, "step": 10470 }, { "epoch": 2.68, "learning_rate": 6.614037158914532e-06, "loss": 0.7537, "step": 10475 }, { "epoch": 2.68, "learning_rate": 6.5660322947060925e-06, "loss": 0.7384, "step": 10480 }, { "epoch": 2.68, "learning_rate": 6.5181963617791275e-06, "loss": 0.7542, "step": 10485 }, { "epoch": 2.69, "learning_rate": 6.470529446622353e-06, "loss": 0.7185, "step": 10490 }, { "epoch": 2.69, "learning_rate": 6.4230316354189195e-06, "loss": 0.7029, "step": 10495 }, { "epoch": 2.69, "learning_rate": 6.385155199022474e-06, "loss": 0.7254, "step": 10500 }, { "epoch": 2.69, "learning_rate": 6.337961991140551e-06, "loss": 0.7451, "step": 10505 }, { "epoch": 2.69, "learning_rate": 6.290938126897728e-06, "loss": 0.7559, "step": 10510 }, { "epoch": 2.69, "learning_rate": 6.244083691314484e-06, "loss": 0.7787, "step": 10515 }, { "epoch": 2.69, "learning_rate": 6.197398769104967e-06, "loss": 0.7704, "step": 10520 }, { "epoch": 2.69, "learning_rate": 6.1508834446768695e-06, "loss": 0.748, "step": 10525 }, { "epoch": 2.7, "learning_rate": 6.104537802131216e-06, "loss": 0.7112, "step": 10530 }, { "epoch": 2.7, "learning_rate": 6.058361925262235e-06, "loss": 0.7346, "step": 10535 }, { "epoch": 2.7, "learning_rate": 6.0123558975572645e-06, "loss": 0.7387, "step": 10540 }, { "epoch": 2.7, "learning_rate": 5.9665198021965015e-06, "loss": 0.7576, "step": 10545 }, { "epoch": 2.7, "learning_rate": 5.920853722052899e-06, "loss": 0.7444, "step": 10550 }, { "epoch": 2.7, "learning_rate": 5.875357739692066e-06, "loss": 0.723, "step": 10555 }, { "epoch": 2.7, "learning_rate": 5.830031937372004e-06, "loss": 0.7478, "step": 10560 }, { "epoch": 2.7, "learning_rate": 5.784876397043071e-06, "loss": 0.7321, "step": 10565 }, { "epoch": 2.71, "learning_rate": 5.739891200347791e-06, "loss": 0.7317, "step": 10570 }, { "epoch": 2.71, "learning_rate": 5.69507642862066e-06, "loss": 0.7677, "step": 10575 }, { "epoch": 2.71, "learning_rate": 5.650432162888087e-06, "loss": 0.7331, "step": 10580 }, { "epoch": 2.71, "learning_rate": 5.605958483868135e-06, "loss": 0.7325, "step": 10585 }, { "epoch": 2.71, "learning_rate": 5.561655471970539e-06, "loss": 0.7291, "step": 10590 }, { "epoch": 2.71, "learning_rate": 5.517523207296382e-06, "loss": 0.7529, "step": 10595 }, { "epoch": 2.71, "learning_rate": 5.473561769638058e-06, "loss": 0.7533, "step": 10600 }, { "epoch": 2.71, "learning_rate": 5.429771238479109e-06, "loss": 0.7306, "step": 10605 }, { "epoch": 2.72, "learning_rate": 5.3861516929940794e-06, "loss": 0.7554, "step": 10610 }, { "epoch": 2.72, "learning_rate": 5.35137921929918e-06, "loss": 0.764, "step": 10615 }, { "epoch": 2.72, "learning_rate": 5.3080676465600885e-06, "loss": 0.7494, "step": 10620 }, { "epoch": 2.72, "learning_rate": 5.264927279538467e-06, "loss": 0.7426, "step": 10625 }, { "epoch": 2.72, "learning_rate": 5.221958196233345e-06, "loss": 0.749, "step": 10630 }, { "epoch": 2.72, "learning_rate": 5.179160474334043e-06, "loss": 0.7352, "step": 10635 }, { "epoch": 2.72, "learning_rate": 5.136534191220044e-06, "loss": 0.7363, "step": 10640 }, { "epoch": 2.72, "learning_rate": 5.094079423960918e-06, "loss": 0.7293, "step": 10645 }, { "epoch": 2.73, "learning_rate": 5.051796249316076e-06, "loss": 0.7097, "step": 10650 }, { "epoch": 2.73, "learning_rate": 5.00968474373471e-06, "loss": 0.7339, "step": 10655 }, { "epoch": 2.73, "learning_rate": 4.967744983355638e-06, "loss": 0.7604, "step": 10660 }, { "epoch": 2.73, "learning_rate": 4.925977044007113e-06, "loss": 0.7612, "step": 10665 }, { "epoch": 2.73, "learning_rate": 4.884381001206795e-06, "loss": 0.7062, "step": 10670 }, { "epoch": 2.73, "learning_rate": 4.8429569301614795e-06, "loss": 0.759, "step": 10675 }, { "epoch": 2.73, "learning_rate": 4.8017049057671124e-06, "loss": 0.735, "step": 10680 }, { "epoch": 2.74, "learning_rate": 4.76062500260851e-06, "loss": 0.7663, "step": 10685 }, { "epoch": 2.74, "learning_rate": 4.719717294959303e-06, "loss": 0.7351, "step": 10690 }, { "epoch": 2.74, "learning_rate": 4.678981856781805e-06, "loss": 0.7365, "step": 10695 }, { "epoch": 2.74, "learning_rate": 4.638418761726859e-06, "loss": 0.748, "step": 10700 }, { "epoch": 2.74, "learning_rate": 4.598028083133676e-06, "loss": 0.7681, "step": 10705 }, { "epoch": 2.74, "learning_rate": 4.557809894029807e-06, "loss": 0.7592, "step": 10710 }, { "epoch": 2.74, "learning_rate": 4.517764267130808e-06, "loss": 0.7178, "step": 10715 }, { "epoch": 2.74, "learning_rate": 4.477891274840362e-06, "loss": 0.7166, "step": 10720 }, { "epoch": 2.75, "learning_rate": 4.438190989250002e-06, "loss": 0.7478, "step": 10725 }, { "epoch": 2.75, "learning_rate": 4.40655515784788e-06, "loss": 0.7539, "step": 10730 }, { "epoch": 2.75, "learning_rate": 4.367165924991401e-06, "loss": 0.7454, "step": 10735 }, { "epoch": 2.75, "learning_rate": 4.327949599029568e-06, "loss": 0.7385, "step": 10740 }, { "epoch": 2.75, "learning_rate": 4.288906250866609e-06, "loss": 0.7159, "step": 10745 }, { "epoch": 2.75, "learning_rate": 4.250035951094056e-06, "loss": 0.7283, "step": 10750 }, { "epoch": 2.75, "learning_rate": 4.21133876999047e-06, "loss": 0.7306, "step": 10755 }, { "epoch": 2.75, "learning_rate": 4.172814777521483e-06, "loss": 0.7292, "step": 10760 }, { "epoch": 2.76, "learning_rate": 4.134464043339592e-06, "loss": 0.7363, "step": 10765 }, { "epoch": 2.76, "learning_rate": 4.103908248567223e-06, "loss": 0.7536, "step": 10770 }, { "epoch": 2.76, "learning_rate": 4.065869553826307e-06, "loss": 0.7517, "step": 10775 }, { "epoch": 2.76, "learning_rate": 4.028004310732491e-06, "loss": 0.7423, "step": 10780 }, { "epoch": 2.76, "learning_rate": 3.99031258774718e-06, "loss": 0.7507, "step": 10785 }, { "epoch": 2.76, "learning_rate": 3.952794453018149e-06, "loss": 0.7566, "step": 10790 }, { "epoch": 2.76, "learning_rate": 3.915449974379204e-06, "loss": 0.7348, "step": 10795 }, { "epoch": 2.76, "learning_rate": 3.878279219350245e-06, "loss": 0.7017, "step": 10800 }, { "epoch": 2.77, "learning_rate": 3.84128225513708e-06, "loss": 0.7381, "step": 10805 }, { "epoch": 2.77, "learning_rate": 3.804459148631234e-06, "loss": 0.7061, "step": 10810 }, { "epoch": 2.77, "learning_rate": 3.767809966409963e-06, "loss": 0.7511, "step": 10815 }, { "epoch": 2.77, "learning_rate": 3.7313347747360393e-06, "loss": 0.7428, "step": 10820 }, { "epoch": 2.77, "learning_rate": 3.6950336395576322e-06, "loss": 0.7484, "step": 10825 }, { "epoch": 2.77, "learning_rate": 3.6589066265082404e-06, "loss": 0.7195, "step": 10830 }, { "epoch": 2.77, "learning_rate": 3.6229538009065476e-06, "loss": 0.7526, "step": 10835 }, { "epoch": 2.77, "learning_rate": 3.5871752277562566e-06, "loss": 0.7134, "step": 10840 }, { "epoch": 2.78, "learning_rate": 3.5515709717460876e-06, "loss": 0.6897, "step": 10845 }, { "epoch": 2.78, "learning_rate": 3.5161410972495147e-06, "loss": 0.7358, "step": 10850 }, { "epoch": 2.78, "learning_rate": 3.480885668324807e-06, "loss": 0.7443, "step": 10855 }, { "epoch": 2.78, "learning_rate": 3.4458047487147315e-06, "loss": 0.7133, "step": 10860 }, { "epoch": 2.78, "learning_rate": 3.4108984018466406e-06, "loss": 0.742, "step": 10865 }, { "epoch": 2.78, "learning_rate": 3.3761666908321942e-06, "loss": 0.7426, "step": 10870 }, { "epoch": 2.78, "learning_rate": 3.3416096784672836e-06, "loss": 0.7388, "step": 10875 }, { "epoch": 2.79, "learning_rate": 3.3140898936002806e-06, "loss": 0.763, "step": 10880 }, { "epoch": 2.79, "learning_rate": 3.286681987706497e-06, "loss": 0.7064, "step": 10885 }, { "epoch": 2.79, "learning_rate": 3.2525794834484593e-06, "loss": 0.7621, "step": 10890 }, { "epoch": 2.79, "learning_rate": 3.2186519012891827e-06, "loss": 0.7613, "step": 10895 }, { "epoch": 2.79, "learning_rate": 3.1848993025707206e-06, "loss": 0.7191, "step": 10900 }, { "epoch": 2.79, "learning_rate": 3.151321748318692e-06, "loss": 0.7314, "step": 10905 }, { "epoch": 2.79, "learning_rate": 3.1179192992423332e-06, "loss": 0.691, "step": 10910 }, { "epoch": 2.79, "learning_rate": 3.091323456302142e-06, "loss": 0.7511, "step": 10915 }, { "epoch": 2.8, "learning_rate": 3.058236348518406e-06, "loss": 0.7268, "step": 10920 }, { "epoch": 2.8, "learning_rate": 3.0253245142113807e-06, "loss": 0.7421, "step": 10925 }, { "epoch": 2.8, "learning_rate": 2.9925880128865657e-06, "loss": 0.7441, "step": 10930 }, { "epoch": 2.8, "learning_rate": 2.9600269037325377e-06, "loss": 0.7378, "step": 10935 }, { "epoch": 2.8, "learning_rate": 2.9276412456206826e-06, "loss": 0.7531, "step": 10940 }, { "epoch": 2.8, "learning_rate": 2.8954310971051946e-06, "loss": 0.7232, "step": 10945 }, { "epoch": 2.8, "learning_rate": 2.8633965164229672e-06, "loss": 0.7461, "step": 10950 }, { "epoch": 2.8, "learning_rate": 2.8315375614934137e-06, "loss": 0.6969, "step": 10955 }, { "epoch": 2.81, "learning_rate": 2.7998542899184355e-06, "loss": 0.7016, "step": 10960 }, { "epoch": 2.81, "learning_rate": 2.7683467589822874e-06, "loss": 0.7524, "step": 10965 }, { "epoch": 2.81, "learning_rate": 2.7370150256515127e-06, "loss": 0.7248, "step": 10970 }, { "epoch": 2.81, "learning_rate": 2.7058591465747517e-06, "loss": 0.7473, "step": 10975 }, { "epoch": 2.81, "learning_rate": 2.6748791780827344e-06, "loss": 0.7665, "step": 10980 }, { "epoch": 2.81, "learning_rate": 2.6440751761881323e-06, "loss": 0.713, "step": 10985 }, { "epoch": 2.81, "learning_rate": 2.61344719658545e-06, "loss": 0.7783, "step": 10990 }, { "epoch": 2.81, "learning_rate": 2.5829952946509585e-06, "loss": 0.7176, "step": 10995 }, { "epoch": 2.82, "learning_rate": 2.5527195254425596e-06, "loss": 0.715, "step": 11000 }, { "epoch": 2.82, "eval_loss": 1.5941790342330933, "eval_runtime": 2.0287, "eval_samples_per_second": 49.291, "eval_steps_per_second": 1.972, "step": 11000 }, { "epoch": 2.82, "learning_rate": 2.5226199436997113e-06, "loss": 0.7453, "step": 11005 }, { "epoch": 2.82, "learning_rate": 2.4926966038433254e-06, "loss": 0.7323, "step": 11010 }, { "epoch": 2.82, "learning_rate": 2.462949559975625e-06, "loss": 0.734, "step": 11015 }, { "epoch": 2.82, "learning_rate": 2.433378865880154e-06, "loss": 0.7219, "step": 11020 }, { "epoch": 2.82, "learning_rate": 2.403984575021556e-06, "loss": 0.7302, "step": 11025 }, { "epoch": 2.82, "learning_rate": 2.374766740545531e-06, "loss": 0.7524, "step": 11030 }, { "epoch": 2.82, "learning_rate": 2.3457254152787766e-06, "loss": 0.7725, "step": 11035 }, { "epoch": 2.83, "learning_rate": 2.316860651728836e-06, "loss": 0.7354, "step": 11040 }, { "epoch": 2.83, "learning_rate": 2.2881725020840184e-06, "loss": 0.7248, "step": 11045 }, { "epoch": 2.83, "learning_rate": 2.259661018213333e-06, "loss": 0.7433, "step": 11050 }, { "epoch": 2.83, "learning_rate": 2.2313262516663436e-06, "loss": 0.7765, "step": 11055 }, { "epoch": 2.83, "learning_rate": 2.2031682536731158e-06, "loss": 0.7678, "step": 11060 }, { "epoch": 2.83, "learning_rate": 2.175187075144136e-06, "loss": 0.6898, "step": 11065 }, { "epoch": 2.83, "learning_rate": 2.1473827666701364e-06, "loss": 0.7526, "step": 11070 }, { "epoch": 2.84, "learning_rate": 2.119755378522137e-06, "loss": 0.7479, "step": 11075 }, { "epoch": 2.84, "learning_rate": 2.0923049606512147e-06, "loss": 0.7417, "step": 11080 }, { "epoch": 2.84, "learning_rate": 2.0650315626885243e-06, "loss": 0.733, "step": 11085 }, { "epoch": 2.84, "learning_rate": 2.037935233945154e-06, "loss": 0.7329, "step": 11090 }, { "epoch": 2.84, "learning_rate": 2.0110160234120156e-06, "loss": 0.752, "step": 11095 }, { "epoch": 2.84, "learning_rate": 1.984273979759843e-06, "loss": 0.754, "step": 11100 }, { "epoch": 2.84, "learning_rate": 1.9577091513389933e-06, "loss": 0.7595, "step": 11105 }, { "epoch": 2.84, "learning_rate": 1.9313215861794352e-06, "loss": 0.7345, "step": 11110 }, { "epoch": 2.85, "learning_rate": 1.905111331990661e-06, "loss": 0.7556, "step": 11115 }, { "epoch": 2.85, "learning_rate": 1.8790784361615189e-06, "loss": 0.7045, "step": 11120 }, { "epoch": 2.85, "learning_rate": 1.8583798491577876e-06, "loss": 0.7597, "step": 11125 }, { "epoch": 2.85, "learning_rate": 1.8326663167724024e-06, "loss": 0.7237, "step": 11130 }, { "epoch": 2.85, "learning_rate": 1.8071302737293295e-06, "loss": 0.7016, "step": 11135 }, { "epoch": 2.85, "learning_rate": 1.786829262658285e-06, "loss": 0.7563, "step": 11140 }, { "epoch": 2.85, "learning_rate": 1.7616128165638179e-06, "loss": 0.7167, "step": 11145 }, { "epoch": 2.85, "learning_rate": 1.7365739882784116e-06, "loss": 0.7622, "step": 11150 }, { "epoch": 2.86, "learning_rate": 1.7117128230729862e-06, "loss": 0.752, "step": 11155 }, { "epoch": 2.86, "learning_rate": 1.6870293658972525e-06, "loss": 0.7209, "step": 11160 }, { "epoch": 2.86, "learning_rate": 1.662523661379578e-06, "loss": 0.705, "step": 11165 }, { "epoch": 2.86, "learning_rate": 1.63819575382701e-06, "loss": 0.7215, "step": 11170 }, { "epoch": 2.86, "learning_rate": 1.6140456872250742e-06, "loss": 0.747, "step": 11175 }, { "epoch": 2.86, "learning_rate": 1.5900735052377992e-06, "loss": 0.7034, "step": 11180 }, { "epoch": 2.86, "learning_rate": 1.5662792512075586e-06, "loss": 0.7534, "step": 11185 }, { "epoch": 2.86, "learning_rate": 1.5426629681550509e-06, "loss": 0.7393, "step": 11190 }, { "epoch": 2.87, "learning_rate": 1.5192246987791981e-06, "loss": 0.6835, "step": 11195 }, { "epoch": 2.87, "learning_rate": 1.4959644854570465e-06, "loss": 0.7143, "step": 11200 }, { "epoch": 2.87, "learning_rate": 1.4728823702437334e-06, "loss": 0.7465, "step": 11205 }, { "epoch": 2.87, "learning_rate": 1.4499783948723756e-06, "loss": 0.6884, "step": 11210 }, { "epoch": 2.87, "learning_rate": 1.4272526007540144e-06, "loss": 0.7223, "step": 11215 }, { "epoch": 2.87, "learning_rate": 1.4047050289775598e-06, "loss": 0.7425, "step": 11220 }, { "epoch": 2.87, "learning_rate": 1.3823357203096243e-06, "loss": 0.7353, "step": 11225 }, { "epoch": 2.87, "learning_rate": 1.3601447151945889e-06, "loss": 0.7128, "step": 11230 }, { "epoch": 2.88, "learning_rate": 1.338132053754404e-06, "loss": 0.7654, "step": 11235 }, { "epoch": 2.88, "learning_rate": 1.3162977757886108e-06, "loss": 0.7225, "step": 11240 }, { "epoch": 2.88, "learning_rate": 1.2946419207741867e-06, "loss": 0.7191, "step": 11245 }, { "epoch": 2.88, "learning_rate": 1.2774457276102381e-06, "loss": 0.7095, "step": 11250 }, { "epoch": 2.88, "learning_rate": 1.2561111323605712e-06, "loss": 0.7442, "step": 11255 }, { "epoch": 2.88, "learning_rate": 1.2391719971949435e-06, "loss": 0.7712, "step": 11260 }, { "epoch": 2.88, "learning_rate": 1.218158786688306e-06, "loss": 0.7304, "step": 11265 }, { "epoch": 2.88, "learning_rate": 1.1973241765711352e-06, "loss": 0.7266, "step": 11270 }, { "epoch": 2.89, "learning_rate": 1.1766682045130207e-06, "loss": 0.788, "step": 11275 }, { "epoch": 2.89, "learning_rate": 1.1561909078605215e-06, "loss": 0.7357, "step": 11280 }, { "epoch": 2.89, "learning_rate": 1.1358923236371888e-06, "loss": 0.7393, "step": 11285 }, { "epoch": 2.89, "learning_rate": 1.115772488543454e-06, "loss": 0.7702, "step": 11290 }, { "epoch": 2.89, "learning_rate": 1.0958314389565294e-06, "loss": 0.7311, "step": 11295 }, { "epoch": 2.89, "learning_rate": 1.0760692109304304e-06, "loss": 0.7572, "step": 11300 }, { "epoch": 2.89, "learning_rate": 1.0564858401957979e-06, "loss": 0.7633, "step": 11305 }, { "epoch": 2.9, "learning_rate": 1.0370813621599528e-06, "loss": 0.7534, "step": 11310 }, { "epoch": 2.9, "learning_rate": 1.0178558119067315e-06, "loss": 0.7509, "step": 11315 }, { "epoch": 2.9, "learning_rate": 9.98809224196473e-07, "loss": 0.7378, "step": 11320 }, { "epoch": 2.9, "learning_rate": 9.79941633465964e-07, "loss": 0.7193, "step": 11325 }, { "epoch": 2.9, "learning_rate": 9.612530738283387e-07, "loss": 0.7503, "step": 11330 }, { "epoch": 2.9, "learning_rate": 9.427435790730576e-07, "loss": 0.7165, "step": 11335 }, { "epoch": 2.9, "learning_rate": 9.244131826658065e-07, "loss": 0.7455, "step": 11340 }, { "epoch": 2.9, "learning_rate": 9.062619177484632e-07, "loss": 0.7504, "step": 11345 }, { "epoch": 2.91, "learning_rate": 8.88289817139043e-07, "loss": 0.7197, "step": 11350 }, { "epoch": 2.91, "learning_rate": 8.704969133316087e-07, "loss": 0.7668, "step": 11355 }, { "epoch": 2.91, "learning_rate": 8.52883238496227e-07, "loss": 0.7303, "step": 11360 }, { "epoch": 2.91, "learning_rate": 8.354488244789572e-07, "loss": 0.721, "step": 11365 }, { "epoch": 2.91, "learning_rate": 8.181937028016951e-07, "loss": 0.7242, "step": 11370 }, { "epoch": 2.91, "learning_rate": 8.04518716920466e-07, "loss": 0.7498, "step": 11375 }, { "epoch": 2.91, "learning_rate": 7.909585139335041e-07, "loss": 0.7349, "step": 11380 }, { "epoch": 2.91, "learning_rate": 7.741696975364643e-07, "loss": 0.7437, "step": 11385 }, { "epoch": 2.92, "learning_rate": 7.575602842738616e-07, "loss": 0.7473, "step": 11390 }, { "epoch": 2.92, "learning_rate": 7.411303041759632e-07, "loss": 0.7231, "step": 11395 }, { "epoch": 2.92, "learning_rate": 7.248797869486623e-07, "loss": 0.7626, "step": 11400 }, { "epoch": 2.92, "learning_rate": 7.088087619733453e-07, "loss": 0.7439, "step": 11405 }, { "epoch": 2.92, "learning_rate": 6.92917258306891e-07, "loss": 0.7556, "step": 11410 }, { "epoch": 2.92, "learning_rate": 6.77205304681594e-07, "loss": 0.7185, "step": 11415 }, { "epoch": 2.92, "learning_rate": 6.616729295051083e-07, "loss": 0.7589, "step": 11420 }, { "epoch": 2.92, "learning_rate": 6.463201608604031e-07, "loss": 0.7593, "step": 11425 }, { "epoch": 2.93, "learning_rate": 6.311470265057517e-07, "loss": 0.7631, "step": 11430 }, { "epoch": 2.93, "learning_rate": 6.161535538745878e-07, "loss": 0.7323, "step": 11435 }, { "epoch": 2.93, "learning_rate": 6.013397700755375e-07, "loss": 0.7328, "step": 11440 }, { "epoch": 2.93, "learning_rate": 5.867057018923761e-07, "loss": 0.7285, "step": 11445 }, { "epoch": 2.93, "learning_rate": 5.722513757838722e-07, "loss": 0.7268, "step": 11450 }, { "epoch": 2.93, "learning_rate": 5.579768178838984e-07, "loss": 0.7321, "step": 11455 }, { "epoch": 2.93, "learning_rate": 5.438820540012435e-07, "loss": 0.7657, "step": 11460 }, { "epoch": 2.93, "learning_rate": 5.299671096196334e-07, "loss": 0.7271, "step": 11465 }, { "epoch": 2.94, "learning_rate": 5.162320098977103e-07, "loss": 0.7236, "step": 11470 }, { "epoch": 2.94, "learning_rate": 5.026767796689091e-07, "loss": 0.7433, "step": 11475 }, { "epoch": 2.94, "learning_rate": 4.89301443441481e-07, "loss": 0.7542, "step": 11480 }, { "epoch": 2.94, "learning_rate": 4.761060253984151e-07, "loss": 0.7277, "step": 11485 }, { "epoch": 2.94, "learning_rate": 4.630905493973936e-07, "loss": 0.7282, "step": 11490 }, { "epoch": 2.94, "learning_rate": 4.502550389707927e-07, "loss": 0.736, "step": 11495 }, { "epoch": 2.94, "learning_rate": 4.375995173255598e-07, "loss": 0.732, "step": 11500 }, { "epoch": 2.95, "learning_rate": 4.251240073432361e-07, "loss": 0.7365, "step": 11505 }, { "epoch": 2.95, "learning_rate": 4.1282853157992295e-07, "loss": 0.7325, "step": 11510 }, { "epoch": 2.95, "learning_rate": 4.0071311226617116e-07, "loss": 0.768, "step": 11515 }, { "epoch": 2.95, "learning_rate": 3.88777771307014e-07, "loss": 0.7324, "step": 11520 }, { "epoch": 2.95, "learning_rate": 3.793591694673082e-07, "loss": 0.7499, "step": 11525 }, { "epoch": 2.95, "learning_rate": 3.677480237077813e-07, "loss": 0.7301, "step": 11530 }, { "epoch": 2.95, "learning_rate": 3.5631701590467246e-07, "loss": 0.7656, "step": 11535 }, { "epoch": 2.95, "learning_rate": 3.450661667255495e-07, "loss": 0.7299, "step": 11540 }, { "epoch": 2.96, "learning_rate": 3.3399549651229603e-07, "loss": 0.7466, "step": 11545 }, { "epoch": 2.96, "learning_rate": 3.231050252809897e-07, "loss": 0.7071, "step": 11550 }, { "epoch": 2.96, "learning_rate": 3.145224048057727e-07, "loss": 0.7183, "step": 11555 }, { "epoch": 2.96, "learning_rate": 3.0395634114250836e-07, "loss": 0.7307, "step": 11560 }, { "epoch": 2.96, "learning_rate": 2.9357053077285755e-07, "loss": 0.764, "step": 11565 }, { "epoch": 2.96, "learning_rate": 2.8336499247463287e-07, "loss": 0.7169, "step": 11570 }, { "epoch": 2.96, "learning_rate": 2.733397446997632e-07, "loss": 0.6721, "step": 11575 }, { "epoch": 2.96, "learning_rate": 2.634948055741715e-07, "loss": 0.7416, "step": 11580 }, { "epoch": 2.97, "learning_rate": 2.53830192897786e-07, "loss": 0.704, "step": 11585 }, { "epoch": 2.97, "learning_rate": 2.4434592414450676e-07, "loss": 0.711, "step": 11590 }, { "epoch": 2.97, "learning_rate": 2.3504201646215028e-07, "loss": 0.7354, "step": 11595 }, { "epoch": 2.97, "learning_rate": 2.2591848667247173e-07, "loss": 0.7408, "step": 11600 }, { "epoch": 2.97, "learning_rate": 2.1697535127105372e-07, "loss": 0.7419, "step": 11605 }, { "epoch": 2.97, "learning_rate": 2.0821262642733985e-07, "loss": 0.7498, "step": 11610 }, { "epoch": 2.97, "learning_rate": 1.9963032798460125e-07, "loss": 0.7346, "step": 11615 }, { "epoch": 2.97, "learning_rate": 1.912284714598811e-07, "loss": 0.7794, "step": 11620 }, { "epoch": 2.98, "learning_rate": 1.8300707204395028e-07, "loss": 0.7755, "step": 11625 }, { "epoch": 2.98, "learning_rate": 1.7496614460135176e-07, "loss": 0.7596, "step": 11630 }, { "epoch": 2.98, "learning_rate": 1.6710570367031164e-07, "loss": 0.7305, "step": 11635 }, { "epoch": 2.98, "learning_rate": 1.594257634627283e-07, "loss": 0.7371, "step": 11640 }, { "epoch": 2.98, "learning_rate": 1.5192633786415e-07, "loss": 0.7111, "step": 11645 }, { "epoch": 2.98, "learning_rate": 1.4460744043376385e-07, "loss": 0.7592, "step": 11650 }, { "epoch": 2.98, "learning_rate": 1.3746908440432915e-07, "loss": 0.7156, "step": 11655 }, { "epoch": 2.98, "learning_rate": 1.318883980735097e-07, "loss": 0.7421, "step": 11660 }, { "epoch": 2.99, "learning_rate": 1.2507504887044884e-07, "loss": 0.7729, "step": 11665 }, { "epoch": 2.99, "learning_rate": 1.197543841632398e-07, "loss": 0.7528, "step": 11670 }, { "epoch": 2.99, "learning_rate": 1.1326608169920372e-07, "loss": 0.7502, "step": 11675 }, { "epoch": 2.99, "learning_rate": 1.0695837730220203e-07, "loss": 0.7245, "step": 11680 }, { "epoch": 2.99, "learning_rate": 1.0083128237673434e-07, "loss": 0.7325, "step": 11685 }, { "epoch": 2.99, "learning_rate": 9.488480800077249e-08, "loss": 0.7045, "step": 11690 }, { "epoch": 2.99, "learning_rate": 8.911896492571625e-08, "loss": 0.7447, "step": 11695 }, { "epoch": 3.0, "learning_rate": 8.353376357636001e-08, "loss": 0.7317, "step": 11700 }, { "epoch": 3.0, "learning_rate": 7.8129214050926e-08, "loss": 0.7322, "step": 11705 }, { "epoch": 3.0, "learning_rate": 7.290532612096445e-08, "loss": 0.7227, "step": 11710 }, { "epoch": 3.0, "learning_rate": 6.786210923145353e-08, "loss": 0.7408, "step": 11715 }, { "epoch": 3.0, "step": 11718, "total_flos": 995899367424000.0, "train_loss": 1.3950741625133278, "train_runtime": 240198.6559, "train_samples_per_second": 6.245, "train_steps_per_second": 0.049 } ], "max_steps": 11718, "num_train_epochs": 3, "total_flos": 995899367424000.0, "trial_name": null, "trial_params": null }