{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9998415967052114, "global_step": 2367, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.00010416666666666667, "loss": 5.6655, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.00020833333333333335, "loss": 4.5516, "step": 10 }, { "epoch": 0.02, "learning_rate": 0.0003125, "loss": 3.7337, "step": 15 }, { "epoch": 0.03, "learning_rate": 0.0004166666666666667, "loss": 3.2268, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.0005208333333333334, "loss": 2.9287, "step": 25 }, { "epoch": 0.04, "learning_rate": 0.000625, "loss": 2.8609, "step": 30 }, { "epoch": 0.04, "learning_rate": 0.0007291666666666666, "loss": 2.7533, "step": 35 }, { "epoch": 0.05, "learning_rate": 0.0008333333333333334, "loss": 2.7291, "step": 40 }, { "epoch": 0.06, "learning_rate": 0.0009375, "loss": 2.5916, "step": 45 }, { "epoch": 0.06, "learning_rate": 0.0009999981647383803, "loss": 2.605, "step": 50 }, { "epoch": 0.07, "learning_rate": 0.0009999775181998842, "loss": 2.6466, "step": 55 }, { "epoch": 0.08, "learning_rate": 0.0009999339319963169, "loss": 2.5633, "step": 60 }, { "epoch": 0.08, "learning_rate": 0.000999867408127474, "loss": 2.628, "step": 65 }, { "epoch": 0.09, "learning_rate": 0.0009997779496455639, "loss": 2.5335, "step": 70 }, { "epoch": 0.1, "learning_rate": 0.0009996655606550656, "loss": 2.5589, "step": 75 }, { "epoch": 0.1, "learning_rate": 0.000999530246312543, "loss": 2.5707, "step": 80 }, { "epoch": 0.11, "learning_rate": 0.0009993720128264065, "loss": 2.428, "step": 85 }, { "epoch": 0.11, "learning_rate": 0.000999190867456629, "loss": 2.4371, "step": 90 }, { "epoch": 0.12, "learning_rate": 0.0009989868185144122, "loss": 2.4955, "step": 95 }, { "epoch": 0.13, "learning_rate": 0.0009987598753618061, "loss": 2.4295, "step": 100 }, { "epoch": 0.13, "learning_rate": 0.0009985100484112787, "loss": 2.3623, "step": 105 }, { "epoch": 0.14, "learning_rate": 0.0009982373491252382, "loss": 2.3422, "step": 110 }, { "epoch": 0.15, "learning_rate": 0.0009979417900155081, "loss": 2.3134, "step": 115 }, { "epoch": 0.15, "learning_rate": 0.0009976233846427521, "loss": 2.4091, "step": 120 }, { "epoch": 0.16, "learning_rate": 0.000997282147615852, "loss": 2.4004, "step": 125 }, { "epoch": 0.16, "learning_rate": 0.0009969180945912381, "loss": 2.3233, "step": 130 }, { "epoch": 0.17, "learning_rate": 0.0009965312422721704, "loss": 2.3183, "step": 135 }, { "epoch": 0.18, "learning_rate": 0.0009961216084079724, "loss": 2.3771, "step": 140 }, { "epoch": 0.18, "learning_rate": 0.0009956892117932161, "loss": 2.2779, "step": 145 }, { "epoch": 0.19, "learning_rate": 0.000995234072266861, "loss": 2.303, "step": 150 }, { "epoch": 0.2, "learning_rate": 0.0009947562107113423, "loss": 2.3452, "step": 155 }, { "epoch": 0.2, "learning_rate": 0.000994255649051614, "loss": 2.2872, "step": 160 }, { "epoch": 0.21, "learning_rate": 0.0009937324102541423, "loss": 2.3187, "step": 165 }, { "epoch": 0.22, "learning_rate": 0.0009931865183258524, "loss": 2.2837, "step": 170 }, { "epoch": 0.22, "learning_rate": 0.0009926179983130263, "loss": 2.4024, "step": 175 }, { "epoch": 0.23, "learning_rate": 0.0009920268763001542, "loss": 2.347, "step": 180 }, { "epoch": 0.23, "learning_rate": 0.0009914131794087377, "loss": 2.3126, "step": 185 }, { "epoch": 0.24, "learning_rate": 0.0009907769357960449, "loss": 2.2042, "step": 190 }, { "epoch": 0.25, "learning_rate": 0.0009901181746538195, "loss": 2.1674, "step": 195 }, { "epoch": 0.25, "learning_rate": 0.0009894369262069401, "loss": 2.1963, "step": 200 }, { "epoch": 0.26, "learning_rate": 0.0009887332217120347, "loss": 2.2971, "step": 205 }, { "epoch": 0.27, "learning_rate": 0.0009880070934560458, "loss": 2.1949, "step": 210 }, { "epoch": 0.27, "learning_rate": 0.0009872585747547494, "loss": 2.1544, "step": 215 }, { "epoch": 0.28, "learning_rate": 0.0009864876999512264, "loss": 2.1866, "step": 220 }, { "epoch": 0.29, "learning_rate": 0.0009856945044142864, "loss": 2.198, "step": 225 }, { "epoch": 0.29, "learning_rate": 0.000984879024536846, "loss": 2.2507, "step": 230 }, { "epoch": 0.3, "learning_rate": 0.0009840412977342582, "loss": 2.1326, "step": 235 }, { "epoch": 0.3, "learning_rate": 0.000983181362442595, "loss": 2.204, "step": 240 }, { "epoch": 0.31, "learning_rate": 0.0009822992581168867, "loss": 2.2197, "step": 245 }, { "epoch": 0.32, "learning_rate": 0.0009813950252293075, "loss": 2.1562, "step": 250 }, { "epoch": 0.32, "learning_rate": 0.0009804687052673228, "loss": 2.2193, "step": 255 }, { "epoch": 0.33, "learning_rate": 0.0009795203407317828, "loss": 2.2668, "step": 260 }, { "epoch": 0.34, "learning_rate": 0.0009785499751349732, "loss": 2.2434, "step": 265 }, { "epoch": 0.34, "learning_rate": 0.0009775576529986198, "loss": 2.1776, "step": 270 }, { "epoch": 0.35, "learning_rate": 0.0009765434198518448, "loss": 2.2592, "step": 275 }, { "epoch": 0.35, "learning_rate": 0.0009755073222290775, "loss": 2.1072, "step": 280 }, { "epoch": 0.36, "learning_rate": 0.0009744494076679204, "loss": 2.0784, "step": 285 }, { "epoch": 0.37, "learning_rate": 0.0009733697247069677, "loss": 2.1776, "step": 290 }, { "epoch": 0.37, "learning_rate": 0.0009722683228835774, "loss": 2.1752, "step": 295 }, { "epoch": 0.38, "learning_rate": 0.0009711452527315997, "loss": 2.0958, "step": 300 }, { "epoch": 0.39, "learning_rate": 0.0009700005657790578, "loss": 2.1055, "step": 305 }, { "epoch": 0.39, "learning_rate": 0.0009688343145457837, "loss": 2.1135, "step": 310 }, { "epoch": 0.4, "learning_rate": 0.0009676465525410088, "loss": 2.0275, "step": 315 }, { "epoch": 0.41, "learning_rate": 0.0009664373342609086, "loss": 2.0031, "step": 320 }, { "epoch": 0.41, "learning_rate": 0.0009652067151861023, "loss": 2.1489, "step": 325 }, { "epoch": 0.42, "learning_rate": 0.0009639547517791077, "loss": 2.1305, "step": 330 }, { "epoch": 0.42, "learning_rate": 0.0009626815014817496, "loss": 2.0894, "step": 335 }, { "epoch": 0.43, "learning_rate": 0.0009613870227125259, "loss": 2.0822, "step": 340 }, { "epoch": 0.44, "learning_rate": 0.0009600713748639258, "loss": 2.1499, "step": 345 }, { "epoch": 0.44, "learning_rate": 0.0009587346182997054, "loss": 2.0848, "step": 350 }, { "epoch": 0.45, "learning_rate": 0.0009573768143521182, "loss": 2.0754, "step": 355 }, { "epoch": 0.46, "learning_rate": 0.000955998025319101, "loss": 2.1364, "step": 360 }, { "epoch": 0.46, "learning_rate": 0.0009545983144614152, "loss": 2.066, "step": 365 }, { "epoch": 0.47, "learning_rate": 0.0009531777459997453, "loss": 2.1016, "step": 370 }, { "epoch": 0.48, "learning_rate": 0.0009517363851117511, "loss": 2.0479, "step": 375 }, { "epoch": 0.48, "learning_rate": 0.0009502742979290782, "loss": 2.0646, "step": 380 }, { "epoch": 0.49, "learning_rate": 0.0009487915515343235, "loss": 2.1756, "step": 385 }, { "epoch": 0.49, "learning_rate": 0.0009472882139579572, "loss": 2.0339, "step": 390 }, { "epoch": 0.5, "learning_rate": 0.0009457643541752017, "loss": 2.1112, "step": 395 }, { "epoch": 0.51, "learning_rate": 0.0009442200421028666, "loss": 2.1327, "step": 400 }, { "epoch": 0.51, "learning_rate": 0.0009426553485961416, "loss": 2.0274, "step": 405 }, { "epoch": 0.52, "learning_rate": 0.0009410703454453442, "loss": 1.9823, "step": 410 }, { "epoch": 0.53, "learning_rate": 0.0009394651053726273, "loss": 1.9665, "step": 415 }, { "epoch": 0.53, "learning_rate": 0.0009378397020286417, "loss": 2.0468, "step": 420 }, { "epoch": 0.54, "learning_rate": 0.0009361942099891571, "loss": 1.9778, "step": 425 }, { "epoch": 0.54, "learning_rate": 0.0009345287047516411, "loss": 2.0264, "step": 430 }, { "epoch": 0.55, "learning_rate": 0.0009328432627317937, "loss": 2.1288, "step": 435 }, { "epoch": 0.56, "learning_rate": 0.0009311379612600432, "loss": 1.9488, "step": 440 }, { "epoch": 0.56, "learning_rate": 0.0009294128785779969, "loss": 1.9984, "step": 445 }, { "epoch": 0.57, "learning_rate": 0.0009276680938348512, "loss": 2.0063, "step": 450 }, { "epoch": 0.58, "learning_rate": 0.000925903687083761, "loss": 2.052, "step": 455 }, { "epoch": 0.58, "learning_rate": 0.0009241197392781662, "loss": 2.0739, "step": 460 }, { "epoch": 0.59, "learning_rate": 0.000922316332268077, "loss": 1.9291, "step": 465 }, { "epoch": 0.6, "learning_rate": 0.00092049354879632, "loss": 2.0575, "step": 470 }, { "epoch": 0.6, "learning_rate": 0.0009186514724947399, "loss": 1.9736, "step": 475 }, { "epoch": 0.61, "learning_rate": 0.0009167901878803637, "loss": 2.019, "step": 480 }, { "epoch": 0.61, "learning_rate": 0.0009149097803515225, "loss": 1.8767, "step": 485 }, { "epoch": 0.62, "learning_rate": 0.0009130103361839337, "loss": 2.144, "step": 490 }, { "epoch": 0.63, "learning_rate": 0.0009110919425267414, "loss": 1.9369, "step": 495 }, { "epoch": 0.63, "learning_rate": 0.0009091546873985196, "loss": 2.0787, "step": 500 }, { "epoch": 0.64, "learning_rate": 0.0009071986596832321, "loss": 1.8462, "step": 505 }, { "epoch": 0.65, "learning_rate": 0.0009052239491261558, "loss": 1.9782, "step": 510 }, { "epoch": 0.65, "learning_rate": 0.0009032306463297619, "loss": 1.912, "step": 515 }, { "epoch": 0.66, "learning_rate": 0.0009012188427495597, "loss": 2.0624, "step": 520 }, { "epoch": 0.67, "learning_rate": 0.0008991886306899002, "loss": 1.9532, "step": 525 }, { "epoch": 0.67, "learning_rate": 0.0008971401032997409, "loss": 2.0872, "step": 530 }, { "epoch": 0.68, "learning_rate": 0.0008950733545683725, "loss": 2.0329, "step": 535 }, { "epoch": 0.68, "learning_rate": 0.0008929884793211061, "loss": 1.9024, "step": 540 }, { "epoch": 0.69, "learning_rate": 0.0008908855732149224, "loss": 1.9682, "step": 545 }, { "epoch": 0.7, "learning_rate": 0.0008887647327340832, "loss": 1.9982, "step": 550 }, { "epoch": 0.7, "learning_rate": 0.0008866260551857045, "loss": 1.9342, "step": 555 }, { "epoch": 0.71, "learning_rate": 0.0008844696386952916, "loss": 1.8963, "step": 560 }, { "epoch": 0.72, "learning_rate": 0.0008822955822022372, "loss": 1.9586, "step": 565 }, { "epoch": 0.72, "learning_rate": 0.0008801039854552822, "loss": 1.8984, "step": 570 }, { "epoch": 0.73, "learning_rate": 0.0008778949490079383, "loss": 1.8707, "step": 575 }, { "epoch": 0.73, "learning_rate": 0.0008756685742138751, "loss": 1.9667, "step": 580 }, { "epoch": 0.74, "learning_rate": 0.0008734249632222702, "loss": 1.931, "step": 585 }, { "epoch": 0.75, "learning_rate": 0.0008711642189731211, "loss": 1.9109, "step": 590 }, { "epoch": 0.75, "learning_rate": 0.0008688864451925238, "loss": 1.8908, "step": 595 }, { "epoch": 0.76, "learning_rate": 0.0008665917463879124, "loss": 2.0963, "step": 600 }, { "epoch": 0.77, "learning_rate": 0.0008642802278432649, "loss": 1.9331, "step": 605 }, { "epoch": 0.77, "learning_rate": 0.0008619519956142725, "loss": 2.0053, "step": 610 }, { "epoch": 0.78, "learning_rate": 0.0008596071565234733, "loss": 1.9626, "step": 615 }, { "epoch": 0.79, "learning_rate": 0.0008572458181553517, "loss": 1.9638, "step": 620 }, { "epoch": 0.79, "learning_rate": 0.0008548680888514013, "loss": 1.8846, "step": 625 }, { "epoch": 0.8, "learning_rate": 0.0008524740777051554, "loss": 2.0264, "step": 630 }, { "epoch": 0.8, "learning_rate": 0.0008500638945571804, "loss": 1.9709, "step": 635 }, { "epoch": 0.81, "learning_rate": 0.0008476376499900369, "loss": 1.8799, "step": 640 }, { "epoch": 0.82, "learning_rate": 0.0008451954553232055, "loss": 1.9257, "step": 645 }, { "epoch": 0.82, "learning_rate": 0.0008427374226079803, "loss": 1.9897, "step": 650 }, { "epoch": 0.83, "learning_rate": 0.0008402636646223262, "loss": 1.9573, "step": 655 }, { "epoch": 0.84, "learning_rate": 0.000837774294865706, "loss": 1.9529, "step": 660 }, { "epoch": 0.84, "learning_rate": 0.0008352694275538723, "loss": 2.0161, "step": 665 }, { "epoch": 0.85, "learning_rate": 0.0008327491776136267, "loss": 1.942, "step": 670 }, { "epoch": 0.86, "learning_rate": 0.0008302136606775479, "loss": 1.8917, "step": 675 }, { "epoch": 0.86, "learning_rate": 0.000827662993078685, "loss": 1.9157, "step": 680 }, { "epoch": 0.87, "learning_rate": 0.0008250972918452207, "loss": 1.9969, "step": 685 }, { "epoch": 0.87, "learning_rate": 0.0008225166746951022, "loss": 1.8957, "step": 690 }, { "epoch": 0.88, "learning_rate": 0.0008199212600306396, "loss": 1.8494, "step": 695 }, { "epoch": 0.89, "learning_rate": 0.0008173111669330733, "loss": 1.8495, "step": 700 }, { "epoch": 0.89, "learning_rate": 0.0008146865151571108, "loss": 1.8407, "step": 705 }, { "epoch": 0.9, "learning_rate": 0.0008120474251254319, "loss": 1.7461, "step": 710 }, { "epoch": 0.91, "learning_rate": 0.0008093940179231643, "loss": 1.8608, "step": 715 }, { "epoch": 0.91, "learning_rate": 0.0008067264152923268, "loss": 1.9029, "step": 720 }, { "epoch": 0.92, "learning_rate": 0.0008040447396262442, "loss": 1.8156, "step": 725 }, { "epoch": 0.93, "learning_rate": 0.0008013491139639324, "loss": 1.8412, "step": 730 }, { "epoch": 0.93, "learning_rate": 0.0007986396619844518, "loss": 1.8522, "step": 735 }, { "epoch": 0.94, "learning_rate": 0.0007959165080012341, "loss": 1.9613, "step": 740 }, { "epoch": 0.94, "learning_rate": 0.0007931797769563777, "loss": 1.917, "step": 745 }, { "epoch": 0.95, "learning_rate": 0.0007904295944149156, "loss": 1.8676, "step": 750 }, { "epoch": 0.96, "learning_rate": 0.0007876660865590539, "loss": 1.839, "step": 755 }, { "epoch": 0.96, "learning_rate": 0.0007848893801823833, "loss": 1.8689, "step": 760 }, { "epoch": 0.97, "learning_rate": 0.0007820996026840606, "loss": 1.9667, "step": 765 }, { "epoch": 0.98, "learning_rate": 0.000779296882062964, "loss": 1.8947, "step": 770 }, { "epoch": 0.98, "learning_rate": 0.0007764813469118201, "loss": 1.8329, "step": 775 }, { "epoch": 0.99, "learning_rate": 0.0007736531264113041, "loss": 1.7263, "step": 780 }, { "epoch": 0.99, "learning_rate": 0.0007708123503241124, "loss": 1.8701, "step": 785 }, { "epoch": 1.0, "eval_loss": 1.8378170728683472, "eval_runtime": 367.2627, "eval_samples_per_second": 34.874, "eval_steps_per_second": 2.181, "step": 789 }, { "epoch": 1.0, "learning_rate": 0.0007679591489890098, "loss": 1.8124, "step": 790 }, { "epoch": 1.01, "learning_rate": 0.0007650936533148485, "loss": 1.7062, "step": 795 }, { "epoch": 1.01, "learning_rate": 0.0007622159947745617, "loss": 1.6682, "step": 800 }, { "epoch": 1.02, "learning_rate": 0.0007593263053991325, "loss": 1.6572, "step": 805 }, { "epoch": 1.03, "learning_rate": 0.0007564247177715349, "loss": 1.6605, "step": 810 }, { "epoch": 1.03, "learning_rate": 0.0007535113650206517, "loss": 1.7364, "step": 815 }, { "epoch": 1.04, "learning_rate": 0.0007505863808151661, "loss": 1.628, "step": 820 }, { "epoch": 1.05, "learning_rate": 0.0007476498993574277, "loss": 1.5975, "step": 825 }, { "epoch": 1.05, "learning_rate": 0.0007447020553772973, "loss": 1.6411, "step": 830 }, { "epoch": 1.06, "learning_rate": 0.0007417429841259631, "loss": 1.6513, "step": 835 }, { "epoch": 1.06, "learning_rate": 0.0007387728213697366, "loss": 1.6971, "step": 840 }, { "epoch": 1.07, "learning_rate": 0.0007357917033838228, "loss": 1.6577, "step": 845 }, { "epoch": 1.08, "learning_rate": 0.0007327997669460682, "loss": 1.6068, "step": 850 }, { "epoch": 1.08, "learning_rate": 0.0007297971493306848, "loss": 1.5742, "step": 855 }, { "epoch": 1.09, "learning_rate": 0.0007267839883019515, "loss": 1.6666, "step": 860 }, { "epoch": 1.1, "learning_rate": 0.0007237604221078942, "loss": 1.7017, "step": 865 }, { "epoch": 1.1, "learning_rate": 0.000720726589473942, "loss": 1.6573, "step": 870 }, { "epoch": 1.11, "learning_rate": 0.000717682629596563, "loss": 1.4819, "step": 875 }, { "epoch": 1.12, "learning_rate": 0.0007146286821368767, "loss": 1.6235, "step": 880 }, { "epoch": 1.12, "learning_rate": 0.0007115648872142474, "loss": 1.6736, "step": 885 }, { "epoch": 1.13, "learning_rate": 0.0007084913853998542, "loss": 1.6477, "step": 890 }, { "epoch": 1.13, "learning_rate": 0.0007054083177102423, "loss": 1.6538, "step": 895 }, { "epoch": 1.14, "learning_rate": 0.0007023158256008521, "loss": 1.5688, "step": 900 }, { "epoch": 1.15, "learning_rate": 0.0006992140509595303, "loss": 1.5525, "step": 905 }, { "epoch": 1.15, "learning_rate": 0.0006961031361000184, "loss": 1.7425, "step": 910 }, { "epoch": 1.16, "learning_rate": 0.0006929832237554241, "loss": 1.559, "step": 915 }, { "epoch": 1.17, "learning_rate": 0.0006898544570716722, "loss": 1.5379, "step": 920 }, { "epoch": 1.17, "learning_rate": 0.0006867169796009369, "loss": 1.586, "step": 925 }, { "epoch": 1.18, "learning_rate": 0.0006835709352950558, "loss": 1.6424, "step": 930 }, { "epoch": 1.19, "learning_rate": 0.000680416468498924, "loss": 1.6002, "step": 935 }, { "epoch": 1.19, "learning_rate": 0.0006772537239438732, "loss": 1.5361, "step": 940 }, { "epoch": 1.2, "learning_rate": 0.0006740828467410294, "loss": 1.5436, "step": 945 }, { "epoch": 1.2, "learning_rate": 0.0006709039823746564, "loss": 1.5845, "step": 950 }, { "epoch": 1.21, "learning_rate": 0.0006677172766954794, "loss": 1.5275, "step": 955 }, { "epoch": 1.22, "learning_rate": 0.0006645228759139949, "loss": 1.5879, "step": 960 }, { "epoch": 1.22, "learning_rate": 0.0006613209265937611, "loss": 1.5809, "step": 965 }, { "epoch": 1.23, "learning_rate": 0.0006581115756446733, "loss": 1.6111, "step": 970 }, { "epoch": 1.24, "learning_rate": 0.000654894970316224, "loss": 1.5794, "step": 975 }, { "epoch": 1.24, "learning_rate": 0.0006516712581907471, "loss": 1.5756, "step": 980 }, { "epoch": 1.25, "learning_rate": 0.000648440587176646, "loss": 1.5913, "step": 985 }, { "epoch": 1.25, "learning_rate": 0.0006452031055016072, "loss": 1.615, "step": 990 }, { "epoch": 1.26, "learning_rate": 0.0006419589617058008, "loss": 1.6578, "step": 995 }, { "epoch": 1.27, "learning_rate": 0.000638708304635063, "loss": 1.6285, "step": 1000 }, { "epoch": 1.27, "learning_rate": 0.0006354512834340694, "loss": 1.5911, "step": 1005 }, { "epoch": 1.28, "learning_rate": 0.0006321880475394899, "loss": 1.6383, "step": 1010 }, { "epoch": 1.29, "learning_rate": 0.0006289187466731334, "loss": 1.5337, "step": 1015 }, { "epoch": 1.29, "learning_rate": 0.0006256435308350785, "loss": 1.6334, "step": 1020 }, { "epoch": 1.3, "learning_rate": 0.0006223625502967903, "loss": 1.7418, "step": 1025 }, { "epoch": 1.31, "learning_rate": 0.000619075955594227, "loss": 1.5987, "step": 1030 }, { "epoch": 1.31, "learning_rate": 0.0006157838975209322, "loss": 1.6512, "step": 1035 }, { "epoch": 1.32, "learning_rate": 0.0006124865271211165, "loss": 1.68, "step": 1040 }, { "epoch": 1.32, "learning_rate": 0.0006091839956827278, "loss": 1.5891, "step": 1045 }, { "epoch": 1.33, "learning_rate": 0.0006058764547305088, "loss": 1.6131, "step": 1050 }, { "epoch": 1.34, "learning_rate": 0.0006025640560190467, "loss": 1.6289, "step": 1055 }, { "epoch": 1.34, "learning_rate": 0.0005992469515258089, "loss": 1.5907, "step": 1060 }, { "epoch": 1.35, "learning_rate": 0.0005959252934441706, "loss": 1.6156, "step": 1065 }, { "epoch": 1.36, "learning_rate": 0.0005925992341764323, "loss": 1.5184, "step": 1070 }, { "epoch": 1.36, "learning_rate": 0.000589268926326827, "loss": 1.5729, "step": 1075 }, { "epoch": 1.37, "learning_rate": 0.0005859345226945177, "loss": 1.5053, "step": 1080 }, { "epoch": 1.38, "learning_rate": 0.000582596176266589, "loss": 1.6205, "step": 1085 }, { "epoch": 1.38, "learning_rate": 0.0005792540402110257, "loss": 1.5363, "step": 1090 }, { "epoch": 1.39, "learning_rate": 0.0005759082678696857, "loss": 1.6992, "step": 1095 }, { "epoch": 1.39, "learning_rate": 0.0005725590127512657, "loss": 1.5543, "step": 1100 }, { "epoch": 1.4, "learning_rate": 0.0005692064285242558, "loss": 1.6408, "step": 1105 }, { "epoch": 1.41, "learning_rate": 0.0005658506690098916, "loss": 1.6324, "step": 1110 }, { "epoch": 1.41, "learning_rate": 0.0005624918881750943, "loss": 1.5604, "step": 1115 }, { "epoch": 1.42, "learning_rate": 0.0005591302401254076, "loss": 1.5888, "step": 1120 }, { "epoch": 1.43, "learning_rate": 0.000555765879097928, "loss": 1.661, "step": 1125 }, { "epoch": 1.43, "learning_rate": 0.0005523989594542258, "loss": 1.6155, "step": 1130 }, { "epoch": 1.44, "learning_rate": 0.0005490296356732649, "loss": 1.5069, "step": 1135 }, { "epoch": 1.44, "learning_rate": 0.0005456580623443145, "loss": 1.4833, "step": 1140 }, { "epoch": 1.45, "learning_rate": 0.000542284394159856, "loss": 1.4966, "step": 1145 }, { "epoch": 1.46, "learning_rate": 0.0005389087859084854, "loss": 1.6364, "step": 1150 }, { "epoch": 1.46, "learning_rate": 0.0005355313924678123, "loss": 1.6311, "step": 1155 }, { "epoch": 1.47, "learning_rate": 0.0005321523687973526, "loss": 1.5597, "step": 1160 }, { "epoch": 1.48, "learning_rate": 0.00052877186993142, "loss": 1.6759, "step": 1165 }, { "epoch": 1.48, "learning_rate": 0.0005253900509720118, "loss": 1.5734, "step": 1170 }, { "epoch": 1.49, "learning_rate": 0.0005220070670816932, "loss": 1.6767, "step": 1175 }, { "epoch": 1.5, "learning_rate": 0.0005186230734764782, "loss": 1.603, "step": 1180 }, { "epoch": 1.5, "learning_rate": 0.0005152382254187076, "loss": 1.5121, "step": 1185 }, { "epoch": 1.51, "learning_rate": 0.0005118526782099258, "loss": 1.5978, "step": 1190 }, { "epoch": 1.51, "learning_rate": 0.0005084665871837553, "loss": 1.4273, "step": 1195 }, { "epoch": 1.52, "learning_rate": 0.0005050801076987699, "loss": 1.5823, "step": 1200 }, { "epoch": 1.53, "learning_rate": 0.0005016933951313659, "loss": 1.6582, "step": 1205 }, { "epoch": 1.53, "learning_rate": 0.0004983066048686342, "loss": 1.5263, "step": 1210 }, { "epoch": 1.54, "learning_rate": 0.0004949198923012302, "loss": 1.6287, "step": 1215 }, { "epoch": 1.55, "learning_rate": 0.0004915334128162448, "loss": 1.5244, "step": 1220 }, { "epoch": 1.55, "learning_rate": 0.00048814732179007443, "loss": 1.5054, "step": 1225 }, { "epoch": 1.56, "learning_rate": 0.0004847617745812925, "loss": 1.5065, "step": 1230 }, { "epoch": 1.57, "learning_rate": 0.00048137692652352196, "loss": 1.649, "step": 1235 }, { "epoch": 1.57, "learning_rate": 0.0004779929329183068, "loss": 1.5417, "step": 1240 }, { "epoch": 1.58, "learning_rate": 0.0004746099490279883, "loss": 1.4152, "step": 1245 }, { "epoch": 1.58, "learning_rate": 0.00047122813006858017, "loss": 1.5171, "step": 1250 }, { "epoch": 1.59, "learning_rate": 0.0004678476312026475, "loss": 1.5013, "step": 1255 }, { "epoch": 1.6, "learning_rate": 0.00046446860753218797, "loss": 1.5518, "step": 1260 }, { "epoch": 1.6, "learning_rate": 0.0004610912140915146, "loss": 1.5121, "step": 1265 }, { "epoch": 1.61, "learning_rate": 0.00045771560584014414, "loss": 1.4661, "step": 1270 }, { "epoch": 1.62, "learning_rate": 0.0004543419376556855, "loss": 1.5079, "step": 1275 }, { "epoch": 1.62, "learning_rate": 0.00045097036432673515, "loss": 1.5807, "step": 1280 }, { "epoch": 1.63, "learning_rate": 0.0004476010405457744, "loss": 1.5803, "step": 1285 }, { "epoch": 1.63, "learning_rate": 0.00044423412090207216, "loss": 1.5011, "step": 1290 }, { "epoch": 1.64, "learning_rate": 0.0004408697598745924, "loss": 1.5691, "step": 1295 }, { "epoch": 1.65, "learning_rate": 0.0004375081118249058, "loss": 1.5332, "step": 1300 }, { "epoch": 1.65, "learning_rate": 0.0004341493309901085, "loss": 1.5385, "step": 1305 }, { "epoch": 1.66, "learning_rate": 0.00043079357147574434, "loss": 1.6153, "step": 1310 }, { "epoch": 1.67, "learning_rate": 0.0004274409872487345, "loss": 1.6695, "step": 1315 }, { "epoch": 1.67, "learning_rate": 0.0004240917321303143, "loss": 1.5654, "step": 1320 }, { "epoch": 1.68, "learning_rate": 0.0004207459597889744, "loss": 1.5585, "step": 1325 }, { "epoch": 1.69, "learning_rate": 0.000417403823733411, "loss": 1.5072, "step": 1330 }, { "epoch": 1.69, "learning_rate": 0.0004140654773054824, "loss": 1.5723, "step": 1335 }, { "epoch": 1.7, "learning_rate": 0.00041073107367317315, "loss": 1.6242, "step": 1340 }, { "epoch": 1.7, "learning_rate": 0.0004074007658235678, "loss": 1.5305, "step": 1345 }, { "epoch": 1.71, "learning_rate": 0.00040407470655582934, "loss": 1.5084, "step": 1350 }, { "epoch": 1.72, "learning_rate": 0.00040075304847419114, "loss": 1.5424, "step": 1355 }, { "epoch": 1.72, "learning_rate": 0.0003974359439809534, "loss": 1.5136, "step": 1360 }, { "epoch": 1.73, "learning_rate": 0.0003941235452694913, "loss": 1.5349, "step": 1365 }, { "epoch": 1.74, "learning_rate": 0.0003908160043172725, "loss": 1.4194, "step": 1370 }, { "epoch": 1.74, "learning_rate": 0.0003875134728788834, "loss": 1.6139, "step": 1375 }, { "epoch": 1.75, "learning_rate": 0.0003842161024790679, "loss": 1.4876, "step": 1380 }, { "epoch": 1.76, "learning_rate": 0.00038092404440577315, "loss": 1.5042, "step": 1385 }, { "epoch": 1.76, "learning_rate": 0.0003776374497032097, "loss": 1.5229, "step": 1390 }, { "epoch": 1.77, "learning_rate": 0.0003743564691649216, "loss": 1.5647, "step": 1395 }, { "epoch": 1.77, "learning_rate": 0.0003710812533268666, "loss": 1.5142, "step": 1400 }, { "epoch": 1.78, "learning_rate": 0.0003678119524605102, "loss": 1.5012, "step": 1405 }, { "epoch": 1.79, "learning_rate": 0.0003645487165659305, "loss": 1.4997, "step": 1410 }, { "epoch": 1.79, "learning_rate": 0.000361291695364937, "loss": 1.3816, "step": 1415 }, { "epoch": 1.8, "learning_rate": 0.00035804103829419943, "loss": 1.4572, "step": 1420 }, { "epoch": 1.81, "learning_rate": 0.0003547968944983927, "loss": 1.5515, "step": 1425 }, { "epoch": 1.81, "learning_rate": 0.00035155941282335415, "loss": 1.6266, "step": 1430 }, { "epoch": 1.82, "learning_rate": 0.0003483287418092528, "loss": 1.4554, "step": 1435 }, { "epoch": 1.82, "learning_rate": 0.000345105029683776, "loss": 1.4924, "step": 1440 }, { "epoch": 1.83, "learning_rate": 0.00034188842435532686, "loss": 1.4861, "step": 1445 }, { "epoch": 1.84, "learning_rate": 0.000338679073406239, "loss": 1.4455, "step": 1450 }, { "epoch": 1.84, "learning_rate": 0.0003354771240860052, "loss": 1.5319, "step": 1455 }, { "epoch": 1.85, "learning_rate": 0.0003322827233045206, "loss": 1.5121, "step": 1460 }, { "epoch": 1.86, "learning_rate": 0.0003290960176253438, "loss": 1.4594, "step": 1465 }, { "epoch": 1.86, "learning_rate": 0.0003259171532589707, "loss": 1.5755, "step": 1470 }, { "epoch": 1.87, "learning_rate": 0.00032274627605612685, "loss": 1.6141, "step": 1475 }, { "epoch": 1.88, "learning_rate": 0.00031958353150107597, "loss": 1.4925, "step": 1480 }, { "epoch": 1.88, "learning_rate": 0.0003164290647049443, "loss": 1.5114, "step": 1485 }, { "epoch": 1.89, "learning_rate": 0.0003132830203990631, "loss": 1.4687, "step": 1490 }, { "epoch": 1.89, "learning_rate": 0.00031014554292832795, "loss": 1.392, "step": 1495 }, { "epoch": 1.9, "learning_rate": 0.0003070167762445759, "loss": 1.4322, "step": 1500 }, { "epoch": 1.91, "learning_rate": 0.0003038968638999816, "loss": 1.5359, "step": 1505 }, { "epoch": 1.91, "learning_rate": 0.00030078594904046967, "loss": 1.5745, "step": 1510 }, { "epoch": 1.92, "learning_rate": 0.0002976841743991479, "loss": 1.4865, "step": 1515 }, { "epoch": 1.93, "learning_rate": 0.0002945916822897579, "loss": 1.4958, "step": 1520 }, { "epoch": 1.93, "learning_rate": 0.0002915086146001458, "loss": 1.4678, "step": 1525 }, { "epoch": 1.94, "learning_rate": 0.0002884351127857526, "loss": 1.4629, "step": 1530 }, { "epoch": 1.95, "learning_rate": 0.00028537131786312333, "loss": 1.5381, "step": 1535 }, { "epoch": 1.95, "learning_rate": 0.00028231737040343717, "loss": 1.5322, "step": 1540 }, { "epoch": 1.96, "learning_rate": 0.000279273410526058, "loss": 1.4523, "step": 1545 }, { "epoch": 1.96, "learning_rate": 0.000276239577892106, "loss": 1.552, "step": 1550 }, { "epoch": 1.97, "learning_rate": 0.00027321601169804873, "loss": 1.4431, "step": 1555 }, { "epoch": 1.98, "learning_rate": 0.00027020285066931525, "loss": 1.5036, "step": 1560 }, { "epoch": 1.98, "learning_rate": 0.0002672002330539318, "loss": 1.5313, "step": 1565 }, { "epoch": 1.99, "learning_rate": 0.0002642082966161772, "loss": 1.5092, "step": 1570 }, { "epoch": 2.0, "learning_rate": 0.0002612271786302636, "loss": 1.5065, "step": 1575 }, { "epoch": 2.0, "eval_loss": 1.6175580024719238, "eval_runtime": 366.4829, "eval_samples_per_second": 34.948, "eval_steps_per_second": 2.186, "step": 1578 }, { "epoch": 2.0, "learning_rate": 0.0002582570158740372, "loss": 1.5297, "step": 1580 }, { "epoch": 2.01, "learning_rate": 0.00025529794462270285, "loss": 1.3664, "step": 1585 }, { "epoch": 2.02, "learning_rate": 0.0002523501006425724, "loss": 1.2522, "step": 1590 }, { "epoch": 2.02, "learning_rate": 0.000249413619184834, "loss": 1.2119, "step": 1595 }, { "epoch": 2.03, "learning_rate": 0.00024648863497934824, "loss": 1.3337, "step": 1600 }, { "epoch": 2.03, "learning_rate": 0.00024357528222846515, "loss": 1.2604, "step": 1605 }, { "epoch": 2.04, "learning_rate": 0.00024067369460086769, "loss": 1.2815, "step": 1610 }, { "epoch": 2.05, "learning_rate": 0.00023778400522543846, "loss": 1.2383, "step": 1615 }, { "epoch": 2.05, "learning_rate": 0.00023490634668515154, "loss": 1.2959, "step": 1620 }, { "epoch": 2.06, "learning_rate": 0.00023204085101099015, "loss": 1.3136, "step": 1625 }, { "epoch": 2.07, "learning_rate": 0.00022918764967588763, "loss": 1.2596, "step": 1630 }, { "epoch": 2.07, "learning_rate": 0.00022634687358869595, "loss": 1.272, "step": 1635 }, { "epoch": 2.08, "learning_rate": 0.00022351865308817991, "loss": 1.218, "step": 1640 }, { "epoch": 2.08, "learning_rate": 0.00022070311793703608, "loss": 1.3847, "step": 1645 }, { "epoch": 2.09, "learning_rate": 0.00021790039731593946, "loss": 1.2601, "step": 1650 }, { "epoch": 2.1, "learning_rate": 0.00021511061981761666, "loss": 1.3791, "step": 1655 }, { "epoch": 2.1, "learning_rate": 0.0002123339134409461, "loss": 1.2843, "step": 1660 }, { "epoch": 2.11, "learning_rate": 0.0002095704055850846, "loss": 1.2707, "step": 1665 }, { "epoch": 2.12, "learning_rate": 0.00020682022304362226, "loss": 1.2781, "step": 1670 }, { "epoch": 2.12, "learning_rate": 0.00020408349199876587, "loss": 1.3632, "step": 1675 }, { "epoch": 2.13, "learning_rate": 0.0002013603380155482, "loss": 1.2773, "step": 1680 }, { "epoch": 2.14, "learning_rate": 0.00019865088603606773, "loss": 1.2757, "step": 1685 }, { "epoch": 2.14, "learning_rate": 0.00019595526037375588, "loss": 1.299, "step": 1690 }, { "epoch": 2.15, "learning_rate": 0.00019327358470767325, "loss": 1.2752, "step": 1695 }, { "epoch": 2.15, "learning_rate": 0.0001906059820768357, "loss": 1.3311, "step": 1700 }, { "epoch": 2.16, "learning_rate": 0.00018795257487456797, "loss": 1.2696, "step": 1705 }, { "epoch": 2.17, "learning_rate": 0.00018531348484288925, "loss": 1.2492, "step": 1710 }, { "epoch": 2.17, "learning_rate": 0.00018268883306692674, "loss": 1.3153, "step": 1715 }, { "epoch": 2.18, "learning_rate": 0.0001800787399693604, "loss": 1.274, "step": 1720 }, { "epoch": 2.19, "learning_rate": 0.0001774833253048978, "loss": 1.3302, "step": 1725 }, { "epoch": 2.19, "learning_rate": 0.0001749027081547792, "loss": 1.1726, "step": 1730 }, { "epoch": 2.2, "learning_rate": 0.0001723370069213151, "loss": 1.2367, "step": 1735 }, { "epoch": 2.21, "learning_rate": 0.0001697863393224522, "loss": 1.2647, "step": 1740 }, { "epoch": 2.21, "learning_rate": 0.0001672508223863732, "loss": 1.2689, "step": 1745 }, { "epoch": 2.22, "learning_rate": 0.00016473057244612778, "loss": 1.2523, "step": 1750 }, { "epoch": 2.22, "learning_rate": 0.00016222570513429403, "loss": 1.2597, "step": 1755 }, { "epoch": 2.23, "learning_rate": 0.00015973633537767396, "loss": 1.2528, "step": 1760 }, { "epoch": 2.24, "learning_rate": 0.00015726257739201994, "loss": 1.2704, "step": 1765 }, { "epoch": 2.24, "learning_rate": 0.0001548045446767945, "loss": 1.1525, "step": 1770 }, { "epoch": 2.25, "learning_rate": 0.0001523623500099633, "loss": 1.2283, "step": 1775 }, { "epoch": 2.26, "learning_rate": 0.0001499361054428196, "loss": 1.3046, "step": 1780 }, { "epoch": 2.26, "learning_rate": 0.00014752592229484463, "loss": 1.3047, "step": 1785 }, { "epoch": 2.27, "learning_rate": 0.00014513191114859869, "loss": 1.2188, "step": 1790 }, { "epoch": 2.27, "learning_rate": 0.0001427541818446484, "loss": 1.2769, "step": 1795 }, { "epoch": 2.28, "learning_rate": 0.0001403928434765267, "loss": 1.2688, "step": 1800 }, { "epoch": 2.29, "learning_rate": 0.00013804800438572745, "loss": 1.2727, "step": 1805 }, { "epoch": 2.29, "learning_rate": 0.00013571977215673508, "loss": 1.1562, "step": 1810 }, { "epoch": 2.3, "learning_rate": 0.00013340825361208758, "loss": 1.3305, "step": 1815 }, { "epoch": 2.31, "learning_rate": 0.00013111355480747622, "loss": 1.2663, "step": 1820 }, { "epoch": 2.31, "learning_rate": 0.0001288357810268791, "loss": 1.2736, "step": 1825 }, { "epoch": 2.32, "learning_rate": 0.00012657503677772992, "loss": 1.2877, "step": 1830 }, { "epoch": 2.33, "learning_rate": 0.00012433142578612493, "loss": 1.3585, "step": 1835 }, { "epoch": 2.33, "learning_rate": 0.00012210505099206177, "loss": 1.3242, "step": 1840 }, { "epoch": 2.34, "learning_rate": 0.00011989601454471788, "loss": 1.3037, "step": 1845 }, { "epoch": 2.34, "learning_rate": 0.00011770441779776281, "loss": 1.2969, "step": 1850 }, { "epoch": 2.35, "learning_rate": 0.0001155303613047084, "loss": 1.2783, "step": 1855 }, { "epoch": 2.36, "learning_rate": 0.00011337394481429563, "loss": 1.2409, "step": 1860 }, { "epoch": 2.36, "learning_rate": 0.00011123526726591682, "loss": 1.2276, "step": 1865 }, { "epoch": 2.37, "learning_rate": 0.00010911442678507771, "loss": 1.261, "step": 1870 }, { "epoch": 2.38, "learning_rate": 0.00010701152067889408, "loss": 1.2741, "step": 1875 }, { "epoch": 2.38, "learning_rate": 0.0001049266454316275, "loss": 1.2337, "step": 1880 }, { "epoch": 2.39, "learning_rate": 0.00010285989670025914, "loss": 1.2259, "step": 1885 }, { "epoch": 2.4, "learning_rate": 0.00010081136931009982, "loss": 1.3068, "step": 1890 }, { "epoch": 2.4, "learning_rate": 9.878115725044045e-05, "loss": 1.2444, "step": 1895 }, { "epoch": 2.41, "learning_rate": 9.676935367023826e-05, "loss": 1.2301, "step": 1900 }, { "epoch": 2.41, "learning_rate": 9.477605087384428e-05, "loss": 1.2882, "step": 1905 }, { "epoch": 2.42, "learning_rate": 9.280134031676795e-05, "loss": 1.2423, "step": 1910 }, { "epoch": 2.43, "learning_rate": 9.084531260148043e-05, "loss": 1.2471, "step": 1915 }, { "epoch": 2.43, "learning_rate": 8.890805747325864e-05, "loss": 1.2215, "step": 1920 }, { "epoch": 2.44, "learning_rate": 8.698966381606633e-05, "loss": 1.359, "step": 1925 }, { "epoch": 2.45, "learning_rate": 8.509021964847752e-05, "loss": 1.2103, "step": 1930 }, { "epoch": 2.45, "learning_rate": 8.320981211963647e-05, "loss": 1.291, "step": 1935 }, { "epoch": 2.46, "learning_rate": 8.134852750526017e-05, "loss": 1.2588, "step": 1940 }, { "epoch": 2.47, "learning_rate": 7.950645120368011e-05, "loss": 1.3012, "step": 1945 }, { "epoch": 2.47, "learning_rate": 7.768366773192286e-05, "loss": 1.2732, "step": 1950 }, { "epoch": 2.48, "learning_rate": 7.58802607218339e-05, "loss": 1.2364, "step": 1955 }, { "epoch": 2.48, "learning_rate": 7.409631291623903e-05, "loss": 1.3049, "step": 1960 }, { "epoch": 2.49, "learning_rate": 7.23319061651489e-05, "loss": 1.334, "step": 1965 }, { "epoch": 2.5, "learning_rate": 7.058712142200324e-05, "loss": 1.1297, "step": 1970 }, { "epoch": 2.5, "learning_rate": 6.886203873995672e-05, "loss": 1.3076, "step": 1975 }, { "epoch": 2.51, "learning_rate": 6.715673726820626e-05, "loss": 1.2343, "step": 1980 }, { "epoch": 2.52, "learning_rate": 6.547129524835904e-05, "loss": 1.2474, "step": 1985 }, { "epoch": 2.52, "learning_rate": 6.38057900108428e-05, "loss": 1.2312, "step": 1990 }, { "epoch": 2.53, "learning_rate": 6.216029797135842e-05, "loss": 1.1307, "step": 1995 }, { "epoch": 2.53, "learning_rate": 6.053489462737283e-05, "loss": 1.2532, "step": 2000 }, { "epoch": 2.54, "learning_rate": 5.8929654554655985e-05, "loss": 1.2598, "step": 2005 }, { "epoch": 2.55, "learning_rate": 5.734465140385864e-05, "loss": 1.1858, "step": 2010 }, { "epoch": 2.55, "learning_rate": 5.5779957897133415e-05, "loss": 1.225, "step": 2015 }, { "epoch": 2.56, "learning_rate": 5.423564582479845e-05, "loss": 1.2459, "step": 2020 }, { "epoch": 2.57, "learning_rate": 5.2711786042042854e-05, "loss": 1.1665, "step": 2025 }, { "epoch": 2.57, "learning_rate": 5.120844846567657e-05, "loss": 1.2943, "step": 2030 }, { "epoch": 2.58, "learning_rate": 4.972570207092186e-05, "loss": 1.3287, "step": 2035 }, { "epoch": 2.59, "learning_rate": 4.826361488824898e-05, "loss": 1.2245, "step": 2040 }, { "epoch": 2.59, "learning_rate": 4.68222540002548e-05, "loss": 1.1462, "step": 2045 }, { "epoch": 2.6, "learning_rate": 4.5401685538584756e-05, "loss": 1.2929, "step": 2050 }, { "epoch": 2.6, "learning_rate": 4.400197468089906e-05, "loss": 1.2585, "step": 2055 }, { "epoch": 2.61, "learning_rate": 4.26231856478817e-05, "loss": 1.1936, "step": 2060 }, { "epoch": 2.62, "learning_rate": 4.126538170029453e-05, "loss": 1.3289, "step": 2065 }, { "epoch": 2.62, "learning_rate": 3.99286251360742e-05, "loss": 1.2575, "step": 2070 }, { "epoch": 2.63, "learning_rate": 3.8612977287474093e-05, "loss": 1.2051, "step": 2075 }, { "epoch": 2.64, "learning_rate": 3.731849851825042e-05, "loss": 1.265, "step": 2080 }, { "epoch": 2.64, "learning_rate": 3.604524822089245e-05, "loss": 1.273, "step": 2085 }, { "epoch": 2.65, "learning_rate": 3.479328481389771e-05, "loss": 1.2193, "step": 2090 }, { "epoch": 2.66, "learning_rate": 3.35626657390915e-05, "loss": 1.3077, "step": 2095 }, { "epoch": 2.66, "learning_rate": 3.235344745899116e-05, "loss": 1.177, "step": 2100 }, { "epoch": 2.67, "learning_rate": 3.1165685454216333e-05, "loss": 1.317, "step": 2105 }, { "epoch": 2.67, "learning_rate": 2.9999434220942255e-05, "loss": 1.2496, "step": 2110 }, { "epoch": 2.68, "learning_rate": 2.8854747268400318e-05, "loss": 1.2356, "step": 2115 }, { "epoch": 2.69, "learning_rate": 2.773167711642266e-05, "loss": 1.2738, "step": 2120 }, { "epoch": 2.69, "learning_rate": 2.663027529303236e-05, "loss": 1.2312, "step": 2125 }, { "epoch": 2.7, "learning_rate": 2.5550592332079515e-05, "loss": 1.2528, "step": 2130 }, { "epoch": 2.71, "learning_rate": 2.449267777092251e-05, "loss": 1.1745, "step": 2135 }, { "epoch": 2.71, "learning_rate": 2.3456580148155272e-05, "loss": 1.2588, "step": 2140 }, { "epoch": 2.72, "learning_rate": 2.2442347001380148e-05, "loss": 1.1721, "step": 2145 }, { "epoch": 2.72, "learning_rate": 2.145002486502684e-05, "loss": 1.2337, "step": 2150 }, { "epoch": 2.73, "learning_rate": 2.0479659268217378e-05, "loss": 1.2546, "step": 2155 }, { "epoch": 2.74, "learning_rate": 1.953129473267723e-05, "loss": 1.2842, "step": 2160 }, { "epoch": 2.74, "learning_rate": 1.8604974770692508e-05, "loss": 1.1871, "step": 2165 }, { "epoch": 2.75, "learning_rate": 1.770074188311349e-05, "loss": 1.1609, "step": 2170 }, { "epoch": 2.76, "learning_rate": 1.681863755740487e-05, "loss": 1.1607, "step": 2175 }, { "epoch": 2.76, "learning_rate": 1.5958702265741977e-05, "loss": 1.2254, "step": 2180 }, { "epoch": 2.77, "learning_rate": 1.512097546315394e-05, "loss": 1.2322, "step": 2185 }, { "epoch": 2.78, "learning_rate": 1.430549558571359e-05, "loss": 1.1937, "step": 2190 }, { "epoch": 2.78, "learning_rate": 1.3512300048773685e-05, "loss": 1.2344, "step": 2195 }, { "epoch": 2.79, "learning_rate": 1.2741425245250626e-05, "loss": 1.2383, "step": 2200 }, { "epoch": 2.79, "learning_rate": 1.199290654395424e-05, "loss": 1.2553, "step": 2205 }, { "epoch": 2.8, "learning_rate": 1.1266778287965297e-05, "loss": 1.381, "step": 2210 }, { "epoch": 2.81, "learning_rate": 1.056307379305993e-05, "loss": 1.1926, "step": 2215 }, { "epoch": 2.81, "learning_rate": 9.88182534618054e-06, "loss": 1.1419, "step": 2220 }, { "epoch": 2.82, "learning_rate": 9.223064203955078e-06, "loss": 1.2016, "step": 2225 }, { "epoch": 2.83, "learning_rate": 8.586820591262367e-06, "loss": 1.2364, "step": 2230 }, { "epoch": 2.83, "learning_rate": 7.973123699845819e-06, "loss": 1.2936, "step": 2235 }, { "epoch": 2.84, "learning_rate": 7.382001686973782e-06, "loss": 1.2675, "step": 2240 }, { "epoch": 2.85, "learning_rate": 6.8134816741476364e-06, "loss": 1.2742, "step": 2245 }, { "epoch": 2.85, "learning_rate": 6.267589745857727e-06, "loss": 1.2404, "step": 2250 }, { "epoch": 2.86, "learning_rate": 5.744350948386101e-06, "loss": 1.311, "step": 2255 }, { "epoch": 2.86, "learning_rate": 5.243789288657818e-06, "loss": 1.2043, "step": 2260 }, { "epoch": 2.87, "learning_rate": 4.765927733139108e-06, "loss": 1.2024, "step": 2265 }, { "epoch": 2.88, "learning_rate": 4.310788206783822e-06, "loss": 1.3395, "step": 2270 }, { "epoch": 2.88, "learning_rate": 3.87839159202763e-06, "loss": 1.2397, "step": 2275 }, { "epoch": 2.89, "learning_rate": 3.4687577278295635e-06, "loss": 1.2176, "step": 2280 }, { "epoch": 2.9, "learning_rate": 3.0819054087619093e-06, "loss": 1.1944, "step": 2285 }, { "epoch": 2.9, "learning_rate": 2.7178523841481227e-06, "loss": 1.2135, "step": 2290 }, { "epoch": 2.91, "learning_rate": 2.3766153572480353e-06, "loss": 1.259, "step": 2295 }, { "epoch": 2.91, "learning_rate": 2.05820998449191e-06, "loss": 1.1943, "step": 2300 }, { "epoch": 2.92, "learning_rate": 1.7626508747617954e-06, "loss": 1.2961, "step": 2305 }, { "epoch": 2.93, "learning_rate": 1.4899515887213943e-06, "loss": 1.157, "step": 2310 }, { "epoch": 2.93, "learning_rate": 1.2401246381938958e-06, "loss": 1.1454, "step": 2315 }, { "epoch": 2.94, "learning_rate": 1.0131814855877663e-06, "loss": 1.1584, "step": 2320 }, { "epoch": 2.95, "learning_rate": 8.091325433710606e-07, "loss": 1.1784, "step": 2325 }, { "epoch": 2.95, "learning_rate": 6.27987173593525e-07, "loss": 1.2977, "step": 2330 }, { "epoch": 2.96, "learning_rate": 4.6975368745705296e-07, "loss": 1.1467, "step": 2335 }, { "epoch": 2.97, "learning_rate": 3.3443934493443407e-07, "loss": 1.2271, "step": 2340 }, { "epoch": 2.97, "learning_rate": 2.2205035443623178e-07, "loss": 1.2055, "step": 2345 }, { "epoch": 2.98, "learning_rate": 1.325918725258446e-07, "loss": 1.166, "step": 2350 }, { "epoch": 2.98, "learning_rate": 6.60680036831396e-08, "loss": 1.2394, "step": 2355 }, { "epoch": 2.99, "learning_rate": 2.248180011588108e-08, "loss": 1.2422, "step": 2360 }, { "epoch": 3.0, "learning_rate": 1.8352616197314652e-09, "loss": 1.1873, "step": 2365 }, { "epoch": 3.0, "eval_loss": 1.5839784145355225, "eval_runtime": 366.1985, "eval_samples_per_second": 34.976, "eval_steps_per_second": 2.187, "step": 2367 }, { "epoch": 3.0, "step": 2367, "total_flos": 5.627889061063557e+17, "train_loss": 1.6624892393546513, "train_runtime": 33680.2242, "train_samples_per_second": 8.997, "train_steps_per_second": 0.07 } ], "max_steps": 2367, "num_train_epochs": 3, "total_flos": 5.627889061063557e+17, "trial_name": null, "trial_params": null }