{ "best_metric": null, "best_model_checkpoint": null, "epoch": 25.0, "eval_steps": 500, "global_step": 1100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11, "learning_rate": 0.001999898043009433, "loss": 4.5094, "step": 5 }, { "epoch": 0.23, "learning_rate": 0.0019995921928281893, "loss": 3.8047, "step": 10 }, { "epoch": 0.34, "learning_rate": 0.001999082511823396, "loss": 3.8813, "step": 15 }, { "epoch": 0.45, "learning_rate": 0.0019983691039261358, "loss": 3.7188, "step": 20 }, { "epoch": 0.57, "learning_rate": 0.0019974521146102534, "loss": 3.6695, "step": 25 }, { "epoch": 0.68, "learning_rate": 0.001996331730862691, "loss": 3.7078, "step": 30 }, { "epoch": 0.8, "learning_rate": 0.0019950081811453595, "loss": 3.6844, "step": 35 }, { "epoch": 0.91, "learning_rate": 0.0019934817353485504, "loss": 3.6961, "step": 40 }, { "epoch": 1.02, "learning_rate": 0.0019917527047359027, "loss": 3.5758, "step": 45 }, { "epoch": 1.14, "learning_rate": 0.001989821441880933, "loss": 3.4102, "step": 50 }, { "epoch": 1.25, "learning_rate": 0.0019876883405951376, "loss": 3.3984, "step": 55 }, { "epoch": 1.36, "learning_rate": 0.001985353835847693, "loss": 3.3602, "step": 60 }, { "epoch": 1.48, "learning_rate": 0.0019828184036767556, "loss": 3.4461, "step": 65 }, { "epoch": 1.59, "learning_rate": 0.0019800825610923932, "loss": 3.3461, "step": 70 }, { "epoch": 1.7, "learning_rate": 0.0019771468659711597, "loss": 3.4172, "step": 75 }, { "epoch": 1.82, "learning_rate": 0.0019740119169423336, "loss": 3.4359, "step": 80 }, { "epoch": 1.93, "learning_rate": 0.0019706783532658523, "loss": 3.5141, "step": 85 }, { "epoch": 2.05, "learning_rate": 0.001967146854701957, "loss": 3.2242, "step": 90 }, { "epoch": 2.16, "learning_rate": 0.0019634181413725788, "loss": 3.0227, "step": 95 }, { "epoch": 2.27, "learning_rate": 0.0019594929736144974, "loss": 2.8984, "step": 100 }, { "epoch": 2.39, "learning_rate": 0.001955372151824297, "loss": 3.0781, "step": 105 }, { "epoch": 2.5, "learning_rate": 0.0019510565162951536, "loss": 3.1203, "step": 110 }, { "epoch": 2.61, "learning_rate": 0.00194654694704549, "loss": 3.1828, "step": 115 }, { "epoch": 2.73, "learning_rate": 0.0019418443636395248, "loss": 3.0531, "step": 120 }, { "epoch": 2.84, "learning_rate": 0.001936949724999762, "loss": 3.1523, "step": 125 }, { "epoch": 2.95, "learning_rate": 0.0019318640292114524, "loss": 3.1156, "step": 130 }, { "epoch": 3.07, "learning_rate": 0.0019265883133190713, "loss": 2.7844, "step": 135 }, { "epoch": 3.18, "learning_rate": 0.0019211236531148502, "loss": 2.6711, "step": 140 }, { "epoch": 3.3, "learning_rate": 0.0019154711629194062, "loss": 2.6609, "step": 145 }, { "epoch": 3.41, "learning_rate": 0.0019096319953545184, "loss": 2.7531, "step": 150 }, { "epoch": 3.52, "learning_rate": 0.0019036073411080917, "loss": 2.7977, "step": 155 }, { "epoch": 3.64, "learning_rate": 0.0018973984286913585, "loss": 2.7914, "step": 160 }, { "epoch": 3.75, "learning_rate": 0.0018910065241883678, "loss": 2.8188, "step": 165 }, { "epoch": 3.86, "learning_rate": 0.0018844329309978143, "loss": 2.8945, "step": 170 }, { "epoch": 3.98, "learning_rate": 0.0018776789895672556, "loss": 2.8883, "step": 175 }, { "epoch": 4.09, "learning_rate": 0.0018707460771197773, "loss": 2.4617, "step": 180 }, { "epoch": 4.2, "learning_rate": 0.001863635607373157, "loss": 2.4633, "step": 185 }, { "epoch": 4.32, "learning_rate": 0.001856349030251589, "loss": 2.5094, "step": 190 }, { "epoch": 4.43, "learning_rate": 0.0018488878315900226, "loss": 2.432, "step": 195 }, { "epoch": 4.55, "learning_rate": 0.0018412535328311812, "loss": 2.5648, "step": 200 }, { "epoch": 4.66, "learning_rate": 0.0018334476907153176, "loss": 2.4836, "step": 205 }, { "epoch": 4.77, "learning_rate": 0.001825471896962774, "loss": 2.6617, "step": 210 }, { "epoch": 4.89, "learning_rate": 0.0018173277779494068, "loss": 2.6734, "step": 215 }, { "epoch": 5.0, "learning_rate": 0.0018090169943749475, "loss": 2.6742, "step": 220 }, { "epoch": 5.11, "learning_rate": 0.0018005412409243604, "loss": 2.1379, "step": 225 }, { "epoch": 5.23, "learning_rate": 0.0017919022459222751, "loss": 2.1508, "step": 230 }, { "epoch": 5.34, "learning_rate": 0.0017831017709805555, "loss": 2.2582, "step": 235 }, { "epoch": 5.45, "learning_rate": 0.0017741416106390826, "loss": 2.2367, "step": 240 }, { "epoch": 5.57, "learning_rate": 0.0017650235919998232, "loss": 2.325, "step": 245 }, { "epoch": 5.68, "learning_rate": 0.0017557495743542584, "loss": 2.2703, "step": 250 }, { "epoch": 5.8, "learning_rate": 0.0017463214488042471, "loss": 2.3703, "step": 255 }, { "epoch": 5.91, "learning_rate": 0.001736741137876405, "loss": 2.4648, "step": 260 }, { "epoch": 6.02, "learning_rate": 0.0017270105951300739, "loss": 2.2734, "step": 265 }, { "epoch": 6.14, "learning_rate": 0.0017171318047589637, "loss": 1.9898, "step": 270 }, { "epoch": 6.25, "learning_rate": 0.0017071067811865474, "loss": 1.9816, "step": 275 }, { "epoch": 6.36, "learning_rate": 0.0016969375686552938, "loss": 1.9648, "step": 280 }, { "epoch": 6.48, "learning_rate": 0.0016866262408098134, "loss": 2.1672, "step": 285 }, { "epoch": 6.59, "learning_rate": 0.0016761749002740195, "loss": 2.0074, "step": 290 }, { "epoch": 6.7, "learning_rate": 0.0016655856782223683, "loss": 2.1598, "step": 295 }, { "epoch": 6.82, "learning_rate": 0.0016548607339452852, "loss": 2.0996, "step": 300 }, { "epoch": 6.93, "learning_rate": 0.0016440022544088554, "loss": 2.1434, "step": 305 }, { "epoch": 7.05, "learning_rate": 0.0016330124538088703, "loss": 2.0699, "step": 310 }, { "epoch": 7.16, "learning_rate": 0.0016218935731193223, "loss": 1.7312, "step": 315 }, { "epoch": 7.27, "learning_rate": 0.0016106478796354383, "loss": 1.7799, "step": 320 }, { "epoch": 7.39, "learning_rate": 0.0015992776665113468, "loss": 1.7008, "step": 325 }, { "epoch": 7.5, "learning_rate": 0.0015877852522924731, "loss": 1.8969, "step": 330 }, { "epoch": 7.61, "learning_rate": 0.0015761729804427528, "loss": 1.8156, "step": 335 }, { "epoch": 7.73, "learning_rate": 0.0015644432188667695, "loss": 1.9336, "step": 340 }, { "epoch": 7.84, "learning_rate": 0.0015525983594269026, "loss": 1.9918, "step": 345 }, { "epoch": 7.95, "learning_rate": 0.0015406408174555976, "loss": 2.0055, "step": 350 }, { "epoch": 8.07, "learning_rate": 0.0015285730312628418, "loss": 1.7168, "step": 355 }, { "epoch": 8.18, "learning_rate": 0.001516397461638962, "loss": 1.5531, "step": 360 }, { "epoch": 8.3, "learning_rate": 0.001504116591352832, "loss": 1.5922, "step": 365 }, { "epoch": 8.41, "learning_rate": 0.001491732924645604, "loss": 1.618, "step": 370 }, { "epoch": 8.52, "learning_rate": 0.0014792489867200569, "loss": 1.6738, "step": 375 }, { "epoch": 8.64, "learning_rate": 0.0014666673232256737, "loss": 1.7461, "step": 380 }, { "epoch": 8.75, "learning_rate": 0.0014539904997395467, "loss": 1.6746, "step": 385 }, { "epoch": 8.86, "learning_rate": 0.0014412211012432212, "loss": 1.7711, "step": 390 }, { "epoch": 8.98, "learning_rate": 0.0014283617315955814, "loss": 1.8387, "step": 395 }, { "epoch": 9.09, "learning_rate": 0.0014154150130018866, "loss": 1.475, "step": 400 }, { "epoch": 9.2, "learning_rate": 0.001402383585479068, "loss": 1.4523, "step": 405 }, { "epoch": 9.32, "learning_rate": 0.0013892701063173917, "loss": 1.4812, "step": 410 }, { "epoch": 9.43, "learning_rate": 0.0013760772495385997, "loss": 1.525, "step": 415 }, { "epoch": 9.55, "learning_rate": 0.001362807705350641, "loss": 1.398, "step": 420 }, { "epoch": 9.66, "learning_rate": 0.0013494641795990985, "loss": 1.4477, "step": 425 }, { "epoch": 9.77, "learning_rate": 0.00133604939321543, "loss": 1.5801, "step": 430 }, { "epoch": 9.89, "learning_rate": 0.0013225660816621341, "loss": 1.6422, "step": 435 }, { "epoch": 10.0, "learning_rate": 0.0013090169943749475, "loss": 1.5535, "step": 440 }, { "epoch": 10.11, "learning_rate": 0.0012954048942022001, "loss": 1.2324, "step": 445 }, { "epoch": 10.23, "learning_rate": 0.0012817325568414298, "loss": 1.2613, "step": 450 }, { "epoch": 10.34, "learning_rate": 0.001268002770273379, "loss": 1.3293, "step": 455 }, { "epoch": 10.45, "learning_rate": 0.0012542183341934872, "loss": 1.2852, "step": 460 }, { "epoch": 10.57, "learning_rate": 0.0012403820594409924, "loss": 1.3295, "step": 465 }, { "epoch": 10.68, "learning_rate": 0.0012264967674257645, "loss": 1.3287, "step": 470 }, { "epoch": 10.8, "learning_rate": 0.0012125652895529767, "loss": 1.3566, "step": 475 }, { "epoch": 10.91, "learning_rate": 0.0011985904666457455, "loss": 1.4414, "step": 480 }, { "epoch": 11.02, "learning_rate": 0.0011845751483658454, "loss": 1.3695, "step": 485 }, { "epoch": 11.14, "learning_rate": 0.0011705221926326238, "loss": 1.1363, "step": 490 }, { "epoch": 11.25, "learning_rate": 0.001156434465040231, "loss": 1.1354, "step": 495 }, { "epoch": 11.36, "learning_rate": 0.0011423148382732854, "loss": 1.0725, "step": 500 }, { "epoch": 11.48, "learning_rate": 0.001128166191521093, "loss": 1.1754, "step": 505 }, { "epoch": 11.59, "learning_rate": 0.0011139914098905405, "loss": 1.1848, "step": 510 }, { "epoch": 11.7, "learning_rate": 0.0010997933838177826, "loss": 1.2354, "step": 515 }, { "epoch": 11.82, "learning_rate": 0.0010855750084788399, "loss": 1.1984, "step": 520 }, { "epoch": 11.93, "learning_rate": 0.0010713391831992322, "loss": 1.2666, "step": 525 }, { "epoch": 12.05, "learning_rate": 0.001057088810862768, "loss": 1.1408, "step": 530 }, { "epoch": 12.16, "learning_rate": 0.0010428267973196027, "loss": 0.9385, "step": 535 }, { "epoch": 12.27, "learning_rate": 0.0010285560507936962, "loss": 1.0158, "step": 540 }, { "epoch": 12.39, "learning_rate": 0.0010142794812897874, "loss": 0.9936, "step": 545 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 0.9891, "step": 550 }, { "epoch": 12.61, "learning_rate": 0.000985720518710213, "loss": 1.0684, "step": 555 }, { "epoch": 12.73, "learning_rate": 0.0009714439492063038, "loss": 1.076, "step": 560 }, { "epoch": 12.84, "learning_rate": 0.0009571732026803976, "loss": 1.0609, "step": 565 }, { "epoch": 12.95, "learning_rate": 0.000942911189137232, "loss": 1.1297, "step": 570 }, { "epoch": 13.07, "learning_rate": 0.0009286608168007677, "loss": 0.9342, "step": 575 }, { "epoch": 13.18, "learning_rate": 0.0009144249915211606, "loss": 0.8511, "step": 580 }, { "epoch": 13.3, "learning_rate": 0.0009002066161822172, "loss": 0.8336, "step": 585 }, { "epoch": 13.41, "learning_rate": 0.0008860085901094594, "loss": 0.8652, "step": 590 }, { "epoch": 13.52, "learning_rate": 0.0008718338084789072, "loss": 0.9744, "step": 595 }, { "epoch": 13.64, "learning_rate": 0.000857685161726715, "loss": 0.9006, "step": 600 }, { "epoch": 13.75, "learning_rate": 0.000843565534959769, "loss": 0.9619, "step": 605 }, { "epoch": 13.86, "learning_rate": 0.0008294778073673762, "loss": 0.9123, "step": 610 }, { "epoch": 13.98, "learning_rate": 0.0008154248516341547, "loss": 0.9959, "step": 615 }, { "epoch": 14.09, "learning_rate": 0.0008014095333542549, "loss": 0.7503, "step": 620 }, { "epoch": 14.2, "learning_rate": 0.0007874347104470233, "loss": 0.7357, "step": 625 }, { "epoch": 14.32, "learning_rate": 0.0007735032325742355, "loss": 0.7477, "step": 630 }, { "epoch": 14.43, "learning_rate": 0.0007596179405590076, "loss": 0.8088, "step": 635 }, { "epoch": 14.55, "learning_rate": 0.0007457816658065133, "loss": 0.7652, "step": 640 }, { "epoch": 14.66, "learning_rate": 0.0007319972297266214, "loss": 0.7847, "step": 645 }, { "epoch": 14.77, "learning_rate": 0.0007182674431585703, "loss": 0.7984, "step": 650 }, { "epoch": 14.89, "learning_rate": 0.0007045951057978, "loss": 0.8732, "step": 655 }, { "epoch": 15.0, "learning_rate": 0.0006909830056250527, "loss": 0.8258, "step": 660 }, { "epoch": 15.11, "learning_rate": 0.0006774339183378663, "loss": 0.6311, "step": 665 }, { "epoch": 15.23, "learning_rate": 0.0006639506067845697, "loss": 0.6543, "step": 670 }, { "epoch": 15.34, "learning_rate": 0.0006505358204009018, "loss": 0.6421, "step": 675 }, { "epoch": 15.45, "learning_rate": 0.0006371922946493591, "loss": 0.6937, "step": 680 }, { "epoch": 15.57, "learning_rate": 0.0006239227504614003, "loss": 0.6887, "step": 685 }, { "epoch": 15.68, "learning_rate": 0.0006107298936826086, "loss": 0.7097, "step": 690 }, { "epoch": 15.8, "learning_rate": 0.0005976164145209322, "loss": 0.6778, "step": 695 }, { "epoch": 15.91, "learning_rate": 0.0005845849869981136, "loss": 0.7124, "step": 700 }, { "epoch": 16.02, "learning_rate": 0.000571638268404419, "loss": 0.7053, "step": 705 }, { "epoch": 16.14, "learning_rate": 0.0005587788987567784, "loss": 0.5863, "step": 710 }, { "epoch": 16.25, "learning_rate": 0.0005460095002604533, "loss": 0.5588, "step": 715 }, { "epoch": 16.36, "learning_rate": 0.0005333326767743263, "loss": 0.5363, "step": 720 }, { "epoch": 16.48, "learning_rate": 0.0005207510132799435, "loss": 0.6137, "step": 725 }, { "epoch": 16.59, "learning_rate": 0.0005082670753543961, "loss": 0.5606, "step": 730 }, { "epoch": 16.7, "learning_rate": 0.0004958834086471683, "loss": 0.629, "step": 735 }, { "epoch": 16.82, "learning_rate": 0.00048360253836103817, "loss": 0.5754, "step": 740 }, { "epoch": 16.93, "learning_rate": 0.0004714269687371581, "loss": 0.6239, "step": 745 }, { "epoch": 17.05, "learning_rate": 0.0004593591825444028, "loss": 0.5807, "step": 750 }, { "epoch": 17.16, "learning_rate": 0.0004474016405730973, "loss": 0.465, "step": 755 }, { "epoch": 17.27, "learning_rate": 0.00043555678113323104, "loss": 0.4871, "step": 760 }, { "epoch": 17.39, "learning_rate": 0.00042382701955724725, "loss": 0.4623, "step": 765 }, { "epoch": 17.5, "learning_rate": 0.00041221474770752696, "loss": 0.5059, "step": 770 }, { "epoch": 17.61, "learning_rate": 0.00040072233348865304, "loss": 0.5021, "step": 775 }, { "epoch": 17.73, "learning_rate": 0.0003893521203645618, "loss": 0.5138, "step": 780 }, { "epoch": 17.84, "learning_rate": 0.00037810642688067796, "loss": 0.5212, "step": 785 }, { "epoch": 17.95, "learning_rate": 0.00036698754619112975, "loss": 0.5611, "step": 790 }, { "epoch": 18.07, "learning_rate": 0.00035599774559114475, "loss": 0.4956, "step": 795 }, { "epoch": 18.18, "learning_rate": 0.000345139266054715, "loss": 0.4243, "step": 800 }, { "epoch": 18.3, "learning_rate": 0.0003344143217776319, "loss": 0.4391, "step": 805 }, { "epoch": 18.41, "learning_rate": 0.00032382509972598086, "loss": 0.4627, "step": 810 }, { "epoch": 18.52, "learning_rate": 0.0003133737591901864, "loss": 0.4208, "step": 815 }, { "epoch": 18.64, "learning_rate": 0.0003030624313447067, "loss": 0.45, "step": 820 }, { "epoch": 18.75, "learning_rate": 0.00029289321881345256, "loss": 0.44, "step": 825 }, { "epoch": 18.86, "learning_rate": 0.0002828681952410366, "loss": 0.4451, "step": 830 }, { "epoch": 18.98, "learning_rate": 0.0002729894048699265, "loss": 0.4494, "step": 835 }, { "epoch": 19.09, "learning_rate": 0.00026325886212359495, "loss": 0.3839, "step": 840 }, { "epoch": 19.2, "learning_rate": 0.0002536785511957531, "loss": 0.3728, "step": 845 }, { "epoch": 19.32, "learning_rate": 0.00024425042564574185, "loss": 0.4126, "step": 850 }, { "epoch": 19.43, "learning_rate": 0.00023497640800017682, "loss": 0.4183, "step": 855 }, { "epoch": 19.55, "learning_rate": 0.0002258583893609175, "loss": 0.3778, "step": 860 }, { "epoch": 19.66, "learning_rate": 0.00021689822901944456, "loss": 0.3758, "step": 865 }, { "epoch": 19.77, "learning_rate": 0.000208097754077725, "loss": 0.4034, "step": 870 }, { "epoch": 19.89, "learning_rate": 0.0001994587590756397, "loss": 0.4085, "step": 875 }, { "epoch": 20.0, "learning_rate": 0.00019098300562505265, "loss": 0.3673, "step": 880 }, { "epoch": 20.11, "learning_rate": 0.0001826722220505931, "loss": 0.363, "step": 885 }, { "epoch": 20.23, "learning_rate": 0.000174528103037226, "loss": 0.3707, "step": 890 }, { "epoch": 20.34, "learning_rate": 0.00016655230928468257, "loss": 0.369, "step": 895 }, { "epoch": 20.45, "learning_rate": 0.00015874646716881869, "loss": 0.3528, "step": 900 }, { "epoch": 20.57, "learning_rate": 0.00015111216840997744, "loss": 0.3581, "step": 905 }, { "epoch": 20.68, "learning_rate": 0.00014365096974841107, "loss": 0.3466, "step": 910 }, { "epoch": 20.8, "learning_rate": 0.00013636439262684297, "loss": 0.3274, "step": 915 }, { "epoch": 20.91, "learning_rate": 0.00012925392288022297, "loss": 0.3401, "step": 920 }, { "epoch": 21.02, "learning_rate": 0.00012232101043274435, "loss": 0.3435, "step": 925 }, { "epoch": 21.14, "learning_rate": 0.00011556706900218572, "loss": 0.2972, "step": 930 }, { "epoch": 21.25, "learning_rate": 0.00010899347581163222, "loss": 0.3153, "step": 935 }, { "epoch": 21.36, "learning_rate": 0.00010260157130864178, "loss": 0.3315, "step": 940 }, { "epoch": 21.48, "learning_rate": 9.639265889190829e-05, "loss": 0.3264, "step": 945 }, { "epoch": 21.59, "learning_rate": 9.036800464548156e-05, "loss": 0.3427, "step": 950 }, { "epoch": 21.7, "learning_rate": 8.4528837080594e-05, "loss": 0.3415, "step": 955 }, { "epoch": 21.82, "learning_rate": 7.887634688515e-05, "loss": 0.323, "step": 960 }, { "epoch": 21.93, "learning_rate": 7.341168668092857e-05, "loss": 0.2961, "step": 965 }, { "epoch": 22.05, "learning_rate": 6.813597078854772e-05, "loss": 0.3276, "step": 970 }, { "epoch": 22.16, "learning_rate": 6.305027500023842e-05, "loss": 0.3045, "step": 975 }, { "epoch": 22.27, "learning_rate": 5.8155636360475384e-05, "loss": 0.3167, "step": 980 }, { "epoch": 22.39, "learning_rate": 5.345305295450997e-05, "loss": 0.319, "step": 985 }, { "epoch": 22.5, "learning_rate": 4.894348370484647e-05, "loss": 0.2852, "step": 990 }, { "epoch": 22.61, "learning_rate": 4.4627848175703315e-05, "loss": 0.3034, "step": 995 }, { "epoch": 22.73, "learning_rate": 4.050702638550274e-05, "loss": 0.2845, "step": 1000 }, { "epoch": 22.84, "learning_rate": 3.658185862742103e-05, "loss": 0.3136, "step": 1005 }, { "epoch": 22.95, "learning_rate": 3.285314529804295e-05, "loss": 0.3187, "step": 1010 }, { "epoch": 23.07, "learning_rate": 2.93216467341475e-05, "loss": 0.2907, "step": 1015 }, { "epoch": 23.18, "learning_rate": 2.5988083057666535e-05, "loss": 0.2955, "step": 1020 }, { "epoch": 23.3, "learning_rate": 2.2853134028840594e-05, "loss": 0.2785, "step": 1025 }, { "epoch": 23.41, "learning_rate": 1.9917438907606554e-05, "loss": 0.3369, "step": 1030 }, { "epoch": 23.52, "learning_rate": 1.7181596323244453e-05, "loss": 0.2837, "step": 1035 }, { "epoch": 23.64, "learning_rate": 1.4646164152307017e-05, "loss": 0.3002, "step": 1040 }, { "epoch": 23.75, "learning_rate": 1.231165940486234e-05, "loss": 0.3062, "step": 1045 }, { "epoch": 23.86, "learning_rate": 1.0178558119067316e-05, "loss": 0.2859, "step": 1050 }, { "epoch": 23.98, "learning_rate": 8.247295264097288e-06, "loss": 0.284, "step": 1055 }, { "epoch": 24.09, "learning_rate": 6.518264651449779e-06, "loss": 0.2607, "step": 1060 }, { "epoch": 24.2, "learning_rate": 4.991818854640395e-06, "loss": 0.3164, "step": 1065 }, { "epoch": 24.32, "learning_rate": 3.6682691373086663e-06, "loss": 0.2597, "step": 1070 }, { "epoch": 24.43, "learning_rate": 2.5478853897464847e-06, "loss": 0.2907, "step": 1075 }, { "epoch": 24.55, "learning_rate": 1.630896073864352e-06, "loss": 0.3033, "step": 1080 }, { "epoch": 24.66, "learning_rate": 9.174881766043087e-07, "loss": 0.3089, "step": 1085 }, { "epoch": 24.77, "learning_rate": 4.078071718107701e-07, "loss": 0.2964, "step": 1090 }, { "epoch": 24.89, "learning_rate": 1.0195699056669839e-07, "loss": 0.2995, "step": 1095 }, { "epoch": 25.0, "learning_rate": 0.0, "loss": 0.2936, "step": 1100 }, { "epoch": 25.0, "step": 1100, "total_flos": 5.602696856046797e+17, "train_loss": 1.3768115234375, "train_runtime": 24197.7873, "train_samples_per_second": 0.724, "train_steps_per_second": 0.045 } ], "logging_steps": 5, "max_steps": 1100, "num_input_tokens_seen": 0, "num_train_epochs": 25, "save_steps": 100, "total_flos": 5.602696856046797e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }