{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.9984901862103674, "global_step": 1240, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.2e-05, "loss": 6.4085, "step": 5 }, { "epoch": 0.04, "learning_rate": 2.4e-05, "loss": 5.2039, "step": 10 }, { "epoch": 0.06, "learning_rate": 3.6e-05, "loss": 4.8094, "step": 15 }, { "epoch": 0.08, "learning_rate": 4.8e-05, "loss": 4.3557, "step": 20 }, { "epoch": 0.1, "learning_rate": 6e-05, "loss": 4.0778, "step": 25 }, { "epoch": 0.12, "learning_rate": 5.999749289566312e-05, "loss": 3.9019, "step": 30 }, { "epoch": 0.14, "learning_rate": 5.998997200169063e-05, "loss": 3.744, "step": 35 }, { "epoch": 0.16, "learning_rate": 5.9977438575126904e-05, "loss": 3.5981, "step": 40 }, { "epoch": 0.18, "learning_rate": 5.99598947108125e-05, "loss": 3.4746, "step": 45 }, { "epoch": 0.2, "learning_rate": 5.993734334103397e-05, "loss": 3.3712, "step": 50 }, { "epoch": 0.22, "learning_rate": 5.990978823503376e-05, "loss": 3.3405, "step": 55 }, { "epoch": 0.24, "learning_rate": 5.987723399838028e-05, "loss": 3.3019, "step": 60 }, { "epoch": 0.26, "learning_rate": 5.983968607219804e-05, "loss": 3.2548, "step": 65 }, { "epoch": 0.28, "learning_rate": 5.979715073225829e-05, "loss": 3.2847, "step": 70 }, { "epoch": 0.3, "learning_rate": 5.974963508793003e-05, "loss": 3.2371, "step": 75 }, { "epoch": 0.32, "learning_rate": 5.9697147080991817e-05, "loss": 3.1677, "step": 80 }, { "epoch": 0.34, "learning_rate": 5.963969548430428e-05, "loss": 3.1617, "step": 85 }, { "epoch": 0.36, "learning_rate": 5.957728990034389e-05, "loss": 3.2266, "step": 90 }, { "epoch": 0.38, "learning_rate": 5.950994075959803e-05, "loss": 3.1008, "step": 95 }, { "epoch": 0.4, "learning_rate": 5.9437659318821544e-05, "loss": 3.0858, "step": 100 }, { "epoch": 0.42, "learning_rate": 5.9360457659155325e-05, "loss": 3.018, "step": 105 }, { "epoch": 0.44, "learning_rate": 5.927834868410711e-05, "loss": 3.0951, "step": 110 }, { "epoch": 0.46, "learning_rate": 5.919134611739472e-05, "loss": 3.1038, "step": 115 }, { "epoch": 0.48, "learning_rate": 5.90994645006523e-05, "loss": 3.0275, "step": 120 }, { "epoch": 0.5, "learning_rate": 5.900271919099987e-05, "loss": 3.0632, "step": 125 }, { "epoch": 0.52, "learning_rate": 5.8901126358476414e-05, "loss": 3.0435, "step": 130 }, { "epoch": 0.54, "learning_rate": 5.8794702983337354e-05, "loss": 3.0403, "step": 135 }, { "epoch": 0.56, "learning_rate": 5.868346685321639e-05, "loss": 3.0174, "step": 140 }, { "epoch": 0.58, "learning_rate": 5.856743656015246e-05, "loss": 2.9917, "step": 145 }, { "epoch": 0.6, "learning_rate": 5.844663149748229e-05, "loss": 2.9663, "step": 150 }, { "epoch": 0.62, "learning_rate": 5.8321071856599e-05, "loss": 2.8901, "step": 155 }, { "epoch": 0.64, "learning_rate": 5.819077862357725e-05, "loss": 2.9219, "step": 160 }, { "epoch": 0.66, "learning_rate": 5.8055773575665695e-05, "loss": 2.9779, "step": 165 }, { "epoch": 0.68, "learning_rate": 5.791607927764706e-05, "loss": 2.8977, "step": 170 }, { "epoch": 0.7, "learning_rate": 5.777171907806672e-05, "loss": 2.8961, "step": 175 }, { "epoch": 0.72, "learning_rate": 5.762271710533015e-05, "loss": 2.8841, "step": 180 }, { "epoch": 0.74, "learning_rate": 5.746909826367018e-05, "loss": 2.8661, "step": 185 }, { "epoch": 0.76, "learning_rate": 5.73108882289844e-05, "loss": 2.8671, "step": 190 }, { "epoch": 0.79, "learning_rate": 5.714811344454376e-05, "loss": 2.9775, "step": 195 }, { "epoch": 0.81, "learning_rate": 5.698080111657279e-05, "loss": 2.9726, "step": 200 }, { "epoch": 0.83, "learning_rate": 5.680897920970237e-05, "loss": 2.9081, "step": 205 }, { "epoch": 0.85, "learning_rate": 5.663267644229568e-05, "loss": 2.8349, "step": 210 }, { "epoch": 0.87, "learning_rate": 5.645192228164824e-05, "loss": 2.9355, "step": 215 }, { "epoch": 0.89, "learning_rate": 5.6266746939062725e-05, "loss": 2.904, "step": 220 }, { "epoch": 0.91, "learning_rate": 5.607718136479943e-05, "loss": 2.9315, "step": 225 }, { "epoch": 0.93, "learning_rate": 5.5883257242903255e-05, "loss": 2.8953, "step": 230 }, { "epoch": 0.95, "learning_rate": 5.568500698590799e-05, "loss": 2.818, "step": 235 }, { "epoch": 0.97, "learning_rate": 5.548246372941892e-05, "loss": 2.8779, "step": 240 }, { "epoch": 0.99, "learning_rate": 5.5275661326574485e-05, "loss": 2.8299, "step": 245 }, { "epoch": 1.0, "eval_loss": 2.7971479892730713, "eval_runtime": 163.2621, "eval_samples_per_second": 49.638, "eval_steps_per_second": 6.205, "step": 248 }, { "epoch": 1.01, "learning_rate": 5.50646343423881e-05, "loss": 3.1054, "step": 250 }, { "epoch": 1.03, "learning_rate": 5.48494180479709e-05, "loss": 2.8559, "step": 255 }, { "epoch": 1.05, "learning_rate": 5.463004841463656e-05, "loss": 2.8828, "step": 260 }, { "epoch": 1.07, "learning_rate": 5.4406562107889024e-05, "loss": 2.8264, "step": 265 }, { "epoch": 1.09, "learning_rate": 5.417899648129423e-05, "loss": 2.8157, "step": 270 }, { "epoch": 1.11, "learning_rate": 5.3947389570236784e-05, "loss": 2.8592, "step": 275 }, { "epoch": 1.13, "learning_rate": 5.371178008556278e-05, "loss": 2.8296, "step": 280 }, { "epoch": 1.15, "learning_rate": 5.347220740710959e-05, "loss": 2.8519, "step": 285 }, { "epoch": 1.17, "learning_rate": 5.322871157712397e-05, "loss": 2.8342, "step": 290 }, { "epoch": 1.19, "learning_rate": 5.298133329356934e-05, "loss": 2.8232, "step": 295 }, { "epoch": 1.21, "learning_rate": 5.273011390332354e-05, "loss": 2.7835, "step": 300 }, { "epoch": 1.23, "learning_rate": 5.247509539526808e-05, "loss": 2.8445, "step": 305 }, { "epoch": 1.25, "learning_rate": 5.221632039327014e-05, "loss": 2.8093, "step": 310 }, { "epoch": 1.27, "learning_rate": 5.195383214905837e-05, "loss": 2.8389, "step": 315 }, { "epoch": 1.29, "learning_rate": 5.168767453499379e-05, "loss": 2.8264, "step": 320 }, { "epoch": 1.31, "learning_rate": 5.141789203673698e-05, "loss": 2.7895, "step": 325 }, { "epoch": 1.33, "learning_rate": 5.114452974581269e-05, "loss": 2.7584, "step": 330 }, { "epoch": 1.35, "learning_rate": 5.0867633352073254e-05, "loss": 2.7946, "step": 335 }, { "epoch": 1.37, "learning_rate": 5.0587249136062016e-05, "loss": 2.7828, "step": 340 }, { "epoch": 1.39, "learning_rate": 5.0303423961277864e-05, "loss": 2.833, "step": 345 }, { "epoch": 1.41, "learning_rate": 5.001620526634258e-05, "loss": 2.7956, "step": 350 }, { "epoch": 1.43, "learning_rate": 4.972564105707191e-05, "loss": 2.8566, "step": 355 }, { "epoch": 1.45, "learning_rate": 4.943177989845176e-05, "loss": 2.7128, "step": 360 }, { "epoch": 1.47, "learning_rate": 4.9134670906521176e-05, "loss": 2.7443, "step": 365 }, { "epoch": 1.49, "learning_rate": 4.883436374016296e-05, "loss": 2.8008, "step": 370 }, { "epoch": 1.51, "learning_rate": 4.8530908592803714e-05, "loss": 2.7743, "step": 375 }, { "epoch": 1.53, "learning_rate": 4.8224356184024506e-05, "loss": 2.7487, "step": 380 }, { "epoch": 1.55, "learning_rate": 4.791475775108359e-05, "loss": 2.7707, "step": 385 }, { "epoch": 1.57, "learning_rate": 4.760216504035254e-05, "loss": 2.7924, "step": 390 }, { "epoch": 1.59, "learning_rate": 4.7286630298667406e-05, "loss": 2.8114, "step": 395 }, { "epoch": 1.61, "learning_rate": 4.6968206264596164e-05, "loss": 2.7762, "step": 400 }, { "epoch": 1.63, "learning_rate": 4.664694615962391e-05, "loss": 2.7792, "step": 405 }, { "epoch": 1.65, "learning_rate": 4.632290367925748e-05, "loss": 2.8243, "step": 410 }, { "epoch": 1.67, "learning_rate": 4.5996132984050744e-05, "loss": 2.7644, "step": 415 }, { "epoch": 1.69, "learning_rate": 4.566668869055216e-05, "loss": 2.7652, "step": 420 }, { "epoch": 1.71, "learning_rate": 4.5334625862176195e-05, "loss": 2.7722, "step": 425 }, { "epoch": 1.73, "learning_rate": 4.5e-05, "loss": 2.7455, "step": 430 }, { "epoch": 1.75, "learning_rate": 4.4662867033486924e-05, "loss": 2.7365, "step": 435 }, { "epoch": 1.77, "learning_rate": 4.4323283311138474e-05, "loss": 2.7176, "step": 440 }, { "epoch": 1.79, "learning_rate": 4.398130559107617e-05, "loss": 2.7344, "step": 445 }, { "epoch": 1.81, "learning_rate": 4.363699103155502e-05, "loss": 2.7425, "step": 450 }, { "epoch": 1.83, "learning_rate": 4.3290397181410056e-05, "loss": 2.7304, "step": 455 }, { "epoch": 1.85, "learning_rate": 4.294158197043761e-05, "loss": 2.7893, "step": 460 }, { "epoch": 1.87, "learning_rate": 4.259060369971287e-05, "loss": 2.7134, "step": 465 }, { "epoch": 1.89, "learning_rate": 4.2237521031845506e-05, "loss": 2.82, "step": 470 }, { "epoch": 1.91, "learning_rate": 4.1882392981174704e-05, "loss": 2.7194, "step": 475 }, { "epoch": 1.93, "learning_rate": 4.152527890390553e-05, "loss": 2.7565, "step": 480 }, { "epoch": 1.95, "learning_rate": 4.11662384881881e-05, "loss": 2.7037, "step": 485 }, { "epoch": 1.97, "learning_rate": 4.080533174414131e-05, "loss": 2.7439, "step": 490 }, { "epoch": 1.99, "learning_rate": 4.0442618993822714e-05, "loss": 2.6984, "step": 495 }, { "epoch": 2.0, "eval_loss": 2.6826047897338867, "eval_runtime": 163.0962, "eval_samples_per_second": 49.688, "eval_steps_per_second": 6.211, "step": 496 }, { "epoch": 2.02, "learning_rate": 4.007816086114627e-05, "loss": 2.8637, "step": 500 }, { "epoch": 2.04, "learning_rate": 3.971201826174964e-05, "loss": 2.709, "step": 505 }, { "epoch": 2.06, "learning_rate": 3.934425239281274e-05, "loss": 2.6646, "step": 510 }, { "epoch": 2.08, "learning_rate": 3.897492472282926e-05, "loss": 2.7183, "step": 515 }, { "epoch": 2.1, "learning_rate": 3.860409698133271e-05, "loss": 2.7506, "step": 520 }, { "epoch": 2.12, "learning_rate": 3.8231831148579046e-05, "loss": 2.7579, "step": 525 }, { "epoch": 2.14, "learning_rate": 3.785818944518718e-05, "loss": 2.685, "step": 530 }, { "epoch": 2.16, "learning_rate": 3.748323432173944e-05, "loss": 2.6694, "step": 535 }, { "epoch": 2.18, "learning_rate": 3.710702844834357e-05, "loss": 2.7386, "step": 540 }, { "epoch": 2.2, "learning_rate": 3.672963470415803e-05, "loss": 2.692, "step": 545 }, { "epoch": 2.22, "learning_rate": 3.6351116166882325e-05, "loss": 2.7219, "step": 550 }, { "epoch": 2.24, "learning_rate": 3.5971536102214215e-05, "loss": 2.769, "step": 555 }, { "epoch": 2.26, "learning_rate": 3.559095795327547e-05, "loss": 2.7076, "step": 560 }, { "epoch": 2.28, "learning_rate": 3.520944533000792e-05, "loss": 2.7279, "step": 565 }, { "epoch": 2.3, "learning_rate": 3.482706199854172e-05, "loss": 2.7273, "step": 570 }, { "epoch": 2.32, "learning_rate": 3.444387187053747e-05, "loss": 2.6949, "step": 575 }, { "epoch": 2.34, "learning_rate": 3.405993899250394e-05, "loss": 2.681, "step": 580 }, { "epoch": 2.36, "learning_rate": 3.3675327535093366e-05, "loss": 2.8151, "step": 585 }, { "epoch": 2.38, "learning_rate": 3.329010178237595e-05, "loss": 2.7137, "step": 590 }, { "epoch": 2.4, "learning_rate": 3.2904326121095375e-05, "loss": 2.6656, "step": 595 }, { "epoch": 2.42, "learning_rate": 3.25180650299072e-05, "loss": 2.7158, "step": 600 }, { "epoch": 2.44, "learning_rate": 3.2131383068601905e-05, "loss": 2.7081, "step": 605 }, { "epoch": 2.46, "learning_rate": 3.174434486731428e-05, "loss": 2.644, "step": 610 }, { "epoch": 2.48, "learning_rate": 3.135701511572119e-05, "loss": 2.698, "step": 615 }, { "epoch": 2.5, "learning_rate": 3.0969458552229324e-05, "loss": 2.6591, "step": 620 }, { "epoch": 2.52, "learning_rate": 3.058173995315473e-05, "loss": 2.6591, "step": 625 }, { "epoch": 2.54, "learning_rate": 3.019392412189618e-05, "loss": 2.7327, "step": 630 }, { "epoch": 2.56, "learning_rate": 2.9806075878103817e-05, "loss": 2.6711, "step": 635 }, { "epoch": 2.58, "learning_rate": 2.9418260046845272e-05, "loss": 2.7034, "step": 640 }, { "epoch": 2.6, "learning_rate": 2.9030541447770684e-05, "loss": 2.6119, "step": 645 }, { "epoch": 2.62, "learning_rate": 2.8642984884278812e-05, "loss": 2.641, "step": 650 }, { "epoch": 2.64, "learning_rate": 2.8255655132685727e-05, "loss": 2.6479, "step": 655 }, { "epoch": 2.66, "learning_rate": 2.7868616931398106e-05, "loss": 2.6298, "step": 660 }, { "epoch": 2.68, "learning_rate": 2.7481934970092792e-05, "loss": 2.6951, "step": 665 }, { "epoch": 2.7, "learning_rate": 2.709567387890463e-05, "loss": 2.681, "step": 670 }, { "epoch": 2.72, "learning_rate": 2.6709898217624053e-05, "loss": 2.6787, "step": 675 }, { "epoch": 2.74, "learning_rate": 2.6324672464906643e-05, "loss": 2.7171, "step": 680 }, { "epoch": 2.76, "learning_rate": 2.5940061007496078e-05, "loss": 2.7116, "step": 685 }, { "epoch": 2.78, "learning_rate": 2.5556128129462543e-05, "loss": 2.6834, "step": 690 }, { "epoch": 2.8, "learning_rate": 2.517293800145828e-05, "loss": 2.6827, "step": 695 }, { "epoch": 2.82, "learning_rate": 2.479055466999209e-05, "loss": 2.6158, "step": 700 }, { "epoch": 2.84, "learning_rate": 2.4409042046724537e-05, "loss": 2.7102, "step": 705 }, { "epoch": 2.86, "learning_rate": 2.4028463897785787e-05, "loss": 2.6505, "step": 710 }, { "epoch": 2.88, "learning_rate": 2.3648883833117687e-05, "loss": 2.6876, "step": 715 }, { "epoch": 2.9, "learning_rate": 2.327036529584198e-05, "loss": 2.6841, "step": 720 }, { "epoch": 2.92, "learning_rate": 2.2892971551656424e-05, "loss": 2.683, "step": 725 }, { "epoch": 2.94, "learning_rate": 2.2516765678260564e-05, "loss": 2.714, "step": 730 }, { "epoch": 2.96, "learning_rate": 2.2141810554812824e-05, "loss": 2.6786, "step": 735 }, { "epoch": 2.98, "learning_rate": 2.176816885142096e-05, "loss": 2.7022, "step": 740 }, { "epoch": 3.0, "eval_loss": 2.6361331939697266, "eval_runtime": 163.5049, "eval_samples_per_second": 49.564, "eval_steps_per_second": 6.196, "step": 744 }, { "epoch": 3.0, "learning_rate": 2.132162043617224e-05, "loss": 2.8742, "step": 745 }, { "epoch": 3.02, "learning_rate": 2.0951087758432035e-05, "loss": 2.6404, "step": 750 }, { "epoch": 3.04, "learning_rate": 2.0582067518500164e-05, "loss": 2.7206, "step": 755 }, { "epoch": 3.06, "learning_rate": 2.0214621394526204e-05, "loss": 2.6592, "step": 760 }, { "epoch": 3.08, "learning_rate": 1.984881080156157e-05, "loss": 2.6667, "step": 765 }, { "epoch": 3.1, "learning_rate": 1.9484696881294518e-05, "loss": 2.6568, "step": 770 }, { "epoch": 3.12, "learning_rate": 1.9122340491830983e-05, "loss": 2.645, "step": 775 }, { "epoch": 3.14, "learning_rate": 1.8761802197522638e-05, "loss": 2.5945, "step": 780 }, { "epoch": 3.17, "learning_rate": 1.8403142258844258e-05, "loss": 2.6536, "step": 785 }, { "epoch": 3.19, "learning_rate": 1.8046420622321678e-05, "loss": 2.6664, "step": 790 }, { "epoch": 3.21, "learning_rate": 1.769169691051236e-05, "loss": 2.6797, "step": 795 }, { "epoch": 3.23, "learning_rate": 1.733903041204006e-05, "loss": 2.6706, "step": 800 }, { "epoch": 3.25, "learning_rate": 1.6988480071685293e-05, "loss": 2.7273, "step": 805 }, { "epoch": 3.27, "learning_rate": 1.664010448053332e-05, "loss": 2.6833, "step": 810 }, { "epoch": 3.29, "learning_rate": 1.6293961866181154e-05, "loss": 2.7188, "step": 815 }, { "epoch": 3.31, "learning_rate": 1.5950110083005433e-05, "loss": 2.6276, "step": 820 }, { "epoch": 3.33, "learning_rate": 1.5608606602492622e-05, "loss": 2.7026, "step": 825 }, { "epoch": 3.35, "learning_rate": 1.526950850363318e-05, "loss": 2.6412, "step": 830 }, { "epoch": 3.37, "learning_rate": 1.4932872463381392e-05, "loss": 2.6308, "step": 835 }, { "epoch": 3.39, "learning_rate": 1.4598754747182377e-05, "loss": 2.5865, "step": 840 }, { "epoch": 3.41, "learning_rate": 1.4267211199567806e-05, "loss": 2.7103, "step": 845 }, { "epoch": 3.43, "learning_rate": 1.3938297234822089e-05, "loss": 2.6321, "step": 850 }, { "epoch": 3.45, "learning_rate": 1.361206782772039e-05, "loss": 2.6398, "step": 855 }, { "epoch": 3.47, "learning_rate": 1.328857750434012e-05, "loss": 2.6513, "step": 860 }, { "epoch": 3.49, "learning_rate": 1.2967880332947474e-05, "loss": 2.692, "step": 865 }, { "epoch": 3.51, "learning_rate": 1.2650029914960406e-05, "loss": 2.5955, "step": 870 }, { "epoch": 3.53, "learning_rate": 1.2335079375989666e-05, "loss": 2.6501, "step": 875 }, { "epoch": 3.55, "learning_rate": 1.2023081356959398e-05, "loss": 2.646, "step": 880 }, { "epoch": 3.57, "learning_rate": 1.1714088005308714e-05, "loss": 2.6988, "step": 885 }, { "epoch": 3.59, "learning_rate": 1.140815096627573e-05, "loss": 2.6395, "step": 890 }, { "epoch": 3.61, "learning_rate": 1.1105321374265619e-05, "loss": 2.7236, "step": 895 }, { "epoch": 3.63, "learning_rate": 1.0805649844303954e-05, "loss": 2.7175, "step": 900 }, { "epoch": 3.65, "learning_rate": 1.0509186463576888e-05, "loss": 2.7241, "step": 905 }, { "epoch": 3.67, "learning_rate": 1.0215980783059585e-05, "loss": 2.6334, "step": 910 }, { "epoch": 3.69, "learning_rate": 9.926081809234262e-06, "loss": 2.6283, "step": 915 }, { "epoch": 3.71, "learning_rate": 9.639537995899214e-06, "loss": 2.6598, "step": 920 }, { "epoch": 3.73, "learning_rate": 9.356397236070267e-06, "loss": 2.6099, "step": 925 }, { "epoch": 3.75, "learning_rate": 9.076706853975862e-06, "loss": 2.613, "step": 930 }, { "epoch": 3.77, "learning_rate": 8.800513597147349e-06, "loss": 2.5609, "step": 935 }, { "epoch": 3.79, "learning_rate": 8.527863628605514e-06, "loss": 2.7093, "step": 940 }, { "epoch": 3.81, "learning_rate": 8.258802519144935e-06, "loss": 2.5844, "step": 945 }, { "epoch": 3.83, "learning_rate": 7.993375239717228e-06, "loss": 2.6648, "step": 950 }, { "epoch": 3.85, "learning_rate": 7.731626153914636e-06, "loss": 2.6364, "step": 955 }, { "epoch": 3.87, "learning_rate": 7.473599010555025e-06, "loss": 2.738, "step": 960 }, { "epoch": 3.89, "learning_rate": 7.219336936369749e-06, "loss": 2.6826, "step": 965 }, { "epoch": 3.91, "learning_rate": 6.968882428795393e-06, "loss": 2.717, "step": 970 }, { "epoch": 3.93, "learning_rate": 6.722277348870772e-06, "loss": 2.7357, "step": 975 }, { "epoch": 3.95, "learning_rate": 6.47956291424023e-06, "loss": 2.648, "step": 980 }, { "epoch": 3.97, "learning_rate": 6.240779692264567e-06, "loss": 2.6178, "step": 985 }, { "epoch": 3.99, "learning_rate": 6.005967593240522e-06, "loss": 2.6436, "step": 990 }, { "epoch": 4.0, "eval_loss": 2.6245498657226562, "eval_runtime": 163.5799, "eval_samples_per_second": 49.542, "eval_steps_per_second": 6.193, "step": 992 }, { "epoch": 4.01, "learning_rate": 5.7751658637302344e-06, "loss": 2.8379, "step": 995 }, { "epoch": 4.03, "learning_rate": 5.548413080001497e-06, "loss": 2.6199, "step": 1000 }, { "epoch": 4.05, "learning_rate": 5.325747141580131e-06, "loss": 2.6231, "step": 1005 }, { "epoch": 4.07, "learning_rate": 5.107205264915482e-06, "loss": 2.6267, "step": 1010 }, { "epoch": 4.09, "learning_rate": 4.8928239771599935e-06, "loss": 2.69, "step": 1015 }, { "epoch": 4.11, "learning_rate": 4.6826391100640785e-06, "loss": 2.7156, "step": 1020 }, { "epoch": 4.13, "learning_rate": 4.476685793987202e-06, "loss": 2.729, "step": 1025 }, { "epoch": 4.15, "learning_rate": 4.274998452026159e-06, "loss": 2.6732, "step": 1030 }, { "epoch": 4.17, "learning_rate": 4.0776107942615855e-06, "loss": 2.6484, "step": 1035 }, { "epoch": 4.19, "learning_rate": 3.884555812123685e-06, "loss": 2.6133, "step": 1040 }, { "epoch": 4.21, "learning_rate": 3.6958657728779856e-06, "loss": 2.6863, "step": 1045 }, { "epoch": 4.23, "learning_rate": 3.5115722142321906e-06, "loss": 2.6385, "step": 1050 }, { "epoch": 4.25, "learning_rate": 3.3317059390649906e-06, "loss": 2.6193, "step": 1055 }, { "epoch": 4.27, "learning_rate": 3.1562970102776177e-06, "loss": 2.655, "step": 1060 }, { "epoch": 4.29, "learning_rate": 2.985374745769134e-06, "loss": 2.6629, "step": 1065 }, { "epoch": 4.31, "learning_rate": 2.8189677135362635e-06, "loss": 2.6214, "step": 1070 }, { "epoch": 4.33, "learning_rate": 2.6571037268984742e-06, "loss": 2.7275, "step": 1075 }, { "epoch": 4.35, "learning_rate": 2.499809839849292e-06, "loss": 2.6338, "step": 1080 }, { "epoch": 4.37, "learning_rate": 2.3471123425344863e-06, "loss": 2.6312, "step": 1085 }, { "epoch": 4.39, "learning_rate": 2.1990367568578982e-06, "loss": 2.5713, "step": 1090 }, { "epoch": 4.41, "learning_rate": 2.055607832215727e-06, "loss": 2.708, "step": 1095 }, { "epoch": 4.43, "learning_rate": 1.9168495413599183e-06, "loss": 2.6313, "step": 1100 }, { "epoch": 4.45, "learning_rate": 1.7827850763913167e-06, "loss": 2.5938, "step": 1105 }, { "epoch": 4.48, "learning_rate": 1.6534368448833581e-06, "loss": 2.6137, "step": 1110 }, { "epoch": 4.5, "learning_rate": 1.5288264661368523e-06, "loss": 2.6919, "step": 1115 }, { "epoch": 4.52, "learning_rate": 1.4089747675665387e-06, "loss": 2.6168, "step": 1120 }, { "epoch": 4.54, "learning_rate": 1.2939017812199584e-06, "loss": 2.6687, "step": 1125 }, { "epoch": 4.56, "learning_rate": 1.1836267404293311e-06, "loss": 2.5993, "step": 1130 }, { "epoch": 4.58, "learning_rate": 1.0781680765968515e-06, "loss": 2.6695, "step": 1135 }, { "epoch": 4.6, "learning_rate": 9.775434161140861e-07, "loss": 2.6461, "step": 1140 }, { "epoch": 4.62, "learning_rate": 8.817695774158729e-07, "loss": 2.6796, "step": 1145 }, { "epoch": 4.64, "learning_rate": 7.908625681693138e-07, "loss": 2.6503, "step": 1150 }, { "epoch": 4.66, "learning_rate": 7.048375825982112e-07, "loss": 2.6532, "step": 1155 }, { "epoch": 4.68, "learning_rate": 6.237089989435263e-07, "loss": 2.6171, "step": 1160 }, { "epoch": 4.7, "learning_rate": 5.474903770601913e-07, "loss": 2.6785, "step": 1165 }, { "epoch": 4.72, "learning_rate": 4.7619445615070256e-07, "loss": 2.7045, "step": 1170 }, { "epoch": 4.74, "learning_rate": 4.0983315263589004e-07, "loss": 2.7024, "step": 1175 }, { "epoch": 4.76, "learning_rate": 3.484175581632143e-07, "loss": 2.648, "step": 1180 }, { "epoch": 4.78, "learning_rate": 2.9195793775288916e-07, "loss": 2.6865, "step": 1185 }, { "epoch": 4.8, "learning_rate": 2.4046372808220084e-07, "loss": 2.6769, "step": 1190 }, { "epoch": 4.82, "learning_rate": 1.939435359082331e-07, "loss": 2.6381, "step": 1195 }, { "epoch": 4.84, "learning_rate": 1.524051366293644e-07, "loss": 2.6804, "step": 1200 }, { "epoch": 4.86, "learning_rate": 1.1585547298565091e-07, "loss": 2.5962, "step": 1205 }, { "epoch": 4.88, "learning_rate": 8.43006538984481e-08, "loss": 2.6744, "step": 1210 }, { "epoch": 4.9, "learning_rate": 5.774595344934075e-08, "loss": 2.6654, "step": 1215 }, { "epoch": 4.92, "learning_rate": 3.6195809998638144e-08, "loss": 2.673, "step": 1220 }, { "epoch": 4.94, "learning_rate": 1.9653825443547435e-08, "loss": 2.6151, "step": 1225 }, { "epoch": 4.96, "learning_rate": 8.122764616148581e-09, "loss": 2.7565, "step": 1230 }, { "epoch": 4.98, "learning_rate": 1.6045548212872963e-09, "loss": 2.6162, "step": 1235 }, { "epoch": 5.0, "learning_rate": 1.0028551442609057e-10, "loss": 2.6195, "step": 1240 }, { "epoch": 5.0, "eval_loss": 2.62469482421875, "eval_runtime": 163.7545, "eval_samples_per_second": 49.489, "eval_steps_per_second": 6.186, "step": 1240 }, { "epoch": 5.0, "step": 1240, "total_flos": 8.30390621259694e+16, "train_loss": 2.8163739796607725, "train_runtime": 27047.4241, "train_samples_per_second": 11.753, "train_steps_per_second": 0.046 } ], "max_steps": 1240, "num_train_epochs": 5, "total_flos": 8.30390621259694e+16, "trial_name": null, "trial_params": null }