{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.035964035964036, "global_step": 2038, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0013736056394868903, "loss": 1.4723, "step": 5 }, { "epoch": 0.01, "learning_rate": 0.0019425717247145285, "loss": 1.568, "step": 10 }, { "epoch": 0.01, "learning_rate": 0.0023791547571544325, "loss": 1.2706, "step": 15 }, { "epoch": 0.02, "learning_rate": 0.0027472112789737805, "loss": 1.3258, "step": 20 }, { "epoch": 0.02, "learning_rate": 0.003071475584169756, "loss": 1.2558, "step": 25 }, { "epoch": 0.03, "learning_rate": 0.0033646329245522657, "loss": 0.8074, "step": 30 }, { "epoch": 0.03, "learning_rate": 0.0036342189215581556, "loss": 1.4713, "step": 35 }, { "epoch": 0.04, "learning_rate": 0.003885143449429057, "loss": 1.2681, "step": 40 }, { "epoch": 0.04, "learning_rate": 0.004120816918460671, "loss": 1.2291, "step": 45 }, { "epoch": 0.05, "learning_rate": 0.004343722427630694, "loss": 1.306, "step": 50 }, { "epoch": 0.05, "learning_rate": 0.004555734516094203, "loss": 1.1953, "step": 55 }, { "epoch": 0.06, "learning_rate": 0.004758309514308865, "loss": 1.2176, "step": 60 }, { "epoch": 0.06, "learning_rate": 0.0049526055654364855, "loss": 1.2338, "step": 65 }, { "epoch": 0.07, "learning_rate": 0.005139561687500467, "loss": 1.381, "step": 70 }, { "epoch": 0.07, "learning_rate": 0.005319951765989316, "loss": 1.5546, "step": 75 }, { "epoch": 0.08, "learning_rate": 0.005494422557947561, "loss": 1.5126, "step": 80 }, { "epoch": 0.08, "learning_rate": 0.005663521139548541, "loss": 1.5132, "step": 85 }, { "epoch": 0.09, "learning_rate": 0.005827715174143585, "loss": 1.2736, "step": 90 }, { "epoch": 0.09, "learning_rate": 0.005987408170800916, "loss": 1.1807, "step": 95 }, { "epoch": 0.1, "learning_rate": 0.006142951168339512, "loss": 1.392, "step": 100 }, { "epoch": 0.1, "learning_rate": 0.0062946518179668965, "loss": 1.4068, "step": 105 }, { "epoch": 0.11, "learning_rate": 0.0064427815392316505, "loss": 1.2436, "step": 110 }, { "epoch": 0.11, "learning_rate": 0.0065875812264513545, "loss": 1.6382, "step": 115 }, { "epoch": 0.12, "learning_rate": 0.006729265849104531, "loss": 1.5855, "step": 120 }, { "epoch": 0.12, "learning_rate": 0.006868028197434452, "loss": 1.8619, "step": 125 }, { "epoch": 0.13, "learning_rate": 0.007004041959724749, "loss": 1.4162, "step": 130 }, { "epoch": 0.13, "learning_rate": 0.007137464271463298, "loss": 1.397, "step": 135 }, { "epoch": 0.14, "learning_rate": 0.007268437843116311, "loss": 1.376, "step": 140 }, { "epoch": 0.14, "learning_rate": 0.0073970927486462865, "loss": 1.5437, "step": 145 }, { "epoch": 0.15, "learning_rate": 0.007523547938632788, "loss": 1.8505, "step": 150 }, { "epoch": 0.15, "learning_rate": 0.0076479125281174514, "loss": 1.3138, "step": 155 }, { "epoch": 0.16, "learning_rate": 0.007770286898858114, "loss": 1.6157, "step": 160 }, { "epoch": 0.16, "learning_rate": 0.00789076364767037, "loss": 1.4061, "step": 165 }, { "epoch": 0.17, "learning_rate": 0.008009428406336274, "loss": 1.4829, "step": 170 }, { "epoch": 0.17, "learning_rate": 0.008126360553720012, "loss": 1.3897, "step": 175 }, { "epoch": 0.18, "learning_rate": 0.008241633836921341, "loss": 1.3801, "step": 180 }, { "epoch": 0.18, "learning_rate": 0.008355316915277182, "loss": 1.6133, "step": 185 }, { "epoch": 0.19, "learning_rate": 0.008467473838610143, "loss": 1.6826, "step": 190 }, { "epoch": 0.19, "learning_rate": 
0.008578164469184382, "loss": 1.6101, "step": 195 }, { "epoch": 0.2, "learning_rate": 0.008687444855261389, "loss": 1.7164, "step": 200 }, { "epoch": 0.2, "learning_rate": 0.008795367562872955, "loss": 1.6603, "step": 205 }, { "epoch": 0.21, "learning_rate": 0.008901981971385245, "loss": 1.3604, "step": 210 }, { "epoch": 0.21, "learning_rate": 0.009007334537569819, "loss": 1.5517, "step": 215 }, { "epoch": 0.22, "learning_rate": 0.009111469032188405, "loss": 1.6048, "step": 220 }, { "epoch": 0.22, "learning_rate": 0.009214426752509268, "loss": 1.6281, "step": 225 }, { "epoch": 0.23, "learning_rate": 0.009316246713681893, "loss": 2.1204, "step": 230 }, { "epoch": 0.23, "learning_rate": 0.009416965821485117, "loss": 1.6439, "step": 235 }, { "epoch": 0.24, "learning_rate": 0.00951661902861773, "loss": 1.6982, "step": 240 }, { "epoch": 0.24, "learning_rate": 0.009615239476408233, "loss": 1.857, "step": 245 }, { "epoch": 0.25, "learning_rate": 0.009712858623572643, "loss": 1.5612, "step": 250 }, { "epoch": 0.25, "learning_rate": 0.009809506363438459, "loss": 1.4843, "step": 255 }, { "epoch": 0.26, "learning_rate": 0.009905211130872971, "loss": 1.81, "step": 260 }, { "epoch": 0.26, "learning_rate": 0.01, "loss": 1.5551, "step": 265 }, { "epoch": 0.27, "learning_rate": 0.009971799492385787, "loss": 1.8817, "step": 270 }, { "epoch": 0.27, "learning_rate": 0.009943598984771575, "loss": 1.7807, "step": 275 }, { "epoch": 0.28, "learning_rate": 0.009915398477157361, "loss": 1.7788, "step": 280 }, { "epoch": 0.28, "learning_rate": 0.009887197969543148, "loss": 1.9268, "step": 285 }, { "epoch": 0.29, "learning_rate": 0.009858997461928934, "loss": 1.767, "step": 290 }, { "epoch": 0.29, "learning_rate": 0.00983079695431472, "loss": 2.0681, "step": 295 }, { "epoch": 0.3, "learning_rate": 0.009802596446700508, "loss": 1.8078, "step": 300 }, { "epoch": 0.3, "learning_rate": 0.009774395939086295, "loss": 1.5819, "step": 305 }, { "epoch": 0.31, "learning_rate": 0.009746195431472081, "loss": 1.3608, "step": 310 }, { "epoch": 0.31, "learning_rate": 0.00971799492385787, "loss": 1.4035, "step": 315 }, { "epoch": 0.32, "learning_rate": 0.009689794416243654, "loss": 1.7838, "step": 320 }, { "epoch": 0.32, "learning_rate": 0.009661593908629442, "loss": 1.462, "step": 325 }, { "epoch": 0.33, "learning_rate": 0.009633393401015228, "loss": 1.8537, "step": 330 }, { "epoch": 0.33, "learning_rate": 0.009605192893401015, "loss": 1.6035, "step": 335 }, { "epoch": 0.34, "learning_rate": 0.009576992385786803, "loss": 1.7534, "step": 340 }, { "epoch": 0.34, "learning_rate": 0.00954879187817259, "loss": 1.5117, "step": 345 }, { "epoch": 0.35, "learning_rate": 0.009520591370558376, "loss": 1.6606, "step": 350 }, { "epoch": 0.35, "learning_rate": 0.009492390862944162, "loss": 2.2899, "step": 355 }, { "epoch": 0.36, "learning_rate": 0.009464190355329949, "loss": 1.6254, "step": 360 }, { "epoch": 0.36, "learning_rate": 0.009435989847715737, "loss": 1.7699, "step": 365 }, { "epoch": 0.37, "learning_rate": 0.009407789340101523, "loss": 1.5836, "step": 370 }, { "epoch": 0.37, "learning_rate": 0.00937958883248731, "loss": 1.9447, "step": 375 }, { "epoch": 0.38, "learning_rate": 0.009351388324873098, "loss": 1.7334, "step": 380 }, { "epoch": 0.38, "learning_rate": 0.009323187817258882, "loss": 1.5246, "step": 385 }, { "epoch": 0.39, "learning_rate": 0.00929498730964467, "loss": 1.7258, "step": 390 }, { "epoch": 0.39, "learning_rate": 0.009266786802030457, "loss": 1.6757, "step": 395 }, { "epoch": 0.4, "learning_rate": 
0.009238586294416243, "loss": 1.7912, "step": 400 }, { "epoch": 0.4, "learning_rate": 0.009210385786802031, "loss": 1.8435, "step": 405 }, { "epoch": 0.41, "learning_rate": 0.009182185279187818, "loss": 2.0267, "step": 410 }, { "epoch": 0.41, "learning_rate": 0.009153984771573604, "loss": 1.6059, "step": 415 }, { "epoch": 0.42, "learning_rate": 0.00912578426395939, "loss": 1.7753, "step": 420 }, { "epoch": 0.42, "learning_rate": 0.009097583756345177, "loss": 1.988, "step": 425 }, { "epoch": 0.43, "learning_rate": 0.009069383248730965, "loss": 1.5201, "step": 430 }, { "epoch": 0.43, "learning_rate": 0.009041182741116751, "loss": 1.397, "step": 435 }, { "epoch": 0.44, "learning_rate": 0.009012982233502538, "loss": 1.7266, "step": 440 }, { "epoch": 0.44, "learning_rate": 0.008984781725888326, "loss": 1.6747, "step": 445 }, { "epoch": 0.45, "learning_rate": 0.008956581218274112, "loss": 1.5811, "step": 450 }, { "epoch": 0.45, "learning_rate": 0.008928380710659899, "loss": 1.4325, "step": 455 }, { "epoch": 0.46, "learning_rate": 0.008900180203045685, "loss": 1.9487, "step": 460 }, { "epoch": 0.46, "learning_rate": 0.008871979695431471, "loss": 1.4969, "step": 465 }, { "epoch": 0.47, "learning_rate": 0.00884377918781726, "loss": 1.6207, "step": 470 }, { "epoch": 0.47, "learning_rate": 0.008815578680203046, "loss": 1.9026, "step": 475 }, { "epoch": 0.48, "learning_rate": 0.008787378172588832, "loss": 1.6797, "step": 480 }, { "epoch": 0.48, "learning_rate": 0.00875917766497462, "loss": 1.5079, "step": 485 }, { "epoch": 0.49, "learning_rate": 0.008730977157360405, "loss": 1.5912, "step": 490 }, { "epoch": 0.49, "learning_rate": 0.008702776649746193, "loss": 1.706, "step": 495 }, { "epoch": 0.5, "learning_rate": 0.00867457614213198, "loss": 1.6559, "step": 500 }, { "epoch": 0.5, "learning_rate": 0.008646375634517766, "loss": 1.6109, "step": 505 }, { "epoch": 0.51, "learning_rate": 0.008618175126903554, "loss": 1.9024, "step": 510 }, { "epoch": 0.51, "learning_rate": 0.00858997461928934, "loss": 1.4508, "step": 515 }, { "epoch": 0.52, "learning_rate": 0.008561774111675127, "loss": 1.7011, "step": 520 }, { "epoch": 0.52, "learning_rate": 0.008533573604060913, "loss": 1.4233, "step": 525 }, { "epoch": 0.53, "learning_rate": 0.0085053730964467, "loss": 1.5886, "step": 530 }, { "epoch": 0.53, "learning_rate": 0.008477172588832488, "loss": 1.6088, "step": 535 }, { "epoch": 0.54, "learning_rate": 0.008448972081218274, "loss": 2.3507, "step": 540 }, { "epoch": 0.54, "learning_rate": 0.00842077157360406, "loss": 1.457, "step": 545 }, { "epoch": 0.55, "learning_rate": 0.008392571065989849, "loss": 1.7145, "step": 550 }, { "epoch": 0.55, "learning_rate": 0.008364370558375635, "loss": 1.6431, "step": 555 }, { "epoch": 0.56, "learning_rate": 0.008336170050761421, "loss": 1.6156, "step": 560 }, { "epoch": 0.56, "learning_rate": 0.008307969543147208, "loss": 1.4988, "step": 565 }, { "epoch": 0.57, "learning_rate": 0.008279769035532994, "loss": 1.6655, "step": 570 }, { "epoch": 0.57, "learning_rate": 0.008251568527918782, "loss": 1.3964, "step": 575 }, { "epoch": 0.58, "learning_rate": 0.008223368020304569, "loss": 1.5442, "step": 580 }, { "epoch": 0.58, "learning_rate": 0.008195167512690355, "loss": 1.576, "step": 585 }, { "epoch": 0.59, "learning_rate": 0.008166967005076143, "loss": 1.6838, "step": 590 }, { "epoch": 0.59, "learning_rate": 0.008138766497461928, "loss": 1.5472, "step": 595 }, { "epoch": 0.6, "learning_rate": 0.008110565989847716, "loss": 1.6603, "step": 600 }, { "epoch": 0.6, "learning_rate": 
0.008082365482233502, "loss": 1.8896, "step": 605 }, { "epoch": 0.61, "learning_rate": 0.008054164974619289, "loss": 1.3879, "step": 610 }, { "epoch": 0.61, "learning_rate": 0.008025964467005077, "loss": 1.7506, "step": 615 }, { "epoch": 0.62, "learning_rate": 0.007997763959390863, "loss": 1.6107, "step": 620 }, { "epoch": 0.62, "learning_rate": 0.00796956345177665, "loss": 1.6015, "step": 625 }, { "epoch": 0.63, "learning_rate": 0.007941362944162436, "loss": 1.4905, "step": 630 }, { "epoch": 0.63, "learning_rate": 0.007913162436548223, "loss": 1.7908, "step": 635 }, { "epoch": 0.64, "learning_rate": 0.00788496192893401, "loss": 1.5734, "step": 640 }, { "epoch": 0.64, "learning_rate": 0.007856761421319797, "loss": 1.4382, "step": 645 }, { "epoch": 0.65, "learning_rate": 0.007828560913705583, "loss": 1.5483, "step": 650 }, { "epoch": 0.65, "learning_rate": 0.00780036040609137, "loss": 1.4697, "step": 655 }, { "epoch": 0.66, "learning_rate": 0.007772159898477159, "loss": 1.5878, "step": 660 }, { "epoch": 0.66, "learning_rate": 0.007743959390862944, "loss": 1.9756, "step": 665 }, { "epoch": 0.67, "learning_rate": 0.007715758883248732, "loss": 1.6656, "step": 670 }, { "epoch": 0.67, "learning_rate": 0.007687558375634519, "loss": 1.6744, "step": 675 }, { "epoch": 0.68, "learning_rate": 0.007659357868020306, "loss": 1.762, "step": 680 }, { "epoch": 0.68, "learning_rate": 0.0076311573604060925, "loss": 1.6982, "step": 685 }, { "epoch": 0.69, "learning_rate": 0.007602956852791878, "loss": 1.4908, "step": 690 }, { "epoch": 0.69, "learning_rate": 0.007574756345177666, "loss": 1.6065, "step": 695 }, { "epoch": 0.7, "learning_rate": 0.0075465558375634526, "loss": 1.2293, "step": 700 }, { "epoch": 0.7, "learning_rate": 0.007518355329949239, "loss": 1.6052, "step": 705 }, { "epoch": 0.71, "learning_rate": 0.007490154822335025, "loss": 1.3118, "step": 710 }, { "epoch": 0.71, "learning_rate": 0.0074619543147208135, "loss": 1.1742, "step": 715 }, { "epoch": 0.72, "learning_rate": 0.0074337538071066, "loss": 1.632, "step": 720 }, { "epoch": 0.72, "learning_rate": 0.007405553299492385, "loss": 1.6181, "step": 725 }, { "epoch": 0.73, "learning_rate": 0.007377352791878174, "loss": 1.6726, "step": 730 }, { "epoch": 0.73, "learning_rate": 0.00734915228426396, "loss": 1.5759, "step": 735 }, { "epoch": 0.74, "learning_rate": 0.007320951776649747, "loss": 1.3197, "step": 740 }, { "epoch": 0.74, "learning_rate": 0.0072927512690355335, "loss": 1.8665, "step": 745 }, { "epoch": 0.75, "learning_rate": 0.007264550761421322, "loss": 1.1938, "step": 750 }, { "epoch": 0.75, "learning_rate": 0.007236350253807108, "loss": 1.2782, "step": 755 }, { "epoch": 0.76, "learning_rate": 0.0072081497461928936, "loss": 1.6276, "step": 760 }, { "epoch": 0.76, "learning_rate": 0.007179949238578682, "loss": 1.5382, "step": 765 }, { "epoch": 0.77, "learning_rate": 0.007151748730964467, "loss": 1.5528, "step": 770 }, { "epoch": 0.77, "learning_rate": 0.0071235482233502545, "loss": 1.663, "step": 775 }, { "epoch": 0.78, "learning_rate": 0.007095347715736042, "loss": 1.5098, "step": 780 }, { "epoch": 0.78, "learning_rate": 0.007067147208121828, "loss": 1.781, "step": 785 }, { "epoch": 0.79, "learning_rate": 0.007038946700507615, "loss": 1.3787, "step": 790 }, { "epoch": 0.79, "learning_rate": 0.007010746192893401, "loss": 2.1853, "step": 795 }, { "epoch": 0.8, "learning_rate": 0.006982545685279189, "loss": 1.6639, "step": 800 }, { "epoch": 0.8, "learning_rate": 0.006954345177664975, "loss": 1.7152, "step": 805 }, { "epoch": 0.81, 
"learning_rate": 0.006926144670050761, "loss": 1.8148, "step": 810 }, { "epoch": 0.81, "learning_rate": 0.006897944162436549, "loss": 1.6564, "step": 815 }, { "epoch": 0.82, "learning_rate": 0.0068697436548223354, "loss": 1.3046, "step": 820 }, { "epoch": 0.82, "learning_rate": 0.006841543147208123, "loss": 1.3462, "step": 825 }, { "epoch": 0.83, "learning_rate": 0.006813342639593909, "loss": 1.4969, "step": 830 }, { "epoch": 0.83, "learning_rate": 0.006785142131979697, "loss": 1.7367, "step": 835 }, { "epoch": 0.84, "learning_rate": 0.006756941624365483, "loss": 1.5385, "step": 840 }, { "epoch": 0.84, "learning_rate": 0.00672874111675127, "loss": 1.5918, "step": 845 }, { "epoch": 0.85, "learning_rate": 0.006700540609137057, "loss": 1.4092, "step": 850 }, { "epoch": 0.85, "learning_rate": 0.006672340101522844, "loss": 1.6609, "step": 855 }, { "epoch": 0.86, "learning_rate": 0.006644139593908631, "loss": 1.6529, "step": 860 }, { "epoch": 0.86, "learning_rate": 0.006615939086294417, "loss": 2.0543, "step": 865 }, { "epoch": 0.87, "learning_rate": 0.0065877385786802046, "loss": 1.7507, "step": 870 }, { "epoch": 0.87, "learning_rate": 0.00655953807106599, "loss": 1.3364, "step": 875 }, { "epoch": 0.88, "learning_rate": 0.0065313375634517765, "loss": 1.5436, "step": 880 }, { "epoch": 0.88, "learning_rate": 0.006503137055837565, "loss": 1.5808, "step": 885 }, { "epoch": 0.89, "learning_rate": 0.00647493654822335, "loss": 1.1435, "step": 890 }, { "epoch": 0.89, "learning_rate": 0.006446736040609138, "loss": 1.7857, "step": 895 }, { "epoch": 0.9, "learning_rate": 0.006418535532994924, "loss": 1.506, "step": 900 }, { "epoch": 0.9, "learning_rate": 0.006390335025380713, "loss": 1.5276, "step": 905 }, { "epoch": 0.91, "learning_rate": 0.006362134517766498, "loss": 1.7656, "step": 910 }, { "epoch": 0.91, "learning_rate": 0.006333934010152285, "loss": 1.2465, "step": 915 }, { "epoch": 0.92, "learning_rate": 0.006305733502538073, "loss": 1.1414, "step": 920 }, { "epoch": 0.92, "learning_rate": 0.006277532994923858, "loss": 1.7263, "step": 925 }, { "epoch": 0.93, "learning_rate": 0.0062493324873096456, "loss": 1.6158, "step": 930 }, { "epoch": 0.93, "learning_rate": 0.006221131979695433, "loss": 1.3896, "step": 935 }, { "epoch": 0.94, "learning_rate": 0.00619293147208122, "loss": 1.8046, "step": 940 }, { "epoch": 0.94, "learning_rate": 0.006164730964467006, "loss": 1.6806, "step": 945 }, { "epoch": 0.95, "learning_rate": 0.006136530456852793, "loss": 1.7003, "step": 950 }, { "epoch": 0.95, "learning_rate": 0.006108329949238579, "loss": 1.4235, "step": 955 }, { "epoch": 0.96, "learning_rate": 0.006080129441624366, "loss": 1.5528, "step": 960 }, { "epoch": 0.96, "learning_rate": 0.006051928934010154, "loss": 1.6251, "step": 965 }, { "epoch": 0.97, "learning_rate": 0.006023728426395939, "loss": 1.7944, "step": 970 }, { "epoch": 0.97, "learning_rate": 0.0059955279187817265, "loss": 1.6361, "step": 975 }, { "epoch": 0.98, "learning_rate": 0.005967327411167512, "loss": 1.6142, "step": 980 }, { "epoch": 0.98, "learning_rate": 0.0059391269035533, "loss": 1.6014, "step": 985 }, { "epoch": 0.99, "learning_rate": 0.005910926395939088, "loss": 1.5756, "step": 990 }, { "epoch": 0.99, "learning_rate": 0.005882725888324872, "loss": 1.2877, "step": 995 }, { "epoch": 1.0, "learning_rate": 0.005854525380710661, "loss": 1.6494, "step": 1000 }, { "epoch": 1.0, "learning_rate": 0.005826324873096447, "loss": 1.4704, "step": 1005 }, { "epoch": 1.01, "learning_rate": 0.005798124365482234, "loss": 1.3735, "step": 1010 }, { 
"epoch": 1.01, "learning_rate": 0.005769923857868021, "loss": 1.3836, "step": 1015 }, { "epoch": 1.02, "learning_rate": 0.005741723350253808, "loss": 1.4496, "step": 1020 }, { "epoch": 1.02, "learning_rate": 0.005713522842639595, "loss": 1.4613, "step": 1025 }, { "epoch": 1.03, "learning_rate": 0.00568532233502538, "loss": 1.4161, "step": 1030 }, { "epoch": 1.03, "learning_rate": 0.0056571218274111676, "loss": 1.592, "step": 1035 }, { "epoch": 1.04, "learning_rate": 0.005628921319796955, "loss": 1.585, "step": 1040 }, { "epoch": 1.04, "learning_rate": 0.005600720812182741, "loss": 1.2444, "step": 1045 }, { "epoch": 1.05, "learning_rate": 0.005572520304568528, "loss": 1.3218, "step": 1050 }, { "epoch": 1.05, "learning_rate": 0.005544319796954316, "loss": 1.4106, "step": 1055 }, { "epoch": 1.06, "learning_rate": 0.005516119289340102, "loss": 1.6454, "step": 1060 }, { "epoch": 1.06, "learning_rate": 0.005487918781725888, "loss": 1.4884, "step": 1065 }, { "epoch": 1.07, "learning_rate": 0.005459718274111677, "loss": 1.387, "step": 1070 }, { "epoch": 1.07, "learning_rate": 0.005431517766497462, "loss": 1.5909, "step": 1075 }, { "epoch": 1.08, "learning_rate": 0.005403317258883249, "loss": 1.2899, "step": 1080 }, { "epoch": 1.08, "learning_rate": 0.005375116751269036, "loss": 1.2528, "step": 1085 }, { "epoch": 1.09, "learning_rate": 0.005346916243654824, "loss": 1.3185, "step": 1090 }, { "epoch": 1.09, "learning_rate": 0.00531871573604061, "loss": 1.751, "step": 1095 }, { "epoch": 1.1, "learning_rate": 0.005290515228426396, "loss": 1.2762, "step": 1100 }, { "epoch": 1.1, "learning_rate": 0.005262314720812184, "loss": 1.4006, "step": 1105 }, { "epoch": 1.11, "learning_rate": 0.0052341142131979695, "loss": 1.3565, "step": 1110 }, { "epoch": 1.11, "learning_rate": 0.005205913705583757, "loss": 1.7962, "step": 1115 }, { "epoch": 1.12, "learning_rate": 0.005177713197969543, "loss": 1.3809, "step": 1120 }, { "epoch": 1.12, "learning_rate": 0.00514951269035533, "loss": 1.6438, "step": 1125 }, { "epoch": 1.13, "learning_rate": 0.005121312182741118, "loss": 1.4397, "step": 1130 }, { "epoch": 1.13, "learning_rate": 0.005093111675126903, "loss": 1.8387, "step": 1135 }, { "epoch": 1.14, "learning_rate": 0.005064911167512691, "loss": 1.4722, "step": 1140 }, { "epoch": 1.14, "learning_rate": 0.005036710659898478, "loss": 1.6683, "step": 1145 }, { "epoch": 1.15, "learning_rate": 0.005008510152284263, "loss": 1.3579, "step": 1150 }, { "epoch": 1.15, "learning_rate": 0.004980309644670051, "loss": 1.4072, "step": 1155 }, { "epoch": 1.16, "learning_rate": 0.004952109137055839, "loss": 1.4808, "step": 1160 }, { "epoch": 1.16, "learning_rate": 0.004923908629441625, "loss": 1.37, "step": 1165 }, { "epoch": 1.17, "learning_rate": 0.004895708121827412, "loss": 1.4239, "step": 1170 }, { "epoch": 1.17, "learning_rate": 0.004867507614213198, "loss": 1.6803, "step": 1175 }, { "epoch": 1.18, "learning_rate": 0.004839307106598985, "loss": 1.3899, "step": 1180 }, { "epoch": 1.18, "learning_rate": 0.004811106598984772, "loss": 1.209, "step": 1185 }, { "epoch": 1.19, "learning_rate": 0.004782906091370559, "loss": 1.3444, "step": 1190 }, { "epoch": 1.19, "learning_rate": 0.004754705583756346, "loss": 1.5994, "step": 1195 }, { "epoch": 1.2, "learning_rate": 0.004726505076142133, "loss": 1.7138, "step": 1200 }, { "epoch": 1.2, "learning_rate": 0.0046983045685279196, "loss": 1.6732, "step": 1205 }, { "epoch": 1.21, "learning_rate": 0.004670104060913706, "loss": 1.6533, "step": 1210 }, { "epoch": 1.21, "learning_rate": 
0.004641903553299492, "loss": 1.4821, "step": 1215 }, { "epoch": 1.22, "learning_rate": 0.00461370304568528, "loss": 1.9074, "step": 1220 }, { "epoch": 1.22, "learning_rate": 0.004585502538071067, "loss": 1.6595, "step": 1225 }, { "epoch": 1.23, "learning_rate": 0.004557302030456853, "loss": 1.5424, "step": 1230 }, { "epoch": 1.23, "learning_rate": 0.0045291015228426405, "loss": 1.669, "step": 1235 }, { "epoch": 1.24, "learning_rate": 0.004500901015228428, "loss": 1.7157, "step": 1240 }, { "epoch": 1.24, "learning_rate": 0.004472700507614213, "loss": 1.3508, "step": 1245 }, { "epoch": 1.25, "learning_rate": 0.0044445000000000005, "loss": 1.6086, "step": 1250 }, { "epoch": 1.25, "learning_rate": 0.004416299492385787, "loss": 2.1126, "step": 1255 }, { "epoch": 1.26, "learning_rate": 0.004388098984771574, "loss": 1.2235, "step": 1260 }, { "epoch": 1.26, "learning_rate": 0.0043598984771573614, "loss": 1.3403, "step": 1265 }, { "epoch": 1.27, "learning_rate": 0.004331697969543148, "loss": 1.4274, "step": 1270 }, { "epoch": 1.27, "learning_rate": 0.004303497461928934, "loss": 1.4858, "step": 1275 }, { "epoch": 1.28, "learning_rate": 0.0042752969543147215, "loss": 1.4251, "step": 1280 }, { "epoch": 1.28, "learning_rate": 0.004247096446700508, "loss": 1.2581, "step": 1285 }, { "epoch": 1.29, "learning_rate": 0.004218895939086295, "loss": 1.3023, "step": 1290 }, { "epoch": 1.29, "learning_rate": 0.0041906954314720815, "loss": 1.2824, "step": 1295 }, { "epoch": 1.3, "learning_rate": 0.004162494923857869, "loss": 1.6138, "step": 1300 }, { "epoch": 1.3, "learning_rate": 0.004134294416243656, "loss": 1.6641, "step": 1305 }, { "epoch": 1.31, "learning_rate": 0.0041060939086294415, "loss": 1.2635, "step": 1310 }, { "epoch": 1.31, "learning_rate": 0.004077893401015229, "loss": 1.5835, "step": 1315 }, { "epoch": 1.32, "learning_rate": 0.004049692893401016, "loss": 1.6182, "step": 1320 }, { "epoch": 1.32, "learning_rate": 0.0040214923857868025, "loss": 1.2402, "step": 1325 }, { "epoch": 1.33, "learning_rate": 0.00399329187817259, "loss": 1.6024, "step": 1330 }, { "epoch": 1.33, "learning_rate": 0.003965091370558376, "loss": 1.3873, "step": 1335 }, { "epoch": 1.34, "learning_rate": 0.003936890862944163, "loss": 1.5199, "step": 1340 }, { "epoch": 1.34, "learning_rate": 0.00390869035532995, "loss": 1.3065, "step": 1345 }, { "epoch": 1.35, "learning_rate": 0.0038804898477157357, "loss": 1.3709, "step": 1350 }, { "epoch": 1.35, "learning_rate": 0.003852289340101523, "loss": 1.5692, "step": 1355 }, { "epoch": 1.36, "learning_rate": 0.0038240888324873098, "loss": 1.2774, "step": 1360 }, { "epoch": 1.36, "learning_rate": 0.003795888324873097, "loss": 1.3966, "step": 1365 }, { "epoch": 1.37, "learning_rate": 0.0037676878172588843, "loss": 1.0405, "step": 1370 }, { "epoch": 1.37, "learning_rate": 0.0037394873096446707, "loss": 1.6305, "step": 1375 }, { "epoch": 1.38, "learning_rate": 0.003711286802030457, "loss": 1.3404, "step": 1380 }, { "epoch": 1.38, "learning_rate": 0.0036830862944162435, "loss": 1.5225, "step": 1385 }, { "epoch": 1.39, "learning_rate": 0.0036548857868020303, "loss": 1.4402, "step": 1390 }, { "epoch": 1.39, "learning_rate": 0.0036266852791878175, "loss": 1.8302, "step": 1395 }, { "epoch": 1.4, "learning_rate": 0.003598484771573604, "loss": 1.1884, "step": 1400 }, { "epoch": 1.4, "learning_rate": 0.003570284263959391, "loss": 1.6533, "step": 1405 }, { "epoch": 1.41, "learning_rate": 0.0035420837563451785, "loss": 1.449, "step": 1410 }, { "epoch": 1.41, "learning_rate": 0.003513883248730964, 
"loss": 1.4537, "step": 1415 }, { "epoch": 1.42, "learning_rate": 0.0034856827411167512, "loss": 1.4386, "step": 1420 }, { "epoch": 1.42, "learning_rate": 0.003457482233502538, "loss": 1.3043, "step": 1425 }, { "epoch": 1.43, "learning_rate": 0.0034292817258883244, "loss": 1.5577, "step": 1430 }, { "epoch": 1.43, "learning_rate": 0.0034010812182741117, "loss": 1.6184, "step": 1435 }, { "epoch": 1.44, "learning_rate": 0.003372880710659899, "loss": 1.2835, "step": 1440 }, { "epoch": 1.44, "learning_rate": 0.0033446802030456858, "loss": 1.4659, "step": 1445 }, { "epoch": 1.45, "learning_rate": 0.0033164796954314717, "loss": 1.1781, "step": 1450 }, { "epoch": 1.45, "learning_rate": 0.0032882791878172586, "loss": 1.4166, "step": 1455 }, { "epoch": 1.46, "learning_rate": 0.003260078680203046, "loss": 1.3821, "step": 1460 }, { "epoch": 1.46, "learning_rate": 0.0032318781725888326, "loss": 1.417, "step": 1465 }, { "epoch": 1.47, "learning_rate": 0.0032036776649746195, "loss": 1.0738, "step": 1470 }, { "epoch": 1.47, "learning_rate": 0.0031754771573604067, "loss": 1.2626, "step": 1475 }, { "epoch": 1.48, "learning_rate": 0.0031472766497461935, "loss": 1.4132, "step": 1480 }, { "epoch": 1.48, "learning_rate": 0.0031190761421319795, "loss": 1.3976, "step": 1485 }, { "epoch": 1.49, "learning_rate": 0.0030908756345177663, "loss": 1.1655, "step": 1490 }, { "epoch": 1.49, "learning_rate": 0.003062675126903553, "loss": 1.6067, "step": 1495 }, { "epoch": 1.5, "learning_rate": 0.0030344746192893404, "loss": 1.2399, "step": 1500 }, { "epoch": 1.5, "learning_rate": 0.0030062741116751272, "loss": 1.3499, "step": 1505 }, { "epoch": 1.51, "learning_rate": 0.002978073604060914, "loss": 1.2958, "step": 1510 }, { "epoch": 1.51, "learning_rate": 0.0029498730964467, "loss": 0.9839, "step": 1515 }, { "epoch": 1.52, "learning_rate": 0.002921672588832487, "loss": 1.423, "step": 1520 }, { "epoch": 1.52, "learning_rate": 0.002893472081218274, "loss": 1.4083, "step": 1525 }, { "epoch": 1.53, "learning_rate": 0.002865271573604061, "loss": 1.552, "step": 1530 }, { "epoch": 1.53, "learning_rate": 0.002837071065989848, "loss": 1.1468, "step": 1535 }, { "epoch": 1.54, "learning_rate": 0.002808870558375635, "loss": 1.1524, "step": 1540 }, { "epoch": 1.54, "learning_rate": 0.002780670050761422, "loss": 1.7909, "step": 1545 }, { "epoch": 1.55, "learning_rate": 0.0027524695431472078, "loss": 1.2374, "step": 1550 }, { "epoch": 1.55, "learning_rate": 0.0027242690355329946, "loss": 1.5076, "step": 1555 }, { "epoch": 1.56, "learning_rate": 0.0026960685279187814, "loss": 1.2157, "step": 1560 }, { "epoch": 1.56, "learning_rate": 0.0026678680203045687, "loss": 1.4255, "step": 1565 }, { "epoch": 1.57, "learning_rate": 0.002639667512690356, "loss": 1.1614, "step": 1570 }, { "epoch": 1.57, "learning_rate": 0.0026114670050761423, "loss": 1.2836, "step": 1575 }, { "epoch": 1.58, "learning_rate": 0.0025832664974619296, "loss": 0.9739, "step": 1580 }, { "epoch": 1.58, "learning_rate": 0.002555065989847715, "loss": 1.445, "step": 1585 }, { "epoch": 1.59, "learning_rate": 0.0025268654822335024, "loss": 1.3724, "step": 1590 }, { "epoch": 1.59, "learning_rate": 0.002498664974619289, "loss": 1.1985, "step": 1595 }, { "epoch": 1.6, "learning_rate": 0.002470464467005076, "loss": 1.5102, "step": 1600 }, { "epoch": 1.6, "learning_rate": 0.0024422639593908633, "loss": 1.4111, "step": 1605 }, { "epoch": 1.61, "learning_rate": 0.00241406345177665, "loss": 1.4832, "step": 1610 }, { "epoch": 1.61, "learning_rate": 0.002385862944162437, "loss": 1.7259, "step": 
1615 }, { "epoch": 1.62, "learning_rate": 0.002357662436548223, "loss": 1.4051, "step": 1620 }, { "epoch": 1.62, "learning_rate": 0.0023294619289340097, "loss": 1.1585, "step": 1625 }, { "epoch": 1.63, "learning_rate": 0.002301261421319797, "loss": 1.4892, "step": 1630 }, { "epoch": 1.63, "learning_rate": 0.0022730609137055838, "loss": 1.2785, "step": 1635 }, { "epoch": 1.64, "learning_rate": 0.0022448604060913706, "loss": 1.4697, "step": 1640 }, { "epoch": 1.64, "learning_rate": 0.002216659898477158, "loss": 1.5335, "step": 1645 }, { "epoch": 1.65, "learning_rate": 0.0021884593908629447, "loss": 1.6132, "step": 1650 }, { "epoch": 1.65, "learning_rate": 0.0021602588832487306, "loss": 1.2333, "step": 1655 }, { "epoch": 1.66, "learning_rate": 0.0021320583756345175, "loss": 1.7243, "step": 1660 }, { "epoch": 1.66, "learning_rate": 0.0021038578680203043, "loss": 1.5654, "step": 1665 }, { "epoch": 1.67, "learning_rate": 0.0020756573604060915, "loss": 1.3233, "step": 1670 }, { "epoch": 1.67, "learning_rate": 0.0020474568527918784, "loss": 1.2538, "step": 1675 }, { "epoch": 1.68, "learning_rate": 0.002019256345177665, "loss": 1.5129, "step": 1680 }, { "epoch": 1.68, "learning_rate": 0.0019910558375634524, "loss": 1.2342, "step": 1685 }, { "epoch": 1.69, "learning_rate": 0.001962855329949238, "loss": 1.2484, "step": 1690 }, { "epoch": 1.69, "learning_rate": 0.0019346548223350254, "loss": 1.6045, "step": 1695 }, { "epoch": 1.7, "learning_rate": 0.0019064543147208123, "loss": 1.2544, "step": 1700 }, { "epoch": 1.7, "learning_rate": 0.0018782538071065993, "loss": 1.1989, "step": 1705 }, { "epoch": 1.71, "learning_rate": 0.0018500532994923861, "loss": 1.3844, "step": 1710 }, { "epoch": 1.71, "learning_rate": 0.0018218527918781732, "loss": 1.2898, "step": 1715 }, { "epoch": 1.72, "learning_rate": 0.0017936522842639591, "loss": 1.176, "step": 1720 }, { "epoch": 1.72, "learning_rate": 0.001765451776649746, "loss": 1.3886, "step": 1725 }, { "epoch": 1.73, "learning_rate": 0.001737251269035533, "loss": 1.4818, "step": 1730 }, { "epoch": 1.73, "learning_rate": 0.00170905076142132, "loss": 1.4334, "step": 1735 }, { "epoch": 1.74, "learning_rate": 0.0016808502538071069, "loss": 1.3581, "step": 1740 }, { "epoch": 1.74, "learning_rate": 0.0016526497461928939, "loss": 1.287, "step": 1745 }, { "epoch": 1.75, "learning_rate": 0.0016244492385786807, "loss": 1.4921, "step": 1750 }, { "epoch": 1.75, "learning_rate": 0.0015962487309644667, "loss": 1.6024, "step": 1755 }, { "epoch": 1.76, "learning_rate": 0.001568048223350254, "loss": 1.3267, "step": 1760 }, { "epoch": 1.76, "learning_rate": 0.0015398477157360405, "loss": 1.3324, "step": 1765 }, { "epoch": 1.77, "learning_rate": 0.0015116472081218278, "loss": 1.4282, "step": 1770 }, { "epoch": 1.77, "learning_rate": 0.0014834467005076146, "loss": 1.4165, "step": 1775 }, { "epoch": 1.78, "learning_rate": 0.0014552461928934012, "loss": 1.5209, "step": 1780 }, { "epoch": 1.78, "learning_rate": 0.0014270456852791885, "loss": 1.5915, "step": 1785 }, { "epoch": 1.79, "learning_rate": 0.001398845177664974, "loss": 1.5822, "step": 1790 }, { "epoch": 1.79, "learning_rate": 0.0013706446700507613, "loss": 1.4356, "step": 1795 }, { "epoch": 1.8, "learning_rate": 0.0013424441624365483, "loss": 1.3999, "step": 1800 }, { "epoch": 1.8, "learning_rate": 0.0013142436548223351, "loss": 1.3192, "step": 1805 }, { "epoch": 1.81, "learning_rate": 0.0012860431472081222, "loss": 0.9928, "step": 1810 }, { "epoch": 1.81, "learning_rate": 0.0012578426395939092, "loss": 1.7191, "step": 1815 }, { 
"epoch": 1.82, "learning_rate": 0.001229642131979696, "loss": 1.2173, "step": 1820 }, { "epoch": 1.82, "learning_rate": 0.001201441624365482, "loss": 1.1303, "step": 1825 }, { "epoch": 1.83, "learning_rate": 0.0011732411167512688, "loss": 1.649, "step": 1830 }, { "epoch": 1.83, "learning_rate": 0.0011450406091370558, "loss": 1.5897, "step": 1835 }, { "epoch": 1.84, "learning_rate": 0.0011168401015228429, "loss": 1.163, "step": 1840 }, { "epoch": 1.84, "learning_rate": 0.0010886395939086297, "loss": 1.1745, "step": 1845 }, { "epoch": 1.85, "learning_rate": 0.0010604390862944168, "loss": 1.2499, "step": 1850 }, { "epoch": 1.85, "learning_rate": 0.0010322385786802038, "loss": 1.1123, "step": 1855 }, { "epoch": 1.86, "learning_rate": 0.0010040380710659895, "loss": 1.6267, "step": 1860 }, { "epoch": 1.86, "learning_rate": 0.0009758375634517764, "loss": 1.7138, "step": 1865 }, { "epoch": 1.87, "learning_rate": 0.0009476370558375634, "loss": 1.0705, "step": 1870 }, { "epoch": 1.87, "learning_rate": 0.0009194365482233503, "loss": 1.5925, "step": 1875 }, { "epoch": 1.88, "learning_rate": 0.0008912360406091373, "loss": 1.2733, "step": 1880 }, { "epoch": 1.88, "learning_rate": 0.0008630355329949242, "loss": 1.5511, "step": 1885 }, { "epoch": 1.89, "learning_rate": 0.0008348350253807112, "loss": 1.3351, "step": 1890 }, { "epoch": 1.89, "learning_rate": 0.000806634517766497, "loss": 1.1094, "step": 1895 }, { "epoch": 1.9, "learning_rate": 0.000778434010152284, "loss": 1.1718, "step": 1900 }, { "epoch": 1.9, "learning_rate": 0.0007502335025380709, "loss": 1.2751, "step": 1905 }, { "epoch": 1.91, "learning_rate": 0.000722032994923858, "loss": 0.9808, "step": 1910 }, { "epoch": 1.91, "learning_rate": 0.0006938324873096449, "loss": 1.5196, "step": 1915 }, { "epoch": 1.92, "learning_rate": 0.0006656319796954318, "loss": 1.1567, "step": 1920 }, { "epoch": 1.92, "learning_rate": 0.0006374314720812188, "loss": 1.2295, "step": 1925 }, { "epoch": 1.93, "learning_rate": 0.0006092309644670046, "loss": 1.3395, "step": 1930 }, { "epoch": 1.93, "learning_rate": 0.0005810304568527917, "loss": 1.2176, "step": 1935 }, { "epoch": 1.94, "learning_rate": 0.0005528299492385786, "loss": 1.2218, "step": 1940 }, { "epoch": 1.94, "learning_rate": 0.0005246294416243655, "loss": 1.1824, "step": 1945 }, { "epoch": 1.95, "learning_rate": 0.0004964289340101526, "loss": 1.4026, "step": 1950 }, { "epoch": 1.95, "learning_rate": 0.0004682284263959396, "loss": 1.391, "step": 1955 }, { "epoch": 1.96, "learning_rate": 0.0004400279187817254, "loss": 1.5254, "step": 1960 }, { "epoch": 1.96, "learning_rate": 0.0004118274111675124, "loss": 1.2331, "step": 1965 }, { "epoch": 1.97, "learning_rate": 0.0003836269035532993, "loss": 1.2529, "step": 1970 }, { "epoch": 1.97, "learning_rate": 0.00035542639593908625, "loss": 1.0726, "step": 1975 }, { "epoch": 1.98, "learning_rate": 0.00032722588832487324, "loss": 1.2203, "step": 1980 }, { "epoch": 1.98, "learning_rate": 0.0002990253807106602, "loss": 1.2456, "step": 1985 }, { "epoch": 1.99, "learning_rate": 0.00027082487309644716, "loss": 1.3003, "step": 1990 }, { "epoch": 1.99, "learning_rate": 0.00024262436548223298, "loss": 1.299, "step": 1995 }, { "epoch": 2.0, "learning_rate": 0.00021442385786802, "loss": 1.488, "step": 2000 }, { "epoch": 2.0, "learning_rate": 0.00018622335025380695, "loss": 937.4512, "step": 2005 }, { "epoch": 2.01, "learning_rate": 0.00015802284263959394, "loss": 924.4408, "step": 2010 }, { "epoch": 2.01, "learning_rate": 0.00012982233502538087, "loss": 903.1811, "step": 2015 }, 
{ "epoch": 2.02, "learning_rate": 0.00010162182741116783, "loss": 868.8327, "step": 2020 }, { "epoch": 2.02, "learning_rate": 7.342131979695479e-05, "loss": 833.6856, "step": 2025 }, { "epoch": 2.03, "learning_rate": 4.522081218274065e-05, "loss": 816.0007, "step": 2030 }, { "epoch": 2.03, "learning_rate": 1.70203045685276e-05, "loss": 795.98, "step": 2035 }, { "epoch": 2.04, "step": 2038, "total_flos": 2.2810681001063887e+23, "train_loss": 16.08543400488377, "train_runtime": 323733.6046, "train_samples_per_second": 103.142, "train_steps_per_second": 0.006 }, { "epoch": 2.04, "step": 2038, "total_flos": 2.2810681001063887e+23, "train_loss": 0.0, "train_runtime": 3248433.7519, "train_samples_per_second": 0.005, "train_steps_per_second": 0.0 } ], "max_steps": 1, "num_train_epochs": 1, "start_time": 1663027920.2259, "total_flos": 2.2810681001063887e+23, "trial_name": null, "trial_params": null }